From 0ec435199ee54f2a97990d0a1cd337c1f90aeea2 Mon Sep 17 00:00:00 2001 From: Daniel Zou Date: Wed, 3 Aug 2022 22:11:06 -0500 Subject: [PATCH 1/5] Refactor Sparse/BlockArray API, RHS binary ops --- nums/core/array/application.py | 4 +- nums/core/array/base.py | 794 ++++++++++++++---- nums/core/array/blockarray.py | 521 +++--------- nums/core/array/random.py | 12 +- nums/core/array/sparse.py | 595 ++++++------- nums/core/array/utils.py | 25 - nums/core/array/view.py | 32 +- nums/core/kernel/kernel_interface.py | 3 + nums/core/kernel/numpy_kernel.py | 10 +- nums/experimental/optimizer/graph.py | 12 +- nums/experimental/optimizer/grapharray.py | 57 +- nums/experimental/optimizer/reduction_ops.py | 2 +- tests/core/array/test_bop.py | 6 +- tests/core/array/test_sparse.py | 31 +- tests/experimental/optimizer/test_copy.py | 2 +- .../experimental/optimizer/test_tensordot.py | 6 +- 16 files changed, 1157 insertions(+), 955 deletions(-) diff --git a/nums/core/array/application.py b/nums/core/array/application.py index 48654617..ce148994 100644 --- a/nums/core/array/application.py +++ b/nums/core/array/application.py @@ -917,7 +917,7 @@ def map_uop( # TODO(hme): Faster to create ndarray first, # and instantiate block array on return # to avoid instantiating blocks on BlockArray initialization. - rarr.blocks[grid_entry] = arr.blocks[grid_entry].uop_map( + rarr.blocks[grid_entry] = arr.blocks[grid_entry].map_uop( op_name, args=args, kwargs=kwargs ) return rarr @@ -928,7 +928,7 @@ def matmul(self, arr_1: BlockArray, arr_2: BlockArray) -> BlockArray: def tensordot( self, arr_1: BlockArray, arr_2: BlockArray, axes: int = 2 ) -> BlockArray: - return arr_1.tensordot(arr_2, axes) + return arr_1.tensordot(arr_1, arr_2, axes) def einsum(self, subscript, *operands): def _compute_syskwargs(blocks): diff --git a/nums/core/array/base.py b/nums/core/array/base.py index 9d0e34c5..04b3a534 100644 --- a/nums/core/array/base.py +++ b/nums/core/array/base.py @@ -23,11 +23,7 @@ from nums.core.grid.grid import ArrayGrid -class Block: - # pylint: disable=redefined-builtin, global-statement - # TODO(hme): Create a base class, and move this concrete class into blockarray.py. - # Do this when we implement a SparseBlock object. - +class BlockBase: block_id_counter = -1 def __init__( @@ -40,7 +36,7 @@ def __init__( km: KernelManager, id=None, ): - self._km = km + self.km = km self.grid_entry: tuple = grid_entry self.grid_shape: tuple = grid_shape self.oid: np.object = None @@ -56,31 +52,34 @@ def __init__( self._device = None self.fill_value = None - @property - def nnz(self): - return np.prod(self.shape) - @property def is_dense(self): return self.fill_value is None + @property + def nbytes(self): + raise NotImplementedError() + def __repr__(self): - return "Block(" + str(self.oid) + ")" + raise NotImplementedError() def size(self): return np.product(self.shape) - def copy(self, shallow=True): - assert shallow, "Only shallow copies are currently supported." 
- block = Block( - self.grid_entry, - self.grid_shape, - self.shape, - self.dtype, - self.transposed, - self._km, + def copy(self): + raise NotImplementedError() + + def get(self): + return self.km.get(self.oid) + + def astype(self, dtype): + block = self.copy() + block.dtype = dtype + block.oid = self.km.astype( + self.oid, + dtype.__name__, + syskwargs={"grid_entry": block.grid_entry, "grid_shape": block.grid_shape}, ) - block.oid = self.oid return block def true_grid_entry(self): @@ -93,40 +92,16 @@ def true_grid_shape(self): return tuple(reversed(self.grid_shape)) return self.grid_shape + def transpose(self, defer=False, redistribute=False): + raise NotImplementedError() + def device(self): if self._device is not None: return self._device - return self._km.device_grid.get_device( + return self.km.device_grid.get_device( self.true_grid_entry(), self.true_grid_shape() ) - def transpose(self, defer=False, redistribute=False): - # If defer is True, this operation does not modify the remote object. - # If defer is True and redistribute is False, - # this operation does not move the remote object. - grid_entryT = tuple(reversed(self.grid_entry)) - grid_shapeT = tuple(reversed(self.grid_shape)) - blockT = Block( - grid_entry=grid_entryT, - grid_shape=grid_shapeT, - shape=tuple(reversed(self.shape)), - dtype=self.dtype, - transposed=not self.transposed, - km=self._km, - ) - blockT.oid = self.oid - if not defer: - blockT.transposed = False - if redistribute: - syskwargs = {"grid_entry": grid_entryT, "grid_shape": grid_shapeT} - else: - syskwargs = { - "grid_entry": self.grid_entry, - "grid_shape": self.grid_shape, - } - blockT.oid = self._km.transpose(self.oid, syskwargs=syskwargs) - return blockT - def swapaxes(self, axis1, axis2): block = self.copy() grid_entry = list(block.grid_entry) @@ -141,7 +116,7 @@ def swapaxes(self, axis1, axis2): block.grid_shape = tuple(grid_shape) block.shape = tuple(shape) - block.oid = self._km.swapaxes( + block.oid = self.km.swapaxes( block.oid, axis1, axis2, @@ -149,48 +124,20 @@ def swapaxes(self, axis1, axis2): ) return block + def map_uop(self, op_name, args=None, kwargs=None, device=None): + raise NotImplementedError() + def ufunc(self, op_name, device=None): - return self.uop_map(op_name, device=device) + return self.map_uop(op_name, device=device) - def uop_map(self, op_name, args=None, kwargs=None, device=None): - # This retains transpose. - block = self.copy() - block.dtype = array_utils.get_uop_output_type(op_name, self.dtype) - args = () if args is None else args - kwargs = {} if kwargs is None else kwargs - if device is None: - syskwargs = {"grid_entry": block.grid_entry, "grid_shape": block.grid_shape} - else: - syskwargs = {"device": device} - block._device = device - block.oid = self._km.map_uop( - op_name, self.oid, args, kwargs, syskwargs=syskwargs - ) - return block + def block_from_scalar(self, other): + raise NotImplementedError() - def _block_from_other(self, other): - # Assume other is numeric. - # This only occurs during some numpy operations (e.g. np.mean), - # where a literal is used in the operation. - assert isinstance(other, (int, float, np.int, np.float)) - block = Block( - self.grid_entry, - self.grid_shape, - (1,), - self.dtype, - False, - self._km, - ) - # We pass syskwargs here for correct node placement for `other`, - # which should be local to self. 
- block.oid = self._km.put( - np.array(other, dtype=self.dtype), - syskwargs={ - "grid_entry": self.grid_entry, - "grid_shape": self.grid_shape, - }, - ) - return block + def conjugate(self): + return self.ufunc("conjugate") + + def sqrt(self): + return self.ufunc("sqrt") @staticmethod def block_meta(op, block1, block2, args): @@ -227,77 +174,105 @@ def block_meta(op, block1, block2, args): @staticmethod def init_block(op_name, block1, block2, args, device=None): - result_grid_entry, result_grid_shape, result_shape, dtype = Block.block_meta( - op_name, block1, block2, args - ) - block = Block( - grid_entry=result_grid_entry, - grid_shape=result_grid_shape, - shape=result_shape, - dtype=dtype, - transposed=False, - km=block1._km, - ) - block._device = device - return block + raise NotImplementedError() - def bop(self, op_name, other, args: dict, device=None): - if not isinstance(other, Block): - other = self._block_from_other(other) - block: Block = self.init_block(op_name, self, other, args, device) - if device is None: - syskwargs = {"grid_entry": block.grid_entry, "grid_shape": block.grid_shape} - else: - syskwargs = {"device": device} - block.oid = self._km.bop( - op_name, - self.oid, - other.oid, - self.transposed, - other.transposed, - axes=args.get("axes"), - syskwargs=syskwargs, - ) - return block + def _check_bop_implemented(self, other): + raise NotImplementedError() + + @staticmethod + def binary_op(op_name, a, b, args: dict, device=None): + raise NotImplementedError() + + def bop(self, op_name, other, args: dict, device=None, **kwargs): + raise NotImplementedError() def tensordot(self, other, axes): - return self.bop("tensordot", other, args={"axes": axes}) + raise NotImplementedError() def __add__(self, other): - return self.bop("add", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("add", self, other, args={}) + + def __radd__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("add", other, self, args={}) def __sub__(self, other): - return self.bop("sub", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("sub", self, other, args={}) + + def __rsub__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("sub", other, self, args={}) def __mul__(self, other): - return self.bop("mul", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("mul", self, other, args={}) - def __matmul__(self, other): - return self.tensordot(other, axes=1) + def __rmul__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("mul", other, self, args={}) def __truediv__(self, other): - return self.bop("truediv", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("truediv", self, other, args={}) + + def __rtruediv__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("trudiv", other, self, args={}) def __pow__(self, other): - return self.bop("pow", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("pow", self, other, args={}) + + def __rpow__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("pow", other, self, args={}) + + def __matmul__(self, 
other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.tensordot(other, axes=1) def __ge__(self, other): - return self.bop("ge", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("ge", self, other, args={}) def __gt__(self, other): - return self.bop("gt", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("gt", self, other, args={}) def __le__(self, other): - return self.bop("le", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("le", self, other, args={}) def __lt__(self, other): - return self.bop("lt", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("lt", self, other, args={}) def __eq__(self, other): - return self.bop("eq", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("eq", self, other, args={}) def __ne__(self, other): - return self.bop("ne", other, args={}) + if not self._check_bop_implemented(other): + return NotImplemented + return self.binary_op("ne", self, other, args={}) __iadd__ = __add__ __isub__ = __sub__ @@ -306,24 +281,169 @@ def __ne__(self, other): __itruediv__ = __truediv__ __ipow__ = __pow__ - def astype(self, dtype): + +class Block(BlockBase): + # pylint: disable=redefined-builtin, global-statement + + def __init__( + self, + grid_entry, + grid_shape, + shape, + dtype, + transposed, + km: KernelManager, + id=None, + ): + super().__init__(grid_entry, grid_shape, shape, dtype, transposed, km, id) + + @property + def nbytes(self): + return np.prod(self.shape) * np.dtype(self.dtype).itemsize + + def __repr__(self): + return "Block(" + str(self.oid) + ")" + + def copy(self, shallow=True): + assert shallow, "Only shallow copies are currently supported." + block = Block( + self.grid_entry, + self.grid_shape, + self.shape, + self.dtype, + self.transposed, + self.km, + ) + block.oid = self.oid + return block + + def transpose(self, defer=False, redistribute=False): + # If defer is True, this operation does not modify the remote object. + # If defer is True and redistribute is False, + # this operation does not move the remote object. + grid_entryT = tuple(reversed(self.grid_entry)) + grid_shapeT = tuple(reversed(self.grid_shape)) + blockT = Block( + grid_entry=grid_entryT, + grid_shape=grid_shapeT, + shape=tuple(reversed(self.shape)), + dtype=self.dtype, + transposed=not self.transposed, + km=self.km, + ) + blockT.oid = self.oid + if not defer: + blockT.transposed = False + if redistribute: + syskwargs = {"grid_entry": grid_entryT, "grid_shape": grid_shapeT} + else: + syskwargs = { + "grid_entry": self.grid_entry, + "grid_shape": self.grid_shape, + } + blockT.oid = self.km.transpose(self.oid, syskwargs=syskwargs) + return blockT + + def map_uop(self, op_name, args=None, kwargs=None, device=None): + # This retains transpose. 
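+        # `map_uop` applies the named unary kernel op to this block's remote
+        # object, inferring the result dtype via get_uop_output_type, and runs it
+        # on this block's grid entry unless an explicit device is given.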
block = self.copy() - block.dtype = dtype - block.oid = self._km.astype( - self.oid, - dtype.__name__, - syskwargs={"grid_entry": block.grid_entry, "grid_shape": block.grid_shape}, + block.dtype = array_utils.get_uop_output_type(op_name, self.dtype) + args = () if args is None else args + kwargs = {} if kwargs is None else kwargs + if device is None: + syskwargs = {"grid_entry": block.grid_entry, "grid_shape": block.grid_shape} + else: + syskwargs = {"device": device} + block._device = device + block.oid = self.km.map_uop( + op_name, self.oid, args, kwargs, syskwargs=syskwargs ) return block - def conjugate(self): - return self.ufunc("conjugate") + def block_from_scalar(self, other): + # Assume other is numeric. + # This only occurs during some numpy operations (e.g. np.mean), + # where a literal is used in the operation. + assert array_utils.is_scalar(other) + block = Block( + self.grid_entry, + self.grid_shape, + (1,), + self.dtype, + False, + self.km, + ) + # We pass syskwargs here for correct node placement for `other`, + # which should be local to self. + block.oid = self.km.put( + np.array(other, dtype=self.dtype), + syskwargs={ + "grid_entry": self.grid_entry, + "grid_shape": self.grid_shape, + }, + ) + return block - def sqrt(self): - return self.ufunc("sqrt") + @staticmethod + def init_block(op_name, block1, block2, args, device=None): + ( + result_grid_entry, + result_grid_shape, + result_shape, + dtype, + ) = BlockBase.block_meta(op_name, block1, block2, args) + block = Block( + grid_entry=result_grid_entry, + grid_shape=result_grid_shape, + shape=result_shape, + dtype=dtype, + transposed=False, + km=block1.km, + ) + block._device = device + return block - def get(self): - return self._km.get(self.oid) + def _check_bop_implemented(self, other): + if isinstance(other, Block) or array_utils.is_scalar(other): + return True + return False + + @staticmethod + def binary_op(op_name, a, b, args: dict, device=None): + if isinstance(a, Block) and array_utils.is_scalar(b): + b = a.block_from_scalar(b) + elif isinstance(b, Block) and array_utils.is_scalar(a): + a = b.block_from_scalar(a) + if not isinstance(a, Block) or not isinstance(b, Block): + raise NotImplementedError() + + block: Block = a.init_block(op_name, a, b, args, device) + if device is None: + syskwargs = {"grid_entry": block.grid_entry, "grid_shape": block.grid_shape} + else: + syskwargs = {"device": device} + block.oid = a.km.bop( + op_name, + a.oid, + b.oid, + a.transposed, + b.transposed, + axes=args.get("axes"), + syskwargs=syskwargs, + ) + return block + + def bop(self, op_name, other, args: dict, device=None): + try: + return self.binary_op(op_name, self, other, args, device) + except NotImplementedError: + return other.binary_op(op_name, self, other, args, device) + + def tensordot(self, other, axes): + try: + return self.binary_op("tensordot", self, other, args={"axes": axes}) + except NotImplementedError: + return other.binary_op("tensordot", self, other, args={"axes": axes}) class BlockArrayBase: @@ -336,30 +456,9 @@ def __init__(self, grid: ArrayGrid, km: KernelManager, blocks: np.ndarray = None self.size = np.product(self.shape) self.ndim = len(self.shape) self.dtype = self.grid.dtype - try: - self.nbytes = self.grid.nbytes() - except ValueError as _: - self.nbytes = None self.blocks = blocks - if self.blocks is None: - # TODO (hme): Subclass np.ndarray for self.blocks instances, - # and override key methods to better integrate with NumPy's ufuncs. 
- self.blocks = np.empty(shape=self.grid.grid_shape, dtype=Block) - for grid_entry in self.grid.get_entry_iterator(): - self.blocks[grid_entry] = Block( - grid_entry=grid_entry, - grid_shape=self.grid.grid_shape, - shape=self.grid.get_block_shape(grid_entry), - dtype=self.dtype, - transposed=False, - km=self.km, - ) self.fill_value = None - @property - def nnz(self): - return np.prod(self.shape) - @property def is_dense(self): return self.fill_value is None @@ -369,7 +468,7 @@ def __repr__(self): def get(self) -> np.ndarray: result: np.ndarray = np.zeros(shape=self.grid.shape, dtype=self.grid.dtype) - block_shape: np.ndarray = np.array(self.grid.block_shape, dtype=np.int) + block_shape: np.ndarray = np.array(self.grid.block_shape, dtype=np.int64) arrays: list = self.km.get( [ self.blocks[grid_entry].oid @@ -378,16 +477,50 @@ def get(self) -> np.ndarray: ) for block_index, grid_entry in enumerate(self.grid.get_entry_iterator()): start = block_shape * grid_entry - entry_shape = np.array(self.grid.get_block_shape(grid_entry), dtype=np.int) + entry_shape = np.array( + self.grid.get_block_shape(grid_entry), dtype=np.int64 + ) end = start + entry_shape slices = tuple(map(lambda item: slice(*item), zip(*(start, end)))) - block: Block = self.blocks[grid_entry] + block: BlockBase = self.blocks[grid_entry] arr: np.ndarray = arrays[block_index] if block.transposed: arr = arr.T result[slices] = arr.reshape(block.shape) return result + def touch(self): + """ + "Touch" an array. This is an efficient distributed "wait" operation. + """ + oids = [] + for grid_entry in self.grid.get_entry_iterator(): + block: BlockBase = self.blocks[grid_entry] + oids.append( + self.km.touch( + block.oid, + syskwargs={ + "grid_entry": block.grid_entry, + "grid_shape": block.grid_shape, + }, + ) + ) + self.km.get(oids) + return self + + def copy(self): + raise NotImplementedError() + + def astype(self, dtype): + raise NotImplementedError() + + def flattened_oids(self): + oids = [] + for grid_entry in self.grid.get_entry_iterator(): + oid = self.blocks[grid_entry].oid + oids.append(oid) + return oids + def broadcast_to(self, shape): b = array_utils.broadcast(self.shape, shape) result_block_shape = array_utils.broadcast_block_shape( @@ -410,3 +543,324 @@ def broadcast_to(self, shape): broadcast = it.itviews[0] result.blocks = broadcast return result + + def tree_reduce( + self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape + ): + """ + Basic tree reduce imp. + Schedules op on same node as left operand. + :param op_name: The reduction op. + :param blocks_or_oids: A list of type Block or a list of tuples. + Tuples must be of the form + (oid, grid_entry, grid_shape, transposed) + :param result_grid_entry: The grid entry of the result block. This will be used + to compute the final reduction step. + :param result_grid_shape: The grid entry of the result block. This will be used + to compute the final reduction step. + :return: The oid of the result. 
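+
+        Operands are reduced pairwise in queue order; only the final reduction
+        step is placed on the result block's grid entry.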
+ """ + oid_list = blocks_or_oids + if isinstance(blocks_or_oids[0], Block): + oid_list = [ + (b.oid, b.grid_entry, b.grid_shape, b.transposed) + for b in blocks_or_oids + ] + if len(oid_list) == 1: + return oid_list[0][0] + q = oid_list + while len(q) > 1: + a_oid, a_ge, a_gs, a_T = q.pop(0) + b_oid, _, _, b_T = q.pop(0) + ge, gs = ( + (result_grid_entry, result_grid_shape) if len(q) == 0 else (a_ge, a_gs) + ) + c_oid = self.km.bop_reduce( + op_name, + a_oid, + b_oid, + a_T, + b_T, + syskwargs={ + "grid_entry": ge, + "grid_shape": gs, + }, + ) + q.append((c_oid, ge, gs, False)) + r_oid, r_ge, r_gs, _ = q.pop(0) + assert r_ge == result_grid_entry + assert r_gs == result_grid_shape + return r_oid + + def check_or_convert_other(self, other, compute_block_shape=False): + raise NotImplementedError() + + def _check_bop_implemented(self, other): + raise NotImplementedError() + + # All operators: https://docs.python.org/3/library/operator.html + + ################# + # Unary functions + ################# + + def ufunc(self, op_name): + raise NotImplementedError() + + def __neg__(self): + return self.ufunc("negative") + + def __pos__(self): + return self + + def __abs__(self): + return self.ufunc("abs") + + def __invert__(self): + return self.ufunc("invert") + + ################# + # Arithmetic + ################# + + @staticmethod + def elementwise(op_name, a, b): + raise NotImplementedError() + + def __mod__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("mod", self, other) + + def __rmod__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("mod", other, self) + + __imod__ = __mod__ + + def __add__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("add", self, other) + + def __radd__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("add", other, self) + + __iadd__ = __add__ + + def __sub__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("sub", self, other) + + def __rsub__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("sub", other, self) + + __isub__ = __sub__ + + def __mul__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("mul", self, other) + + def __rmul__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("mul", other, self) + + __imul__ = __mul__ + + def __truediv__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("truediv", self, other) + + def __rtruediv__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("truediv", other, self) + + __itruediv__ = __truediv__ + + def __floordiv__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("floor_divide", self, other) + + def __rfloordiv__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("floor_divide", other, self) + + __ifloordiv__ = __floordiv__ + + def __pow__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("pow", self, other) + + def __rpow__(self, other): + if not 
self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("pow", other, self) + + __ipow__ = __pow__ + + ################## + # Boolean + ################## + + # TODO (hme): Type check bool ops. + def __bool__(self): + # pylint: disable=no-member + if np.sum(self.shape) == len(self.shape): + # If all ones or scalar, then this is defined. + return self.get().__bool__() + return True + + def __or__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("bitwise_or", self, other) + + def __ror__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("bitwise_or", other, self) + + __ior__ = __or__ + + def __and__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("bitwise_and", self, other) + + def __rand__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("bitwise_and", other, self) + + __iand__ = __and__ + + def __xor__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("bitwise_xor", self, other) + + def __rxor__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("bitwise_xor", other, self) + + __ixor__ = __xor__ + + def __lshift__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("left_shift", self, other) + + def __rlshift__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("left_shift", other, self) + + __ilshift__ = __lshift__ + + def __rshift__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("right_shift", self, other) + + def __rrshift__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.elementwise("right_shift", other, self) + + __irshift__ = __rshift__ + + ################# + # Linear Algebra + ################# + + def _compute_tensordot_syskwargs(self, self_block: Block, other_block: Block): + # Schedule on larger block. + if np.product(self_block.shape) >= np.product(other_block.shape): + return self_block.true_grid_entry(), self_block.true_grid_shape() + else: + return other_block.true_grid_entry(), other_block.true_grid_shape() + + @staticmethod + def tensordot(a, b, axes=2): + raise NotImplementedError() + + def __matmul__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + if len(self.shape) > 2: + # TODO (bcp): NumPy's implementation does a stacked matmul, which is not supported yet. + raise NotImplementedError( + "Matrix multiply for tensors of rank > 2 not supported yet." 
+ ) + else: + return self.tensordot(self, other, 1) + + def __rmatmul__(self, other): + if not self._check_bop_implemented(other): + return NotImplemented + return self.tensordot(other, self, 1) + + __imatmul__ = __matmul__ + + ################# + # Inequalities + ################# + + def __inequality__(self, op, other): + raise NotImplementedError() + + def __ge__(self, other): + return self.__inequality__("ge", other) + + def __rge__(self, other): + other = self.check_or_convert_other(other) + return other.__inequality__("ge", self) + + def __gt__(self, other): + return self.__inequality__("gt", other) + + def __rgt__(self, other): + other = self.check_or_convert_other(other) + return other.__inequality__("gt", self) + + def __le__(self, other): + return self.__inequality__("le", other) + + def __rle__(self, other): + other = self.check_or_convert_other(other) + return other.__inequality__("le", self) + + def __lt__(self, other): + return self.__inequality__("lt", other) + + def __rlt__(self, other): + other = self.check_or_convert_other(other) + return other.__inequality__("lt", self) + + def __eq__(self, other): + return self.__inequality__("eq", other) + + def __req__(self, other): + other = self.check_or_convert_other(other) + return other.__inequality__("eq", self) + + def __ne__(self, other): + return self.__inequality__("ne", other) + + def __rne__(self, other): + other = self.check_or_convert_other(other) + return other.__inequality__("ne", self) diff --git a/nums/core/array/blockarray.py b/nums/core/array/blockarray.py index 46a7daf3..9fccb971 100644 --- a/nums/core/array/blockarray.py +++ b/nums/core/array/blockarray.py @@ -17,7 +17,6 @@ import itertools import numpy as np -import nums.core.array.sparse as array_sparse from nums.core.array import utils as array_utils from nums.core.array.base import BlockArrayBase, Block @@ -30,6 +29,28 @@ class BlockArray(BlockArrayBase): + def __init__(self, grid: ArrayGrid, km: KernelManager, blocks: np.ndarray = None): + if blocks is not None: + assert blocks.dtype == Block, "BlockArray must be initialized with Blocks" + super().__init__(grid, km, blocks) + try: + self.nbytes = self.grid.nbytes() + except ValueError as _: + self.nbytes = None + if self.blocks is None: + # TODO (hme): Subclass np.ndarray for self.blocks instances, + # and override key methods to better integrate with NumPy's ufuncs. + self.blocks = np.empty(shape=self.grid.grid_shape, dtype=Block) + for grid_entry in self.grid.get_entry_iterator(): + self.blocks[grid_entry] = Block( + grid_entry=grid_entry, + grid_shape=self.grid.grid_shape, + shape=self.grid.get_block_shape(grid_entry), + dtype=self.dtype, + transposed=False, + km=self.km, + ) + @classmethod def empty(cls, shape, block_shape, dtype, km: KernelManager): return BlockArray.create("empty", shape, block_shape, dtype, km) @@ -115,24 +136,12 @@ def copy(self): rarr_copy.blocks[grid_entry] = self.blocks[grid_entry].copy() return rarr_copy - def touch(self): - """ - "Touch" an array. This is an efficient distributed "wait" operation. 
- """ - oids = [] - for grid_entry in self.grid.get_entry_iterator(): - block: Block = self.blocks[grid_entry] - oids.append( - self.km.touch( - block.oid, - syskwargs={ - "grid_entry": block.grid_entry, - "grid_shape": block.grid_shape, - }, - ) - ) - self.km.get(oids) - return self + def astype(self, dtype): + grid = ArrayGrid(self.shape, self.block_shape, dtype.__name__) + result = BlockArray(grid, self.km) + for grid_entry in result.grid.get_entry_iterator(): + result.blocks[grid_entry] = self.blocks[grid_entry].astype(dtype) + return result def is_single_block(self): return self.blocks.size == 1 @@ -323,7 +332,7 @@ def __getitem__(self, item): av: ArrayView = ArrayView.from_block_array(self) # TODO (hme): We don't have to create, but do so for now until we need to optimize. - return av[ss].create(BlockArray) + return av[ss].create() def _advanced_single_array_select(self, ss: tuple, axis: int = 0): # Create output array along the axis of the selection operation. @@ -635,60 +644,19 @@ def check_or_convert_other(self, other, compute_block_shape=False): block_shape = None if compute_block_shape else self.block_shape return BlockArray.to_block_array(other, self.km, block_shape=block_shape) + def _check_bop_implemented(self, other): + if isinstance(other, (BlockArray, np.ndarray, list)) or array_utils.is_scalar( + other + ): + return True + return False + def ufunc(self, op_name): result = self.copy() for grid_entry in self.grid.get_entry_iterator(): result.blocks[grid_entry] = self.blocks[grid_entry].ufunc(op_name) return result - def tree_reduce( - self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape - ): - """ - Basic tree reduce imp. - Schedules op on same node as left operand. - :param op_name: The reduction op. - :param blocks_or_oids: A list of type Block or a list of tuples. - Tuples must be of the form - (oid, grid_entry, grid_shape, transposed) - :param result_grid_entry: The grid entry of the result block. This will be used - to compute the final reduction step. - :param result_grid_shape: The grid entry of the result block. This will be used - to compute the final reduction step. - :return: The oid of the result. - """ - oid_list = blocks_or_oids - if isinstance(blocks_or_oids[0], Block): - oid_list = [ - (b.oid, b.grid_entry, b.grid_shape, b.transposed) - for b in blocks_or_oids - ] - if len(oid_list) == 1: - return oid_list[0][0] - q = oid_list - while len(q) > 1: - a_oid, a_ge, a_gs, a_T = q.pop(0) - b_oid, _, _, b_T = q.pop(0) - ge, gs = ( - (result_grid_entry, result_grid_shape) if len(q) == 0 else (a_ge, a_gs) - ) - c_oid = self.km.bop_reduce( - op_name, - a_oid, - b_oid, - a_T, - b_T, - syskwargs={ - "grid_entry": ge, - "grid_shape": gs, - }, - ) - q.append((c_oid, ge, gs, False)) - r_oid, r_ge, r_gs, _ = q.pop(0) - assert r_ge == result_grid_entry - assert r_gs == result_grid_shape - return r_oid - def reduce_axis(self, op_name, axis, keepdims=False): if not (axis is None or isinstance(axis, (int, np.int32, np.int64))): raise NotImplementedError("Only integer axis is currently supported.") @@ -770,17 +738,66 @@ def reduce_axis(self, op_name, axis, keepdims=False): return result ################# - # Linear Algebra + # Arithmetic ################# - def _compute_tensordot_syskwargs(self, self_block: Block, other_block: Block): - # Schedule on larger block. 
- if np.product(self_block.shape) >= np.product(other_block.shape): - return self_block.true_grid_entry(), self_block.true_grid_shape() + @staticmethod + def elementwise(op_name, a, b): + if isinstance(a, BlockArray): + b = a.check_or_convert_other(b) + elif isinstance(b, BlockArray): + a = b.check_or_convert_other(a) else: - return other_block.true_grid_entry(), other_block.true_grid_shape() + raise NotImplementedError() + + if a.shape == b.shape and a.block_shape == b.block_shape: + return BlockArray._fast_elementwise(op_name, a, b) + blocks_op = a.blocks.__getattribute__("__%s__" % op_name) + return BlockArray.from_blocks(blocks_op(b.blocks), result_shape=None, km=a.km) + + @staticmethod + def _fast_elementwise(op_name, a, b): + """ + Implements fast scheduling for basic element-wise operations. + """ + dtype = array_utils.get_bop_output_type(op_name, a.dtype, b.dtype) + # Schedule the op first. + blocks = np.empty(shape=a.grid.grid_shape, dtype=Block) + for grid_entry in a.grid.get_entry_iterator(): + a_block: Block = a.blocks[grid_entry] + b_block: Block = b.blocks[grid_entry] + blocks[grid_entry] = block = Block( + grid_entry=grid_entry, + grid_shape=a_block.grid_shape, + shape=a_block.shape, + dtype=dtype, + transposed=False, + km=a.km, + ) + block.oid = a.km.bop( + op_name, + a_block.oid, + b_block.oid, + a_block.transposed, + b_block.transposed, + axes={}, + syskwargs={ + "grid_entry": grid_entry, + "grid_shape": a.grid.grid_shape, + }, + ) + return BlockArray( + ArrayGrid(a.shape, a.block_shape, dtype.__name__), + a.km, + blocks=blocks, + ) + + ################# + # Linear Algebra + ################# - def tensordot(self, other, axes=2): + @staticmethod + def tensordot(a, b, axes=2): if isinstance(axes, int): pass elif array_utils.is_array_like(axes): @@ -788,59 +805,58 @@ def tensordot(self, other, axes=2): else: raise TypeError(f"Unexpected axes type '{type(axes).__name__}'") - other = self.check_or_convert_other(other, compute_block_shape=True) + if isinstance(a, BlockArray): + b = a.check_or_convert_other(b, compute_block_shape=True) + elif isinstance(b, BlockArray): + a = b.check_or_convert_other(a, compute_block_shape=True) + else: + raise NotImplementedError() - if array_utils.np_tensordot_param_test( - self.shape, self.ndim, other.shape, other.ndim, axes - ): + if array_utils.np_tensordot_param_test(a.shape, a.ndim, b.shape, b.ndim, axes): raise ValueError("shape-mismatch for sum") if axes > 0: - this_axes = self.grid.grid_shape[:-axes] - this_sum_axes = self.grid.grid_shape[-axes:] - other_axes = other.grid.grid_shape[axes:] - other_sum_axes = other.grid.grid_shape[:axes] - assert this_sum_axes == other_sum_axes - result_shape = tuple(self.shape[:-axes] + other.shape[axes:]) - result_block_shape = tuple( - self.block_shape[:-axes] + other.block_shape[axes:] - ) + a_axes = a.grid.grid_shape[:-axes] + a_sum_axes = a.grid.grid_shape[-axes:] + b_axes = b.grid.grid_shape[axes:] + b_sum_axes = b.grid.grid_shape[:axes] + assert a_sum_axes == b_sum_axes + result_shape = tuple(a.shape[:-axes] + b.shape[axes:]) + result_block_shape = tuple(a.block_shape[:-axes] + b.block_shape[axes:]) else: - this_axes = self.grid.grid_shape - other_axes = other.grid.grid_shape - this_sum_axes = () - result_shape = tuple(self.shape + other.shape) - result_block_shape = tuple(self.block_shape + other.block_shape) + a_axes = a.grid.grid_shape + b_axes = b.grid.grid_shape + a_sum_axes = () + result_shape = tuple(a.shape + b.shape) + result_block_shape = tuple(a.block_shape + b.block_shape) 
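+        # The result grid is the concatenation of the non-contracted grid axes of
+        # a and b. Each output block at grid entry (i + j) is the sum over the
+        # contracted grid axes k of tensordot(a.blocks[i + k], b.blocks[k + j]),
+        # combined below via tree_reduce("sum", ...). For example, with
+        # a.grid_shape == (2, 3), b.grid_shape == (3, 4) and axes == 1, the result
+        # grid is (2, 4) and each output block reduces 3 partial products.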
result_grid = ArrayGrid( shape=result_shape, block_shape=result_block_shape, dtype=array_utils.get_bop_output_type( - "tensordot", self.dtype, other.dtype + "tensordot", a.dtype, b.dtype ).__name__, ) - assert result_grid.grid_shape == tuple(this_axes + other_axes) - result = BlockArray(result_grid, self.km) - this_dims = list(itertools.product(*map(range, this_axes))) - other_dims = list(itertools.product(*map(range, other_axes))) - sum_dims = list(itertools.product(*map(range, this_sum_axes))) - for i in this_dims: - for j in other_dims: + assert result_grid.grid_shape == tuple(a_axes + b_axes) + result = BlockArray(result_grid, a.km) + a_dims = list(itertools.product(*map(range, a_axes))) + b_dims = list(itertools.product(*map(range, b_axes))) + sum_dims = list(itertools.product(*map(range, a_sum_axes))) + for i in a_dims: + for j in b_dims: grid_entry = tuple(i + j) result_block: Block = result.blocks[grid_entry] sum_oids = [] for k in sum_dims: - self_block: Block = self.blocks[tuple(i + k)] - other_block: Block = other.blocks[tuple(k + j)] - dot_grid_args = self._compute_tensordot_syskwargs( - self_block, other_block - ) - dotted_oid = self.km.bop( + a_block: Block = a.blocks[tuple(i + k)] + b_block: Block = b.blocks[tuple(k + j)] + dot_grid_args = a._compute_tensordot_syskwargs(a_block, b_block) + dotted_oid = a.km.bop( "tensordot", - self_block.oid, - other_block.oid, - self_block.transposed, - other_block.transposed, + a_block.oid, + b_block.oid, + a_block.transposed, + b_block.transposed, axes=axes, syskwargs={ "grid_entry": dot_grid_args[0], @@ -850,177 +866,19 @@ def tensordot(self, other, axes=2): sum_oids.append( (dotted_oid, dot_grid_args[0], dot_grid_args[1], False) ) - result_block.oid = self.tree_reduce( + result_block.oid = a.tree_reduce( "sum", sum_oids, result_block.grid_entry, result_block.grid_shape ) return result - def __matmul__(self, other): - if len(self.shape) > 2: - # TODO (bcp): NumPy's implementation does a stacked matmul, which is not supported yet. - raise NotImplementedError( - "Matrix multiply for tensors of rank > 2 not supported yet." - ) - else: - return self.tensordot(other, 1) - - def __rmatmul__(self, other): - other = self.check_or_convert_other(other) - return other @ self - - __imatmul__ = __matmul__ - - ################# - # Arithmetic - ################# - - def _fast_element_wise(self, op_name, other): - """ - Implements fast scheduling for basic element-wise operations. - """ - dtype = array_utils.get_bop_output_type(op_name, self.dtype, other.dtype) - # Schedule the op first. 
- blocks = np.empty(shape=self.grid.grid_shape, dtype=Block) - for grid_entry in self.grid.get_entry_iterator(): - self_block: Block = self.blocks[grid_entry] - other_block: Block = other.blocks[grid_entry] - blocks[grid_entry] = block = Block( - grid_entry=grid_entry, - grid_shape=self_block.grid_shape, - shape=self_block.shape, - dtype=dtype, - transposed=False, - km=self.km, - ) - block.oid = self.km.bop( - op_name, - self_block.oid, - other_block.oid, - self_block.transposed, - other_block.transposed, - axes={}, - syskwargs={ - "grid_entry": grid_entry, - "grid_shape": self.grid.grid_shape, - }, - ) - return BlockArray( - ArrayGrid(self.shape, self.block_shape, dtype.__name__), - self.km, - blocks=blocks, - ) - - def __elementwise__(self, op_name, other): - other = self.check_or_convert_other(other) - if self.shape == other.shape and self.block_shape == other.block_shape: - return self._fast_element_wise(op_name, other) - blocks_op = self.blocks.__getattribute__("__%s__" % op_name) - return BlockArray.from_blocks( - blocks_op(other.blocks), result_shape=None, km=self.km - ) - - def __neg__(self): - return self.ufunc("negative") - - def __pos__(self): - return self - - def __abs__(self): - return self.ufunc("abs") - - def __mod__(self, other): - return self.__elementwise__("mod", other) - - def __rmod__(self, other): - other = self.check_or_convert_other(other) - return other.__elementwise__("mod", self) - - __imod__ = __mod__ - - def __add__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - return self.__elementwise__("add", other) - - def __radd__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - other = self.check_or_convert_other(other) - return other.__elementwise__("add", self) - - __iadd__ = __add__ - - def __sub__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - return self.__elementwise__("sub", other) - - def __rsub__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - other = self.check_or_convert_other(other) - return other.__elementwise__("sub", self) - - __isub__ = __sub__ - - def __mul__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - return self.__elementwise__("mul", other) - - def __rmul__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - other = self.check_or_convert_other(other) - return other.__elementwise__("mul", self) - - __imul__ = __mul__ - - def __truediv__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - return self.__elementwise__("truediv", other) - - def __rtruediv__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - other = self.check_or_convert_other(other) - return other / self - - __itruediv__ = __truediv__ - - def __floordiv__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - return self.__elementwise__("floor_divide", other) - - def __rfloordiv__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - other = self.check_or_convert_other(other) - return other.__elementwise__("floor_divide", self) - - __ifloordiv__ = __floordiv__ - - def __pow__(self, other): - if isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - return self.__elementwise__("pow", other) - - def __rpow__(self, other): - if 
isinstance(other, array_sparse.SparseBlockArray): - return NotImplemented - other = self.check_or_convert_other(other) - return other**self - - __ipow__ = __pow__ - ################# # Inequalities ################# def __inequality__(self, op, other): other = self.check_or_convert_other(other) + if other is NotImplemented: + return NotImplemented assert ( other.shape == () or other.shape == self.shape ), "Currently supports comparison with scalars only." @@ -1039,127 +897,8 @@ def __inequality__(self, op, other): result.blocks[grid_entry] = self.blocks[grid_entry].bop( op, other_block, args={} ) - return result - def __ge__(self, other): - return self.__inequality__("ge", other) - - def __rge__(self, other): - other = self.check_or_convert_other(other) - return other.__inequality__("ge", self) - - def __gt__(self, other): - return self.__inequality__("gt", other) - - def __rgt__(self, other): - other = self.check_or_convert_other(other) - return other.__inequality__("gt", self) - - def __le__(self, other): - return self.__inequality__("le", other) - - def __rle__(self, other): - other = self.check_or_convert_other(other) - return other.__inequality__("le", self) - - def __lt__(self, other): - return self.__inequality__("lt", other) - - def __rlt__(self, other): - other = self.check_or_convert_other(other) - return other.__inequality__("lt", self) - - def __eq__(self, other): - return self.__inequality__("eq", other) - - def __req__(self, other): - other = self.check_or_convert_other(other) - return other.__inequality__("eq", self) - - def __ne__(self, other): - return self.__inequality__("ne", other) - - def __rne__(self, other): - other = self.check_or_convert_other(other) - return other.__inequality__("ne", self) - - ################## - # Boolean - ################## - - # TODO (hme): Type check bool ops. - def __bool__(self): - # pylint: disable=no-member - if np.sum(self.shape) == len(self.shape): - # If all ones or scalar, then this is defined. 
- return self.get().__bool__() - return True - - def __invert__(self): - return self.ufunc("invert") - - def __or__(self, other): - return self.__elementwise__("bitwise_or", other) - - def __ror__(self, other): - other = self.check_or_convert_other(other) - return other.__elementwise__("bitwise_or", self) - - __ior__ = __or__ - - def __and__(self, other): - return self.__elementwise__("bitwise_and", other) - - def __rand__(self, other): - other = self.check_or_convert_other(other) - return other.__elementwise__("bitwise_and", self) - - __iand__ = __and__ - - def __xor__(self, other): - return self.__elementwise__("bitwise_xor", other) - - def __rxor__(self, other): - other = self.check_or_convert_other(other) - return other.__elementwise__("bitwise_xor", self) - - __ixor__ = __xor__ - - def __lshift__(self, other): - return self.__elementwise__("left_shift", other) - - def __rlshift__(self, other): - other = self.check_or_convert_other(other) - return other.__elementwise__("left_shift", self) - - __ilshift__ = __lshift__ - - def __rshift__(self, other): - return self.__elementwise__("right_shift", other) - - def __rrshift__(self, other): - other = self.check_or_convert_other(other) - return other.__elementwise__("right_shift", self) - - __irshift__ = __rshift__ - - # All operators: https://docs.python.org/3/library/operator.html - - def astype(self, dtype): - grid = ArrayGrid(self.shape, self.block_shape, dtype.__name__) - result = BlockArray(grid, self.km) - for grid_entry in result.grid.get_entry_iterator(): - result.blocks[grid_entry] = self.blocks[grid_entry].astype(dtype) - return result - - def flattened_oids(self): - oids = [] - for grid_entry in self.grid.get_entry_iterator(): - oid = self.blocks[grid_entry].oid - oids.append(oid) - return oids - class Reshape: @staticmethod diff --git a/nums/core/array/random.py b/nums/core/array/random.py index 3452860d..fb17e1d8 100644 --- a/nums/core/array/random.py +++ b/nums/core/array/random.py @@ -220,21 +220,21 @@ def sparse_randint( self, low, high=None, - dtype=int, + dtype=None, shape=None, block_shape=None, p=0.01, fill_value=0, ): if dtype is None: - dtype = np.float64 + dtype = np.int64 assert isinstance(dtype, type) return self._sparse_sample_basic( "randint", + {"low": low, "high": high, "dtype": dtype}, shape, block_shape, dtype, - {"low": low, "high": high, "dtype": dtype}, p, fill_value, ) @@ -251,10 +251,10 @@ def sparse_uniform( ): return self._sparse_sample_basic( "uniform", + {"low": low, "high": high}, shape, block_shape, dtype, - {"low": low, "high": high}, p, fill_value, ) @@ -271,10 +271,10 @@ def sparse_normal( ): return self._sparse_sample_basic( "normal", + {"loc": loc, "scale": scale}, shape, block_shape, dtype, - {"loc": loc, "scale": scale}, p, fill_value, ) @@ -282,10 +282,10 @@ def sparse_normal( def _sparse_sample_basic( self, rfunc_name, + rfunc_args: Dict, shape, block_shape, dtype, - rfunc_args: Dict, p, fill_value, ) -> SparseBlockArray: diff --git a/nums/core/array/sparse.py b/nums/core/array/sparse.py index fb71b0bd..c3e99c6c 100644 --- a/nums/core/array/sparse.py +++ b/nums/core/array/sparse.py @@ -1,17 +1,16 @@ from typing import List from nums.core.array import utils as array_utils -from nums.core.array.base import BlockArrayBase, Block +from nums.core.array.base import BlockBase, Block, BlockArrayBase from nums.core.array.blockarray import BlockArray from nums.core.kernel.kernel_manager import KernelManager from nums.core.grid.grid import ArrayGrid import numpy as np import itertools +import warnings 
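+# `sparse` below is the PyData/Sparse package; sparse blocks keep their local
+# data in its COO format.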
import sparse -# TODO: merge with Block -# TODO: RHS binary operations -class SparseBlock(Block): +class SparseBlock(BlockBase): def __init__( self, grid_entry, @@ -29,14 +28,12 @@ def __init__( self.index_dtype = index_dtype self.oid = None self._nnz: object = None - self._nbytes: object = ( - None # TODO: implement as lazily fetched exact result, same as nnz - ) + self._nbytes: object = None @property def nnz(self): if self._nnz is None: - self._nnz = self._km.sparse_nnz( + self._nnz = self.km.sparse_nnz( self.oid, syskwargs={ "grid_entry": self.grid_entry, @@ -44,30 +41,26 @@ def nnz(self): }, ) if not array_utils.is_int(self._nnz): - self._nnz = self._km.get(self._nnz) + self._nnz = self.km.get(self._nnz) return self._nnz - # TODO: implement as lazily fetched exact result @property def nbytes(self): - return self._estimate_nbytes(format="coo") - - # TODO: deprecate - def _estimate_nbytes(self, format=None): - if format is None: - return self.nnz - elif format == "coo": - return ( - self.nnz * np.dtype(self.dtype).itemsize - + self.nnz * self.ndim * np.dtype(self.index_dtype).itemsize + if self._nbytes is None: + self._nbytes = self.km.sparse_nbytes( + self.oid, + syskwargs={ + "grid_entry": self.grid_entry, + "grid_shape": self.grid_shape, + }, ) + if not array_utils.is_int(self._nbytes): + self._nbytes = self.km.get(self._nbytes) + return self._nbytes def __repr__(self): return f"SparseBlock({self.oid})" - def size(self): - return np.product(self.shape) - def copy(self, shallow=True): assert shallow, "Only shallow copies are currently supported." block = SparseBlock( @@ -76,7 +69,7 @@ def copy(self, shallow=True): self.shape, self.dtype, self.transposed, - self._km, + self.km, ) block.oid = self.oid return block @@ -90,7 +83,7 @@ def transpose(self, defer=False, redistribute=False): shape=tuple(reversed(self.shape)), dtype=self.dtype, transposed=not self.transposed, - km=self._km, + km=self.km, ) blockT.oid = self.oid if not defer: @@ -102,13 +95,10 @@ def transpose(self, defer=False, redistribute=False): "grid_entry": self.grid_entry, "grid_shape": self.grid_shape, } - blockT.oid = self._km.transpose(self.oid, syskwargs=syskwargs) + blockT.oid = self.km.transpose(self.oid, syskwargs=syskwargs) return blockT - def ufunc(self, op_name, device=None): - return self.uop_map(op_name, device=device) - - def uop_map(self, op_name, args=None, kwargs=None, device=None): + def map_uop(self, op_name, args=None, kwargs=None, device=None): block = self.copy() block.dtype = array_utils.get_uop_output_type(op_name, self.dtype) args = () if args is None else args @@ -118,14 +108,15 @@ def uop_map(self, op_name, args=None, kwargs=None, device=None): else: syskwargs = {"device": device} block._device = device - block.oid = self._km.sparse_map_uop( + block.oid = self.km.sparse_map_uop( op_name, self.oid, args, kwargs, syskwargs=syskwargs ) - block._nnz = self._km.sparse_nnz(block.oid, syskwargs=syskwargs) + block._nnz = self.km.sparse_nnz(block.oid, syskwargs=syskwargs) + block._nbytes = self.km.sparse_nbytes(block.oid, syskwargs=syskwargs) block.fill_value = np.__getattribute__(op_name)(self.fill_value) return block - def _block_from_scalar(self, other): + def block_from_scalar(self, other): assert array_utils.is_scalar(other) block = SparseBlock( self.grid_entry, @@ -133,12 +124,11 @@ def _block_from_scalar(self, other): (1,), self.dtype, False, - self._km, + self.km, fill_value=other, ) - # TODO: generalize for different kernels - block.oid = self._km.put( - sparse.COO.from_numpy(np.array(other), 
fill_value=other), + block.oid = self.km.sparse_block_from_scalar( + other, syskwargs={ "grid_entry": self.grid_entry, "grid_shape": self.grid_shape, @@ -153,7 +143,7 @@ def init_block(op_name, block1, block2, args, device=None): result_grid_shape, result_shape, dtype, - ) = Block.block_meta(op_name, block1, block2, args) + ) = BlockBase.block_meta(op_name, block1, block2, args) fill_value = array_utils.get_bop_fill_value( op_name, block1.fill_value, block2.fill_value ) @@ -165,21 +155,32 @@ def init_block(op_name, block1, block2, args, device=None): dtype=dtype, transposed=False, fill_value=fill_value, - km=block1._km, + km=block1.km, ) block._device = device return block - def bop(self, op_name, other, args: dict, device=None): - if not isinstance(other, Block): - other = self._block_from_scalar(other) + def _check_bop_implemented(self, other): + if isinstance(other, BlockBase) or array_utils.is_scalar(other): + return True + return False + + @staticmethod + def binary_op(op_name, a: BlockBase, b: BlockBase, args: dict, device=None): + if isinstance(a, SparseBlock) and array_utils.is_scalar(b): + b = a.block_from_scalar(b) + elif isinstance(b, SparseBlock) and array_utils.is_scalar(a): + a = b.block_from_scalar(a) + if not isinstance(a, BlockBase) or not isinstance(b, BlockBase): + raise NotImplementedError() + densify = array_utils.get_sparse_bop_return_type( - op_name, self.fill_value, other.fill_value + op_name, a.fill_value, b.fill_value ) if densify: - block = Block.init_block(op_name, self, other, args, device) + block = Block.init_block(op_name, a, b, args, device) else: - block = SparseBlock.init_block(op_name, self, other, args, device) + block = SparseBlock.init_block(op_name, a, b, args, device) if device is None: syskwargs = { "grid_entry": block.grid_entry, @@ -187,30 +188,33 @@ def bop(self, op_name, other, args: dict, device=None): } else: syskwargs = {"device": device} - block.oid = self._km.sparse_bop( + block.oid = a.km.sparse_bop( op_name, - self.oid, - other.oid, - self.transposed, - other.transposed, + a.oid, + b.oid, + a.transposed, + b.transposed, axes=args.get("axes"), densify=densify, syskwargs=syskwargs, ) if not densify: - block._nnz = self._km.sparse_nnz(block.oid, syskwargs=syskwargs) + block._nnz = a.km.sparse_nnz(block.oid, syskwargs=syskwargs) + block._nbytes = a.km.sparse_nbytes(block.oid, syskwargs=syskwargs) return block + def bop(self, op_name, other, args: dict, device=None): + return self.binary_op(op_name, self, other, args, device) + # TODO: densify when fill_value != 0 def tensordot(self, other, axes): assert self.fill_value == 0 if not other.is_dense: assert other.fill_value == 0 - return self.bop("tensordot", other, args={"axes": axes}) + return self.binary_op("tensordot", self, other, args={"axes": axes}) -# TODO: merge with BlockArray, have decorator for dense-only methods? 
-class SparseBlockArray(BlockArray): +class SparseBlockArray(BlockArrayBase): def __init__( self, grid: ArrayGrid, @@ -218,15 +222,11 @@ def __init__( fill_value=0, blocks: np.ndarray = None, ): - self.grid = grid - self.km = km - self.shape = self.grid.shape - self.block_shape = self.grid.block_shape - self.grid_shape = self.grid.grid_shape - self.size = np.product(self.shape) - self.ndim = len(self.shape) - self.dtype = self.grid.dtype - self.blocks = blocks + if blocks is not None: + assert ( + blocks.dtype == SparseBlock + ), "SparseBlockArray must be initialized with SparseBlocks" + super().__init__(grid, km, blocks) if self.blocks is None: self.blocks = np.empty(shape=self.grid_shape, dtype=SparseBlock) for grid_entry in self.grid.get_entry_iterator(): @@ -247,10 +247,6 @@ def __init__( def nnz(self): return self._get_nnz() - @property - def nbytes(self): - return self._get_nbytes() - def _get_nnz(self): if self._nnz == -1: self._nnz = 0 @@ -258,6 +254,10 @@ def _get_nnz(self): self._nnz += self.blocks[grid_entry].nnz return self._nnz + @property + def nbytes(self): + return self._get_nbytes() + def _get_nbytes(self): if self._nbytes == -1: self._nbytes = 0 @@ -265,6 +265,9 @@ def _get_nbytes(self): self._nbytes += self.blocks[grid_entry].nbytes return self._nbytes + def __repr__(self): + return f"SparseBlockArray({self.blocks})" + @classmethod def from_np(cls, arr, block_shape, copy, km, fill_value=0): dtype_str = str(arr.dtype) @@ -357,6 +360,7 @@ def from_ba(cls, ba: BlockArrayBase, fill_value=0): syskwargs=syskwargs, ) sblock._nnz = sba.km.sparse_nnz(sblock.oid, syskwargs=syskwargs) + sblock._nbytes = sba.km.sparse_nbytes(sblock.oid, syskwargs=syskwargs) sba.fill_value = fill_value return sba @@ -383,21 +387,12 @@ def copy(self): rarr_copy.blocks[grid_entry] = self.blocks[grid_entry].copy() return rarr_copy - @staticmethod - def to_block_array(obj, km: KernelManager, block_shape=None): - if isinstance(obj, (BlockArray, SparseBlockArray)): - return obj - if isinstance(obj, np.ndarray): - np_array = obj - elif isinstance(obj, list): - np_array = np.array(obj) - elif array_utils.is_scalar(obj): - return SparseBlockArray.from_scalar(obj, km) - else: - raise Exception("Unsupported type %s" % type(obj)) - if block_shape is None: - block_shape = km.get_block_shape(np_array.shape, np_array.dtype) - return BlockArray.from_np(np_array, block_shape, False, km) + def astype(self, dtype): + grid = ArrayGrid(self.shape, self.block_shape, dtype.__name__) + result = BlockArray(grid, self.km) + for grid_entry in result.grid.get_entry_iterator(): + result.blocks[grid_entry] = self.blocks[grid_entry].astype(dtype) + return result def transpose(self, defer=False, redistribute=False): if defer and redistribute: @@ -414,10 +409,33 @@ def transpose(self, defer=False, redistribute=False): ) return rarrT + @staticmethod + def to_block_array(obj, km: KernelManager, block_shape=None): + if isinstance(obj, (BlockArray, SparseBlockArray)): + return obj + if isinstance(obj, np.ndarray): + np_array = obj + elif isinstance(obj, list): + np_array = np.array(obj) + elif array_utils.is_scalar(obj): + return SparseBlockArray.from_scalar(obj, km) + else: + raise Exception("Unsupported type %s" % type(obj)) + if block_shape is None: + block_shape = km.get_block_shape(np_array.shape, np_array.dtype) + return BlockArray.from_np(np_array, block_shape, False, km) + def check_or_convert_other(self, other, compute_block_shape=False): block_shape = None if compute_block_shape else self.block_shape return 
SparseBlockArray.to_block_array(other, self.km, block_shape=block_shape) + def _check_bop_implemented(self, other): + if isinstance( + other, (BlockArrayBase, np.ndarray, list) + ) or array_utils.is_scalar(other): + return True + return False + def ufunc(self, op_name): result = self.copy() for grid_entry in self.grid.get_entry_iterator(): @@ -425,6 +443,197 @@ def ufunc(self, op_name): func = np.__getattribute__(op_name) result.fill_value = func(self.fill_value) result._nnz = -1 + result._nbytes = -1 + return result + + ################# + # Arithmetic + ################# + + @staticmethod + def elementwise(op_name, a, b): + if isinstance(a, SparseBlockArray): + b = a.check_or_convert_other(b) + elif isinstance(b, SparseBlockArray): + a = b.check_or_convert_other(a) + else: + raise NotImplementedError() + + densify = array_utils.get_sparse_bop_return_type( + op_name, + a.fill_value, + b.fill_value, + ) + if a.shape == b.shape and a.block_shape == b.block_shape: + return SparseBlockArray._fast_elementwise(op_name, a, b, densify) + else: + blocks_op = a.blocks.__getattribute__(f"__{op_name}__") + if densify: + result = BlockArray.from_blocks( + blocks_op(b.blocks), + result_shape=None, + km=a.km, + ) + else: + fill_value = array_utils.get_bop_fill_value( + op_name, a.fill_value, b.fill_value + ) + result = SparseBlockArray.from_blocks( + blocks_op(b.blocks), + result_shape=None, + fill_value=fill_value, + km=a.km, + ) + return result + + @staticmethod + def _fast_elementwise(op_name, a, b, densify): + # a, b have the same grid_shape and block_shape + dtype = array_utils.get_bop_output_type(op_name, a.dtype, b.dtype) + if densify: + block_type = Block + fill_value = None + else: + block_type = SparseBlock + fill_value = array_utils.get_bop_fill_value( + op_name, a.fill_value, b.fill_value + ) + blocks = np.empty(shape=a.grid_shape, dtype=block_type) + for grid_entry in a.grid.get_entry_iterator(): + a_block: BlockBase = a.blocks[grid_entry] + b_block: BlockBase = b.blocks[grid_entry] + blocks[grid_entry] = block_type( + grid_entry, + a_block.grid_shape, + a_block.shape, + dtype, + transposed=False, + km=a.km, + ) + blocks[grid_entry].oid = a.km.sparse_bop( + op_name, + a_block.oid, + b_block.oid, + a_block.transposed, + b_block.transposed, + axes={}, + densify=densify, + syskwargs={ + "grid_entry": grid_entry, + "grid_shape": a.grid.grid_shape, + }, + ) + grid = ArrayGrid(a.shape, a.block_shape, dtype.__name__) + if densify: + return BlockArray(grid, a.km, blocks=blocks) + else: + return SparseBlockArray(grid, a.km, fill_value, blocks=blocks) + + ################# + # Linear Algebra + ################# + + @staticmethod + def tensordot(a, b, axes=2): + if isinstance(axes, int): + pass + elif array_utils.is_array_like(axes): + raise NotImplementedError("Non-integer axes is currently not supported.") + else: + raise TypeError(f"Unexpected axes type '{type(axes).__name__}'") + + if isinstance(a, SparseBlockArray): + b = a.check_or_convert_other(b, compute_block_shape=True) + elif isinstance(b, SparseBlockArray): + a = b.check_or_convert_other(a, compute_block_shape=True) + else: + raise NotImplementedError() + + # PyData/Sparse only works with fill_value == 0 + # TODO: densify when fill_value != 0 + if not (a.is_dense or b.is_dense): + assert ( + a.fill_value == 0 and b.fill_value == 0 + ), "Sparse-sparse tensordot with non-zero fill value is not supported." 
+ + if array_utils.np_tensordot_param_test(a.shape, a.ndim, b.shape, b.ndim, axes): + raise ValueError("shape-mismatch for sum") + + densify = array_utils.get_sparse_bop_return_type( + "tensordot", + a.fill_value, + b.fill_value, + ) + + if axes > 0: + a_axes = a.grid.grid_shape[:-axes] + a_sum_axes = a.grid.grid_shape[-axes:] + b_axes = b.grid.grid_shape[axes:] + b_sum_axes = b.grid.grid_shape[:axes] + assert a_sum_axes == b_sum_axes + result_shape = tuple(a.shape[:-axes] + b.shape[axes:]) + result_block_shape = tuple(a.block_shape[:-axes] + b.block_shape[axes:]) + else: + a_axes = a.grid.grid_shape + b_axes = b.grid.grid_shape + a_sum_axes = () + result_shape = tuple(a.shape + b.shape) + result_block_shape = tuple(a.block_shape + b.block_shape) + + result_grid = ArrayGrid( + shape=result_shape, + block_shape=result_block_shape, + dtype=array_utils.get_bop_output_type( + "tensordot", a.dtype, b.dtype + ).__name__, + ) + assert result_grid.grid_shape == tuple(a_axes + b_axes) + if densify: + result = BlockArray(result_grid, a.km) + else: + result = SparseBlockArray(result_grid, a.km, a.fill_value) + a_dims = list(itertools.product(*map(range, a_axes))) + b_dims = list(itertools.product(*map(range, b_axes))) + sum_dims = list(itertools.product(*map(range, a_sum_axes))) + for i in a_dims: + for j in b_dims: + grid_entry = tuple(i + j) + result_block: Block = result.blocks[grid_entry] + sum_oids = [] + for k in sum_dims: + a_block: Block = a.blocks[tuple(i + k)] + b_block: Block = b.blocks[tuple(k + j)] + dot_grid_args = a._compute_tensordot_syskwargs(a_block, b_block) + dotted_oid = a.km.sparse_bop( + "tensordot", + a_block.oid, + b_block.oid, + a_block.transposed, + b_block.transposed, + axes=axes, + densify=densify, + syskwargs={ + "grid_entry": dot_grid_args[0], + "grid_shape": dot_grid_args[1], + }, + ) + sum_oids.append( + (dotted_oid, dot_grid_args[0], dot_grid_args[1], False) + ) + result_block.oid = a.tree_reduce( + "sum", sum_oids, result_block.grid_entry, result_block.grid_shape + ) + if not densify: + syskwargs = { + "grid_entry": result_block.grid_entry, + "grid_shape": result_block.grid_shape, + } + result_block._nnz = a.km.sparse_nnz( + result_block.oid, syskwargs=syskwargs + ) + result_block._nbytes = a.km.sparse_nbytes( + result_block.oid, syskwargs=syskwargs + ) return result def sdtp(self, *block_arrays: List[BlockArray]): @@ -485,11 +694,11 @@ def sdtd(self, x: BlockArray, y: BlockArray, axes: int): assert result_grid.grid_shape == self.grid_shape assert result_grid.block_shape == self.block_shape result: SparseBlockArray = SparseBlockArray(self.grid, self.km, self.fill_value) - this_dims = list(itertools.product(*map(range, x_axes))) - other_dims = list(itertools.product(*map(range, y_axes))) + x_dims = list(itertools.product(*map(range, x_axes))) + y_dims = list(itertools.product(*map(range, y_axes))) sum_dims = tuple([0] * axes) - for i in this_dims: - for j in other_dims: + for i in x_dims: + for j in y_dims: grid_entry = tuple(i + j) x_block: Block = x.blocks[tuple(i + sum_dims)] y_block: Block = y.blocks[tuple(sum_dims + j)] @@ -505,217 +714,15 @@ def sdtd(self, x: BlockArray, y: BlockArray, axes: int): ) return result - def _fast_elementwise(self, op_name, other, densify): - dtype = array_utils.get_bop_output_type(op_name, self.dtype, other.dtype) - if densify: - blocks = np.empty(shape=self.grid_shape, dtype=Block) - else: - blocks = np.empty(shape=self.grid_shape, dtype=SparseBlock) - for grid_entry in self.grid.get_entry_iterator(): - self_block: SparseBlock = 
self.blocks[grid_entry] - other_block: Block = other.blocks[grid_entry] - blocks[grid_entry] = block = self_block.bop(op_name, other_block, args={}) - block.oid = self.km.sparse_bop( - op_name, - self_block.oid, - other_block.oid, - self_block.transposed, - other_block.transposed, - axes={}, - densify=densify, - syskwargs={ - "grid_entry": grid_entry, - "grid_shape": self.grid.grid_shape, - }, - ) - grid = ArrayGrid(self.shape, self.block_shape, dtype.__name__) - if densify: - return BlockArray(grid, self.km, blocks=blocks) - else: - fill_value = array_utils.get_bop_fill_value( - op_name, self.fill_value, other.fill_value - ) - result = SparseBlockArray(grid, self.km, fill_value, blocks=blocks) - return result - - def __elementwise__(self, op_name, other): - other = self.check_or_convert_other(other) - densify = array_utils.get_sparse_bop_return_type( - op_name, - self.fill_value, - other.fill_value, - ) - if self.shape == other.shape and self.block_shape == other.block_shape: - return self._fast_elementwise(op_name, other, densify) - blocks_op = self.blocks.__getattribute__("__%s__" % op_name) - if densify: - result = BlockArray.from_blocks( - blocks_op(other.blocks), - result_shape=None, - km=self.km, - ) - else: - fill_value = array_utils.get_bop_fill_value( - op_name, self.fill_value, other.fill_value - ) - result = SparseBlockArray.from_blocks( - blocks_op(other.blocks), - result_shape=None, - fill_value=fill_value, - km=self.km, - ) - return result - - def tensordot(self, other, axes=2): - if isinstance(axes, int): - pass - elif array_utils.is_array_like(axes): - raise NotImplementedError("Non-integer axes is currently not supported.") - else: - raise TypeError(f"Unexpected axes type '{type(axes).__name__}'") - - other = self.check_or_convert_other(other, compute_block_shape=True) - - if array_utils.np_tensordot_param_test( - self.shape, self.ndim, other.shape, other.ndim, axes - ): - raise ValueError("shape-mismatch for sum") - - # Pydata/Sparse only works with fill_value == 0 - # TODO: densify when fill_value != 0 - assert self.fill_value == 0 - if isinstance(other, SparseBlockArray): - assert other.fill_value == 0, "Sparse-dense tensordot may not be tractable." 
- densify = array_utils.get_sparse_bop_return_type( - "tensordot", - self.fill_value, - other.fill_value, - ) - - if axes > 0: - this_axes = self.grid.grid_shape[:-axes] - this_sum_axes = self.grid.grid_shape[-axes:] - other_axes = other.grid.grid_shape[axes:] - other_sum_axes = other.grid.grid_shape[:axes] - assert this_sum_axes == other_sum_axes - result_shape = tuple(self.shape[:-axes] + other.shape[axes:]) - result_block_shape = tuple( - self.block_shape[:-axes] + other.block_shape[axes:] - ) - else: - this_axes = self.grid.grid_shape - other_axes = other.grid.grid_shape - this_sum_axes = () - result_shape = tuple(self.shape + other.shape) - result_block_shape = tuple(self.block_shape + other.block_shape) - - result_grid = ArrayGrid( - shape=result_shape, - block_shape=result_block_shape, - dtype=array_utils.get_bop_output_type( - "tensordot", self.dtype, other.dtype - ).__name__, - ) - assert result_grid.grid_shape == tuple(this_axes + other_axes) - if densify: - result = BlockArray(result_grid, self.km) - else: - result = SparseBlockArray(result_grid, self.km, self.fill_value) - this_dims = list(itertools.product(*map(range, this_axes))) - other_dims = list(itertools.product(*map(range, other_axes))) - sum_dims = list(itertools.product(*map(range, this_sum_axes))) - for i in this_dims: - for j in other_dims: - grid_entry = tuple(i + j) - result_block: Block = result.blocks[grid_entry] - sum_oids = [] - for k in sum_dims: - self_block: Block = self.blocks[tuple(i + k)] - other_block: Block = other.blocks[tuple(k + j)] - dot_grid_args = self._compute_tensordot_syskwargs( - self_block, other_block - ) - dotted_oid = self.km.sparse_bop( - "tensordot", - self_block.oid, - other_block.oid, - self_block.transposed, - other_block.transposed, - axes=axes, - densify=densify, - syskwargs={ - "grid_entry": dot_grid_args[0], - "grid_shape": dot_grid_args[1], - }, - ) - sum_oids.append( - (dotted_oid, dot_grid_args[0], dot_grid_args[1], False) - ) - result_block.oid = self.tree_reduce( - "sum", sum_oids, result_block.grid_entry, result_block.grid_shape - ) - if not densify: - result_block._nnz = self.km.sparse_nnz( - result_block.oid, - syskwargs={ - "grid_entry": result_block.grid_entry, - "grid_shape": result_block.grid_shape, - }, - ) - return result - - def __add__(self, other): - return self.__elementwise__("add", other) - - def __radd__(self, other): - return self.__elementwise__("add", other) - - __iadd__ = __add__ - - def __sub__(self, other): - return self.__elementwise__("sub", other) - - def __rsub__(self, other): - # FIXME: not commutative - return self.__elementwise__("sub", other) - - __isub__ = __sub__ - - def __mul__(self, other): - return self.__elementwise__("mul", other) - - def __rmul__(self, other): - return self.__elementwise__("mul", other) - - __imul__ = __mul__ - - def __truediv__(self, other): - return self.__elementwise__("truediv", other) - - def __rtruediv__(self, other): - return self.__elementwise__("truediv", other) - - __itruediv__ = __truediv__ - - def __floordiv__(self, other): - return self.__elementwise__("floordiv", other) - - def __rfloordiv__(self, other): - return self.__elementwise__("floordiv", other) - - __ifloordiv__ = __floordiv__ - - def __pow__(self, other): - return self.__elementwise__("pow", other) - - def __rpow__(self, other): - return self.__elementwise__("pow", other) - - __ipow__ = __pow__ + ################# + # Inequalities + ################# def __inequality__(self, op_name, other): other = self.check_or_convert_other(other) - assert 
other.shape == (), "Currently supports comparison with scalars only." + assert ( + other.shape == () or other.shape == self.shape + ), "Currently supports comparison with scalars only." shape = array_utils.broadcast(self.shape, other.shape).shape block_shape = array_utils.broadcast_block_shape( self.shape, other.shape, self.block_shape diff --git a/nums/core/array/utils.py b/nums/core/array/utils.py index 8cefd766..872f5d9d 100644 --- a/nums/core/array/utils.py +++ b/nums/core/array/utils.py @@ -22,8 +22,6 @@ from nums.core.settings import np_ufunc_map from nums.core.array.errors import AxisError -from nums.core.array.base import Block -from nums.core.array.sparse import SparseBlock # pylint: disable = no-member, trailing-whitespace @@ -455,29 +453,6 @@ def normalize_axis_index(axis, ndim): return axis % ndim -# def get_sparse_bop_return_type(op_name, a: Block, b: Block): -# def sample_array(block): -# s = np.eye(2) -# if isinstance(block, SparseBlock): -# return sparse.COO.from_numpy(s, fill_value=block.fill_value) -# return s - -# sa = sample_array(a) -# sb = sample_array(b) -# if op_name == "tensordot": -# result = sparse.tensordot(sa, sb) -# else: -# op_name = np_ufunc_map.get(op_name, op_name) -# try: -# ufunc = np.__getattribute__(op_name) -# except Exception as _: -# ufunc = scipy.special.__getattribute__(op_name) -# result = sparse.elemwise(ufunc, sa, sb) -# if isinstance(result, sparse.SparseArray): -# return False -# return True - - def get_sparse_bop_return_type(op_name, a_fv, b_fv): def sample_array(fv): s = np.eye(2) diff --git a/nums/core/array/view.py b/nums/core/array/view.py index a9186c9c..f2e20ccb 100644 --- a/nums/core/array/view.py +++ b/nums/core/array/view.py @@ -13,7 +13,7 @@ # limitations under the License. -from typing import Tuple +from typing import Tuple, Type import numpy as np @@ -39,6 +39,7 @@ def from_subscript(cls, bab, subscript): def __init__(self, source, sel: BasicSelection = None, block_shape: tuple = None): self._source: BlockArrayBase = source self._km: KernelManager = self._source.km + self._concrete_cls = type(source) if sel is None: sel = BasicSelection.from_shape(self._source.shape) @@ -84,7 +85,7 @@ def basic_select(self, subscript: tuple): result: ArrayView = ArrayView(self._source, sel) return result - def create(self, concrete_cls=None) -> BlockArrayBase: + def create(self) -> BlockArrayBase: if self.sel.basic_steps(): if self.sel.is_aligned(self._source.block_shape): # Assertion below should form a conjunction with the above condition. @@ -95,20 +96,21 @@ def create(self, concrete_cls=None) -> BlockArrayBase: self.sel.get_broadcastable_block_shape(self.block_shape), self._source.block_shape, ) - return self.create_references(concrete_cls) + return self.create_references(self._concrete_cls) else: - return self.create_basic_single_step(concrete_cls) + return self.create_basic_single_step(self._concrete_cls) else: - return self.create_basic_multi_step(concrete_cls) + return self.create_basic_multi_step(self._concrete_cls) def create_references(self, concrete_cls) -> BlockArrayBase: # TODO (hme): Double check this. 
- array_cls = BlockArrayBase if concrete_cls is None else concrete_cls - dst_ba: BlockArrayBase = array_cls(self.grid, self._km) + # array_cls = BlockArrayBase if concrete_cls is None else concrete_cls + # dst_ba: BlockArrayBase = array_cls(self.grid, self._km) + dst_ba: BlockArrayBase = concrete_cls(self.grid, self._km) if 0 in self.shape: return dst_ba grid_offset = self.sel.position().value // np.array( - self._source.block_shape, dtype=np.int + self._source.block_shape, dtype=np.intp ) dst_inflated_shape = self.sel.get_broadcastable_shape() dst_inflated_block_shape = self.sel.get_broadcastable_block_shape( @@ -123,7 +125,9 @@ def create_references(self, concrete_cls) -> BlockArrayBase: ): dst_grid_entry = dst_grid_entry_iterator[dst_index] src_grid_entry = tuple( - (np.array(dst_inflated_grid_entry, dtype=np.int) + grid_offset).tolist() + ( + np.array(dst_inflated_grid_entry, dtype=np.intp) + grid_offset + ).tolist() ) dst_ba.blocks[dst_grid_entry].oid = self._source.blocks[src_grid_entry].oid dst_ba.blocks[dst_grid_entry].transposed = self._source.blocks[ @@ -261,7 +265,7 @@ def assign_references(self, dst_sel: BasicSelection, value): # but the destination selection may not have the same shape as value. # May need to broadcast value to destination selection output shape. dst_offset = dst_sel.position().value // np.array( - self._source.block_shape, dtype=np.int + self._source.block_shape, dtype=np.intp ) # Do we need to broadcast? if isinstance(value, ArrayView) and ( @@ -273,7 +277,7 @@ def assign_references(self, dst_sel: BasicSelection, value): # We don't need to create value to perform the reference copy. # No broadcasting required, so this should be okay. src_offset = value.sel.position().value // np.array( - value._source.block_shape, dtype=np.int + value._source.block_shape, dtype=np.intp ) src_inflated_shape = dst_sel.get_broadcastable_shape() src_inflated_block_shape = dst_sel.get_broadcastable_block_shape( @@ -286,12 +290,12 @@ def assign_references(self, dst_sel: BasicSelection, value): # Num axes in value grid may be too small. 
dst_grid_entry = tuple( ( - np.array(src_grid_entry_inflated, dtype=np.int) + dst_offset + np.array(src_grid_entry_inflated, dtype=np.intp) + dst_offset ).tolist() ) src_grid_entry = tuple( ( - np.array(src_grid_entry_inflated, dtype=np.int) + src_offset + np.array(src_grid_entry_inflated, dtype=np.intp) + src_offset ).tolist() ) # This is a reference assignment, and the grid properties between the @@ -322,7 +326,7 @@ def assign_references(self, dst_sel: BasicSelection, value): src_grid_entry = src_grid_entry_iterator[src_index] dst_grid_entry = tuple( ( - np.array(src_grid_entry_inflated, dtype=np.int) + dst_offset + np.array(src_grid_entry_inflated, dtype=np.intp) + dst_offset ).tolist() ) # This is a reference assignment, and the grid properties between the diff --git a/nums/core/kernel/kernel_interface.py b/nums/core/kernel/kernel_interface.py index 04bd194d..79c2be92 100644 --- a/nums/core/kernel/kernel_interface.py +++ b/nums/core/kernel/kernel_interface.py @@ -198,6 +198,9 @@ def sparse_map_uop(self, op_name, arr, args, kwargs, syskwargs: Dict): def sparse_bop(self, op, a1, a2, a1_T, a2_T, axes, densify, syskwargs: Dict): raise NotImplementedError() + def sparse_block_from_scalar(self, x, syskwargs: Dict): + raise NotImplementedError() + def sdtp(self, s, *dense_arrays, syskwargs: Dict): raise NotImplementedError() diff --git a/nums/core/kernel/numpy_kernel.py b/nums/core/kernel/numpy_kernel.py index ead191c7..ef671a5e 100644 --- a/nums/core/kernel/numpy_kernel.py +++ b/nums/core/kernel/numpy_kernel.py @@ -435,6 +435,7 @@ def bop(self, op, a1, a2, a1_T, a2_T, axes): ufunc = scipy.special.__getattribute__(op) return ufunc(a1, a2) + # Works for sparse too. def bop_reduce(self, op, a1, a2, a1_T, a2_T): if a1_T: a1 = a1.T @@ -561,10 +562,7 @@ def sparse_random_block( format="coo", fill_value=fill_value, ) - if rfunc_name != "randint": - # Only random and integer supports sampling of a specific type. 
- result = result.astype(dtype) - return result + return result.astype(dtype) def sparse_map_uop(self, op_name, arr, args, kwargs): """ @@ -605,6 +603,10 @@ def sparse_bop(self, op, a1, a2, a1_T, a2_T, axes, densify): assert isinstance(result, sparse.SparseArray) return result + def sparse_block_from_scalar(self, x): + assert np.isscalar(x) + return sparse.COO.from_numpy(np.array(x), fill_value=x) + def sdtp(self, s: sparse.COO, *dense_arrays): data = np.copy(s.data) for position in range(s.nnz): diff --git a/nums/experimental/optimizer/graph.py b/nums/experimental/optimizer/graph.py index 3ac3f3df..5c505340 100644 --- a/nums/experimental/optimizer/graph.py +++ b/nums/experimental/optimizer/graph.py @@ -443,9 +443,9 @@ def _collapse(self, device: Device): shape=self.shape(), dtype=self.dtype(), transposed=False, - km=child_block._km, + km=child_block.km, ) - block.oid = child_block._km.reduce_axis( + block.oid = child_block.km.reduce_axis( op_name=op_name, arr=child_block.oid, axis=self.axis, @@ -1006,12 +1006,12 @@ def _collapse(self, device: Device): assert isinstance(child, Leaf) block_oids.append(child.block.oid) if km is None: - km = child.block._km + km = child.block.km block: Block = Block( self._grid_entry, self._grid_shape, self._shape, self._dtype, False, km ) block._device = device - block.oid = block._km.call( + block.oid = block.km.call( self.op_hash, *block_oids, syskwargs={"device": device} ) leaf: Leaf = Leaf(self.cluster_state) @@ -1203,12 +1203,12 @@ def _collapse(self, device: Device): assert isinstance(child, Leaf) block_oids.append(child.block.oid) if km is None: - km = child.block._km + km = child.block.km block: Block = Block( self.grid_entry(), self.grid_shape(), self.shape(), self.dtype(), False, km ) block._device = device - block.oid = block._km.einsum( + block.oid = block.km.einsum( self.subscript, *block_oids, syskwargs={"device": device} ) leaf: Leaf = Leaf(self.cluster_state) diff --git a/nums/experimental/optimizer/grapharray.py b/nums/experimental/optimizer/grapharray.py index 5f604118..f03a4163 100644 --- a/nums/experimental/optimizer/grapharray.py +++ b/nums/experimental/optimizer/grapharray.py @@ -176,28 +176,29 @@ def to_blocks(self) -> np.ndarray: def other_to_ga(self, other): return GraphArray.to_ga(other, self.cluster_state, self.km, self.copy_on_op) - def tensordot(self, other, axes=2): - other = self.other_to_ga(other) + @staticmethod + def tensordot(a, b, axes=2): + b = a.other_to_ga(b) # TODO: Reuse BlockArrayBase tensordot operator. 
- this_axes = self.grid.grid_shape[:-axes] - this_sum_axes = self.grid.grid_shape[-axes:] - other_axes = other.grid.grid_shape[axes:] - other_sum_axes = other.grid.grid_shape[:axes] - assert this_sum_axes == other_sum_axes - result_shape = tuple(self.shape[:-axes] + other.shape[axes:]) - result_block_shape = tuple(self.block_shape[:-axes] + other.block_shape[axes:]) + a_axes = a.grid.grid_shape[:-axes] + a_sum_axes = a.grid.grid_shape[-axes:] + b_axes = b.grid.grid_shape[axes:] + b_sum_axes = b.grid.grid_shape[:axes] + assert a_sum_axes == b_sum_axes + result_shape = tuple(a.shape[:-axes] + b.shape[axes:]) + result_block_shape = tuple(a.block_shape[:-axes] + b.block_shape[axes:]) result_grid = ArrayGrid( shape=result_shape, block_shape=result_block_shape, - dtype=self.dtype.__name__, + dtype=a.dtype.__name__, ) - assert result_grid.grid_shape == tuple(this_axes + other_axes) + assert result_grid.grid_shape == tuple(a_axes + b_axes) result_graphs = np.empty(shape=result_grid.grid_shape, dtype=np.object) - this_dims = list(itertools.product(*map(range, this_axes))) - other_dims = list(itertools.product(*map(range, other_axes))) - sum_dims = list(itertools.product(*map(range, this_sum_axes))) - for i in this_dims: - for j in other_dims: + a_dims = list(itertools.product(*map(range, a_axes))) + b_dims = list(itertools.product(*map(range, b_axes))) + sum_dims = list(itertools.product(*map(range, a_sum_axes))) + for i in a_dims: + for j in b_dims: # A \in \R^{I \times K} # B \in \R^{K \times J} # C \in \R^{I \times J} @@ -205,20 +206,20 @@ def tensordot(self, other, axes=2): grid_entry = tuple(i + j) if len(sum_dims) == 1: k = sum_dims[0] - self_node: TreeNode = self.graphs[tuple(i + k)] - other_node: TreeNode = other.graphs[tuple(k + j)] - dot_node: TreeNode = self_node.tensordot(other_node, axes=axes) + a_node: TreeNode = a.graphs[tuple(i + k)] + b_node: TreeNode = b.graphs[tuple(k + j)] + dot_node: TreeNode = a_node.tensordot(b_node, axes=axes) result_graphs[grid_entry] = dot_node else: - rop = TreeReductionOp(self.cluster_state) + rop = TreeReductionOp(a.cluster_state) rop.set_grid_entry(grid_entry) rop.set_grid_shape(result_grid.grid_shape) rop.op_name = "sum" - rop.copy_on_op = self.copy_on_op + rop.copy_on_op = a.copy_on_op for k in sum_dims: - self_node: TreeNode = self.graphs[tuple(i + k)] - other_node: TreeNode = other.graphs[tuple(k + j)] - dot_node: TreeNode = self_node.tensordot(other_node, axes=axes) + a_node: TreeNode = a.graphs[tuple(i + k)] + b_node: TreeNode = b.graphs[tuple(k + j)] + dot_node: TreeNode = a_node.tensordot(b_node, axes=axes) # Explicitly add parent here, since sum depends on prod. # Not needed for other ops; make_bop takes care of it. 
# We don't need to copy the node here since the local @@ -229,14 +230,14 @@ def tensordot(self, other, axes=2): return GraphArray( result_grid, - self.cluster_state, + a.cluster_state, result_graphs, - self.km, - copy_on_op=self.copy_on_op, + a.km, + copy_on_op=a.copy_on_op, ) def __matmul__(self, other): - return self.tensordot(other, axes=1) + return self.tensordot(self, other, axes=1) def ga_from_arr(self, arr: Union[TreeNode, np.ndarray], result_shape: tuple): if isinstance(arr, TreeNode): diff --git a/nums/experimental/optimizer/reduction_ops.py b/nums/experimental/optimizer/reduction_ops.py index d5879d15..1a363513 100644 --- a/nums/experimental/optimizer/reduction_ops.py +++ b/nums/experimental/optimizer/reduction_ops.py @@ -266,7 +266,7 @@ def _collapse(self, device: Device, left: Leaf, right: Leaf): block: Block = lblock.copy() block.transposed = False block.dtype = array_utils.get_reduce_output_type(self.op_name, lblock.dtype) - block.oid = lblock._km.bop_reduce( + block.oid = lblock.km.bop_reduce( op_name, lblock.oid, rblock.oid, diff --git a/tests/core/array/test_bop.py b/tests/core/array/test_bop.py index d14da1b0..166c4a8e 100644 --- a/tests/core/array/test_bop.py +++ b/tests/core/array/test_bop.py @@ -69,7 +69,7 @@ def test_tensordot_basic(app_inst: ArrayApplication): shape = 2, 4, 10, 15 npX = np.arange(np.product(shape)).reshape(*shape) rX = app_inst.array(npX, block_shape=(1, 2, 10, 3)) - rResult = rX.T.tensordot(rX, axes=1) + rResult = rX.tensordot(rX.T, rX, axes=1) assert np.allclose(rResult.get(), (np.tensordot(npX.T, npX, axes=1))) common.check_block_integrity(rResult) @@ -93,7 +93,7 @@ def test_tensordot_large_shape(app_inst: ArrayApplication): block_a = app_inst.array(a, block_shape=(30, 5, 3, 2)) block_b = app_inst.array(b, block_shape=(2, 3, 5, 25)) - block_c = block_a.tensordot(block_b, axes=1) + block_c = block_a.tensordot(block_a, block_b, axes=1) assert np.allclose(block_c.get(), c) common.check_block_integrity(block_c) @@ -129,7 +129,7 @@ def test_tensordot_all_shapes(app_inst: ArrayApplication): ) block_a = app_inst.array(a, block_shape=a_block_shape) block_b = app_inst.array(b, block_shape=b_block_shape) - block_c = block_a.tensordot(block_b, axes=axes) + block_c = block_a.tensordot(block_a, block_b, axes=axes) assert np.allclose(block_c.get(), c) common.check_block_integrity(block_c) diff --git a/tests/core/array/test_sparse.py b/tests/core/array/test_sparse.py index 4ef9bc06..85f0254b 100644 --- a/tests/core/array/test_sparse.py +++ b/tests/core/array/test_sparse.py @@ -34,14 +34,18 @@ def test_from_coo(app_inst: ArrayApplication): def test_sparse_random(app_inst: ArrayApplication): rs: NumsRandomState = app_inst.random_state(1337) x_sba = rs.sparse_randint( - 1, high=5, dtype=int, shape=(15, 10), block_shape=(5, 5), p=0.1, fill_value=0 + 1, + high=5, + dtype=np.int64, + shape=(100, 50), + block_shape=(50, 50), + p=0.1, + fill_value=0, ) x_ba = x_sba.to_ba() x_np = x_ba.get() x_sp = sparse.COO.from_numpy(x_np, fill_value=0) assert x_sba.nnz == x_sp.nnz - assert x_sba.nbytes == x_sp.nbytes - print(x_np) def test_sparse_uop(app_inst: ArrayApplication): @@ -73,17 +77,27 @@ def test_sparse_add(app_inst: ArrayApplication): y_ba = y_sba.to_ba() assert np.array_equal(x1 + x2, y_ba.get()) - # Test sparse-dense. + # Test dense-sparse. 
y_ba = x2_ba + x1_sba # __radd__ assert np.array_equal(x2 + x1_sp, y_ba.get()) + y_ba = x1_sba - x2_ba # __sub__ + assert np.array_equal(x1_sp - x2, y_ba.get()) + y_ba = x2_ba - x1_sba # __rsub__ + assert np.array_equal(x2 - x1_sp, y_ba.get()) # Test sparse-scalar. - y_sp = x1_sp - 1 + y_sp = x1_sp - 1 # __sub__ y_sba = x1_sba - 1 assert y_sba.fill_value == y_sp.fill_value # 4 assert y_sba.nnz == y_sp.nnz # 16 y_ba = y_sba.to_ba() assert np.array_equal(x1 - 1, y_ba.get()) + y_sp = 1 - x1_sp # __rsub__ + y_sba = 1 - x1_sba + assert y_sba.fill_value == y_sp.fill_value # 4 + assert y_sba.nnz == y_sp.nnz # 16 + y_ba = y_sba.to_ba() + assert np.array_equal(1 - x1, y_ba.get()) def test_sparse_mul(app_inst: ArrayApplication): @@ -138,7 +152,7 @@ def test_neq(app_inst: ArrayApplication): def test_tensordot(app_inst: ArrayApplication): - x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 0, 0], [2, 2, 0, 0]]) + x1 = np.array([[0, 0, 1, 2], [0, 0, 3, 4], [5, 6, 0, 0], [7, 8, 0, 0]]) x2 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]]) x1_sp = sparse.COO.from_numpy(x1, fill_value=0) x2_sp = sparse.COO.from_numpy(x2, fill_value=0) @@ -147,12 +161,15 @@ def test_tensordot(app_inst: ArrayApplication): x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=0) x2_sba = SparseBlockArray.from_ba(x2_ba, fill_value=0) y_sp = sparse.tensordot(x1_sp, x2_sp, axes=1) - y_sba = x1_sba.tensordot(x2_sba, axes=1) + y_sba = x1_sba @ x2_sba assert y_sba.fill_value == y_sp.fill_value # 0 assert y_sba.nnz == y_sp.nnz # y_ba = y_sba.to_ba() assert np.array_equal(np.tensordot(x1, x2, axes=1), y_ba.get()) + y_ba = x1_ba @ x2_sba # __rmatmul__ + assert np.array_equal(np.tensordot(x1, x2, axes=1), y_ba.get()) + def test_sdtp(app_inst: ArrayApplication): shape = 50, 50, 50 diff --git a/tests/experimental/optimizer/test_copy.py b/tests/experimental/optimizer/test_copy.py index 5de7b42a..ea31aac6 100644 --- a/tests/experimental/optimizer/test_copy.py +++ b/tests/experimental/optimizer/test_copy.py @@ -88,7 +88,7 @@ def tensordot( cluster_state = ClusterState(lhs.km.devices()) lhs_ga: GraphArray = GraphArray.from_ba(lhs, cluster_state, copy_on_op=copy_on_op) rhs_ga: GraphArray = GraphArray.from_ba(rhs, cluster_state, copy_on_op=copy_on_op) - return lhs_ga.tensordot(rhs_ga, axes=axes) + return GraphArray.tensordot(lhs_ga, rhs_ga, axes=axes) def optimized_tensordot( diff --git a/tests/experimental/optimizer/test_tensordot.py b/tests/experimental/optimizer/test_tensordot.py index 7657b898..c06ae1ae 100644 --- a/tests/experimental/optimizer/test_tensordot.py +++ b/tests/experimental/optimizer/test_tensordot.py @@ -41,7 +41,7 @@ def optimized_tensordot( cluster_state: ClusterState = ClusterState(lhs.km.devices()) lhs_ga: GraphArray = GraphArray.from_ba(lhs, cluster_state, copy_on_op=copy_on_op) rhs_ga: GraphArray = GraphArray.from_ba(rhs, cluster_state, copy_on_op=copy_on_op) - tensordot_ga = lhs_ga.tensordot(rhs_ga, axes=axes) + tensordot_ga = GraphArray.tensordot(lhs_ga, rhs_ga, axes=axes) global random_state print("*" * 50) print("op grid shape", tensordot_ga.grid.grid_shape) @@ -119,7 +119,7 @@ def test_load_sqr(app_inst_mock_big): cluster_state: ClusterState = ClusterState(app.km.devices()) lhs_ga: GraphArray = GraphArray.from_ba(lhs, cluster_state) rhs_ga: GraphArray = GraphArray.from_ba(rhs, cluster_state) - tensordot_ga = lhs_ga.tensordot(rhs_ga, axes=axes) + tensordot_ga = GraphArray.tensordot(lhs_ga, rhs_ga, axes=axes) mem_diff = max(cluster_state.resources[0]) - min(cluster_state.resources[0]) net_in_diff = 
max(cluster_state.resources[1]) - min(cluster_state.resources[1]) @@ -163,7 +163,7 @@ def test_load_single_block_rhs(app_inst_mock_big): cluster_state: ClusterState = ClusterState(app.km.devices()) lhs_ga: GraphArray = GraphArray.from_ba(lhs, cluster_state) rhs_ga: GraphArray = GraphArray.from_ba(rhs, cluster_state) - tensordot_ga = lhs_ga.tensordot(rhs_ga, axes=axes) + tensordot_ga = GraphArray.tensordot(lhs_ga, rhs_ga, axes=axes) print("memory", cluster_state.resources[0]) print("net_in", cluster_state.resources[1]) From 359e3958c3a25346b2373f95f420c4d81d7b4e40 Mon Sep 17 00:00:00 2001 From: Daniel Zou Date: Wed, 10 Aug 2022 23:12:17 -0700 Subject: [PATCH 2/5] Relocated selections and reshape, but not working for SBA --- nums/core/array/base.py | 616 ++++++++++++++++++- nums/core/array/blockarray.py | 583 +----------------- nums/core/array/sparse.py | 23 + nums/core/array/view.py | 18 +- nums/core/kernel/kernel_interface.py | 3 + nums/core/kernel/numpy_kernel.py | 10 + nums/experimental/optimizer/fusion.py | 1 - nums/experimental/optimizer/graph.py | 38 +- nums/experimental/optimizer/grapharray.py | 6 +- nums/experimental/optimizer/reduction_ops.py | 14 +- tests/core/array/test_sparse.py | 11 + 11 files changed, 701 insertions(+), 622 deletions(-) diff --git a/nums/core/array/base.py b/nums/core/array/base.py index 04b3a534..648a4fc5 100644 --- a/nums/core/array/base.py +++ b/nums/core/array/base.py @@ -14,7 +14,9 @@ # pylint: disable = protected-access +# pylint: disable=too-many-lines +import warnings import numpy as np @@ -466,6 +468,19 @@ def is_dense(self): def __repr__(self): return "BlockArray(" + str(self.blocks) + ")" + def __getattr__(self, item): + if item == "__array_priority__" or item == "__array_struct__": + # This is triggered by a numpy array on the LHS. + raise TypeError( + "Unexpected conversion attempt from BlockArrayBase to ndarray." + ) + elif item == "ndim": + return len(self.shape) + elif item == "T": + return self.transpose() + else: + raise NotImplementedError(item) + def get(self) -> np.ndarray: result: np.ndarray = np.zeros(shape=self.grid.shape, dtype=self.grid.dtype) block_shape: np.ndarray = np.array(self.grid.block_shape, dtype=np.int64) @@ -521,6 +536,17 @@ def flattened_oids(self): oids.append(oid) return oids + @classmethod + def empty(cls, shape, block_shape, dtype, km: KernelManager): + raise NotImplementedError() + + @staticmethod + def to_block_array(obj, km: KernelManager, block_shape=None): + raise NotImplementedError() + + def transpose(self, defer=False, redistribute=False): + raise NotImplementedError() + def broadcast_to(self, shape): b = array_utils.broadcast(self.shape, shape) result_block_shape = array_utils.broadcast_block_shape( @@ -544,6 +570,391 @@ def broadcast_to(self, shape): result.blocks = broadcast return result + def reshape(self, *shape, **kwargs): + block_shape = kwargs.get("block_shape", None) + if array_utils.is_int(shape): + shape = (shape,) + elif len(shape) == 0: + shape = self.shape + elif isinstance(shape[0], (tuple, list)): + assert len(shape) == 1 + shape = shape[0] + else: + assert all(np.issubdtype(type(n), int) for n in shape) + shape = Reshape.compute_shape(self.shape, shape) + if block_shape is None: + if shape == self.shape: + # This is a noop. 
+ block_shape = self.block_shape + else: + block_shape = self.km.get_block_shape(shape, self.dtype) + return Reshape()(self, shape, block_shape) + + def _preprocess_subscript(self, item): + if not isinstance(item, tuple): + ss = (item,) + else: + ss = item + # We need to fetch any block arrays. + tmp = [] + for entry in ss: + if isinstance(entry, BlockArrayBase): + val = entry.get() + else: + val = entry + if isinstance(val, list): + val = np.array(val) + if isinstance(val, np.ndarray): + # If this is a Boolean mask, convert it to integers. + if array_utils.is_bool(val.dtype, type_test=True): + val = np.arange(len(val))[val] + if val.shape == (): + val = val.item() + tmp.append(val) + ss = tuple(tmp) + is_handled_advanced = False + array_encountered = False + axis = None + + # Check if this is a supported advanced indexing operation. + for i, entry in enumerate(ss): + if isinstance(entry, slice) and entry.start is None and entry.stop is None: + continue + elif array_utils.is_int(entry) or array_utils.is_uint(entry): + continue + elif array_utils.is_array_like(entry): + if array_encountered: + raise NotImplementedError( + "Advanced indexing is only supported along a single axis." + ) + is_handled_advanced = True + array_encountered = True + axis = i + if not (np.all(0 <= entry) and np.all(entry < self.shape[axis])): + raise IndexError( + "Advanced indexing array along axis %s is out of bounds." % axis + ) + else: + if array_encountered: + raise NotImplementedError( + "Advanced indexing is only supported " + "with full slices and integers along other axes." + ) + is_handled_advanced = False + break + + return ss, is_handled_advanced, axis + + def __getitem__(self, item): + ss, is_handled_advanced, axis = self._preprocess_subscript(item) + + if is_handled_advanced: + # Treat this as a shuffle. + return self._advanced_single_array_select(ss, axis=axis) + + # This is to deal with circular imports. Little overhead since this happens once per call. + # However, would be better to rearrange modules in the future. + from nums.core.array.view import ArrayView + + av: ArrayView = ArrayView.from_block_array(self) + # TODO (hme): We don't have to create, but do so for now until we need to optimize. + return av[ss].create() + + def _advanced_single_array_select(self, ss: tuple, axis: int = 0): + # Create output array along the axis of the selection operation. + # We don't allocate zeros for output array. Instead, we let the update kernel + # create the initial set of zeros to save some memory. + array = ss[axis] + assert len(array.shape) == 1 + + # TODO: We may encounter block shape incompatability due to this. + block_size = self.block_shape[axis] + self.km.update_block_shape_map(array.shape[0], block_size) + + dst_axis = None + shape = [] + block_shape = [] + for i in range(len(self.shape)): + if i == axis: + dst_axis = len(shape) + shape.append(array.shape[0]) + block_shape.append(block_size) + elif i < len(ss): + if isinstance(ss[i], slice): + shape.append(self.shape[i]) + block_shape.append(self.block_shape[i]) + else: + # It's an index. We drop the indices. 
+ continue + else: + shape.append(self.shape[i]) + block_shape.append(self.block_shape[i]) + + dst_arr = type(self)( + ArrayGrid( + shape=tuple(shape), + block_shape=tuple(block_shape), + dtype=self.dtype.__name__, + ), + km=self.km, + ) + + src_arr = self + np_ss = ss + ss = self.km.put( + ss, + syskwargs={ + "grid_entry": (0,), + "grid_shape": (1,), + }, + ) + for src_grid_entry in src_arr.grid.get_entry_iterator(): + src_coord: tuple = src_arr.grid.get_entry_coordinates(src_grid_entry) + src_block: Block = src_arr.blocks[src_grid_entry] + + # Make sure index values in subscript are within bounds of src_arr. + # We also prepare dst_grid_entry here. + dst_grid_entry_list = [] + skip = False + for curr_axis in range(len(np_ss)): + if curr_axis == axis: + dst_grid_entry_list.append(None) + elif isinstance(np_ss[curr_axis], slice): + dst_grid_entry_list.append(src_grid_entry[curr_axis]) + elif not ( + src_coord[curr_axis] + <= np_ss[curr_axis] + < src_coord[curr_axis] + src_block.shape[curr_axis] + ): + skip = True + break + if skip: + continue + for curr_axis in range(len(np_ss), len(src_grid_entry)): + dst_grid_entry_list.append(src_grid_entry[curr_axis]) + + for j in range(dst_arr.grid.grid_shape[dst_axis]): + dst_grid_entry_list[dst_axis] = j + dst_grid_entry = tuple(dst_grid_entry_list) + dst_block: Block = dst_arr.blocks[dst_grid_entry] + dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) + + if dst_block.oid is None: + dst_arg = (dst_block.shape, dst_block.dtype) + else: + dst_arg = dst_block.oid + dst_block.oid = self.km.advanced_select_block_along_axis( + dst_arg, + src_block.oid, + ss, + dst_axis, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + return dst_arr + + def __setitem__(self, key, value): + value: BlockArrayBase = self.to_block_array(value, self.km) + ss, is_handled_advanced, axis = self._preprocess_subscript(key) + if is_handled_advanced: + return self._advanced_single_array_assign(ss, value, axis) + + # This is to deal with circular imports. Little overhead since this happens once per call. + # However, would be better to rearrange modules in the future. + from nums.core.array.view import ArrayView + + av: ArrayView = ArrayView.from_block_array(self) + av[key] = value + + def _advanced_single_array_assign( + self, + ss: tuple, + value, + axis: int, + ): + array = ss[axis] + assert len(array.shape) == 1 + + # The subscript contains a single array. We therefore know one of two things is true: + # 1. value is the same shape as self along axes != axis. + # 2. value is scalar or 1-dimensional. + # We currently don't support the case where value may broadcasted if it has more dims. + # This should be a straight-forward future task. + value: BlockArrayBase = value + mode = None + if len(value.shape) == 0: + # subscripted value per block will broadcast to other dimensions. + mode = "scalar" + elif len(value.shape) == 1: + # assert len(value.shape) == len(ss) + mode = "single-dim" + # Can broadcast if trailing dim matches. + assert len(ss[axis]) == value.shape[0] + + for i in range(len(self.shape)): + if i == axis: + assert len(ss[i]) == value.shape[0] + elif i < axis: + # Nothing to check here. + # These entries are : or integer. + pass + else: + if i < len(ss): + if not isinstance(ss[i], slice): + # ss[i] is an integer. + continue + # If we're here, then the rest of the subscript operator + # will resolve to :, which is not broadcastable. 
+ raise ValueError( + "Cannot broadcast input array " + "from shape %s into shape %s" + % (value.shape, tuple([value.shape[0]] + list(self.shape[i:]))) + ) + elif len(value.shape) == len(self.shape): + mode = "multi-dim" + new_block_shape = [] + for i in range(len(self.shape)): + if i == axis: + new_block_shape.append(value.block_shape[i]) + elif i < len(ss) and ( + array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) + ): + # These entries are : or integer. + # assert array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) + assert value.shape[i] == 1 + new_block_shape.append(1) + else: + assert value.shape[i] == self.shape[i], "Shape mismatch." + new_block_shape.append(self.block_shape[i]) + new_block_shape = tuple(new_block_shape) + if new_block_shape != value.block_shape: + # TODO: This message occurs on X[idx[:n]] = X[idx[n:]] + 0.5, + # even when n is a multiple of block_shape[0]. + warnings.warn( + ("Assigned value block shape %s " % str(value.block_shape)) + + ( + "does not match block shape %s of assignee. " + % str(new_block_shape) + ) + + "Applying reshape to assigned value." + ) + value = value.reshape(block_shape=new_block_shape) + + # Like select, iterate over destination blocks along the axis being updated. + # e.g. if self is 2-dim and axis=0, then fix the row and iterate over the columns. + # If value has the same shape as self, then for each destination block, + # iterate over the blocks in value along axis. + # e.g. if self is 2-dim and axis=0, then for the given column, iterate over the rows + # of value. + # If value is scalar, then attempt to assign it to every destination block. + # If value is 1-dim, the just iterate over the dim and assign accordingly. + + dst_arr = self + src_arr = value + src_grid_shape = src_arr.grid.grid_shape + np_ss = ss + ss = self.km.put( + ss, + syskwargs={ + "grid_entry": (0,), + "grid_shape": (1,), + }, + ) + for dst_grid_entry in dst_arr.grid.get_entry_iterator(): + dst_block: BlockBase = dst_arr.blocks[dst_grid_entry] + dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) + + # Make sure index values in subscript are within bounds of dst_arr. + # We don't need to check src_arr: + # 1) The block shapes of dst_arr and src_arr are the same except along axis + # and indices in ss. We are not concerned with axes the indices in ss correspond to, + # because they are of size 1 in src_arr => we only need to check that indices + # fall within bounds of dst_arr. + # 2) For each dst_arr, we test the values + # to assign to dst_arr by traverse the src_arr along axis. + # Thus, size along all other axes are equal or broadcasted. 
+ skip = False + for curr_axis in range(len(np_ss)): + if curr_axis == axis or isinstance(np_ss[curr_axis], slice): + continue + if not ( + dst_coord[curr_axis] + <= np_ss[curr_axis] + < dst_coord[curr_axis] + dst_block.shape[curr_axis] + ): + skip = True + break + if skip: + continue + + if mode == "scalar": + src_block: BlockBase = src_arr.blocks.item() + src_coord: tuple = src_arr.grid.get_entry_coordinates( + src_block.grid_entry + ) + dst_block.oid = self.km.advanced_assign_block_along_axis( + dst_block.oid, + src_block.oid, + ss, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + elif mode == "single-dim": + for src_grid_entry in src_arr.grid.get_entry_iterator(): + src_block: BlockBase = src_arr.blocks[src_grid_entry] + src_coord: tuple = src_arr.grid.get_entry_coordinates( + src_grid_entry + ) + dst_block.oid = self.km.advanced_assign_block_along_axis( + dst_block.oid, + src_block.oid, + ss, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + elif mode == "multi-dim": + for j in range(src_grid_shape[axis]): + # Apply sel from each block along axis of src_arr. + # e.g. for 2 dim array, we fix the column blocks + # given by dst_grid_entry, and iterate over the rows. + src_grid_entry = tuple( + list(dst_grid_entry[:axis]) + + [j] + + list(dst_grid_entry[axis + 1 :]) + ) + src_block: BlockBase = src_arr.blocks[src_grid_entry] + src_coord: tuple = src_arr.grid.get_entry_coordinates( + src_grid_entry + ) + dst_block.oid = self.km.advanced_assign_block_along_axis( + dst_block.oid, + src_block.oid, + ss, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + return dst_arr + def tree_reduce( self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape ): @@ -787,7 +1198,9 @@ def __rrshift__(self, other): # Linear Algebra ################# - def _compute_tensordot_syskwargs(self, self_block: Block, other_block: Block): + def _compute_tensordot_syskwargs( + self, self_block: BlockBase, other_block: BlockBase + ): # Schedule on larger block. if np.product(self_block.shape) >= np.product(other_block.shape): return self_block.true_grid_entry(), self_block.true_grid_shape() @@ -864,3 +1277,204 @@ def __ne__(self, other): def __rne__(self, other): other = self.check_or_convert_other(other) return other.__inequality__("ne", self) + + +class Reshape: + @staticmethod + def compute_shape(shape, input_shape): + size = np.product(shape) + if -1 in input_shape: + new_shape = [] + other_dim_prod = 1 + negative_one_seen = False + for dim in input_shape: + if dim == -1: + if negative_one_seen: + raise Exception("Only one -1 permitted in reshape.") + negative_one_seen = True + continue + other_dim_prod *= dim + if size % other_dim_prod != 0: + raise Exception("Invalid shape.") + for dim in input_shape: + if dim == -1: + new_shape.append(size // other_dim_prod) + else: + new_shape.append(dim) + else: + new_shape = input_shape + assert size == np.product(new_shape) + return new_shape + + def _group_index_lists_by_block( + self, dst_slice_tuples, src_grid: ArrayGrid, dst_index_list, src_index_list + ): + # TODO(hme): Keep this function here until it's needed for greater support of + # selection/assignment operations. + # Block grid entries needed to write to given dst_slice_selection. 
+ src_blocks = {} + dst_slice_np = np.array(dst_slice_tuples).T + dst_index_arr = np.array(dst_index_list) + src_index_arr = np.array(src_index_list) + # Pick the smallest type to represent indices. + # A set of these indices may be transmitted over the network, + # so we want to pick the smallest encoding possible. + index_types = [ + (2**8, np.uint8), + (2**16, np.uint16), + (2**32, np.uint32), + (2**64, np.uint64), + ] + index_type = None + for bound, curr_index_type in index_types: + if np.all(np.array(src_grid.block_shape) < bound) and np.all( + dst_slice_np[1] < bound + ): + index_type = curr_index_type + break + if index_type is None: + raise Exception("Unable to encode block indices, blocks are too large.") + for grid_entry in src_grid.get_entry_iterator(): + src_slice_np = np.array(src_grid.get_slice_tuples(grid_entry)).T + index_pairs = [] + for i in range(src_index_arr.shape[0]): + src_index = src_index_arr[i] + dst_index = dst_index_arr[i] + if np.all( + (src_slice_np[0] <= src_index) & (src_index < src_slice_np[1]) + ): + index_pair = ( + (dst_index - dst_slice_np[0]).astype(index_type), + (src_index - src_slice_np[0]).astype(index_type), + ) + index_pairs.append(index_pair) + if len(index_pairs) > 0: + src_blocks[grid_entry] = index_pairs + return src_blocks + + def _arbitrary_reshape( + self, arr: BlockArrayBase, shape, block_shape + ) -> BlockArrayBase: + # This is the worst-case scenario. + # Generate index mappings per block, and group source indices to minimize + # RPCs and generation of new objects. + km = arr.km + dst_arr = type(arr).empty( + shape=shape, block_shape=block_shape, dtype=arr.dtype, km=km + ) + for dst_grid_entry in dst_arr.grid.get_entry_iterator(): + dst_block: BlockBase = dst_arr.blocks[dst_grid_entry] + dst_slice_selection = dst_arr.grid.get_slice(dst_grid_entry) + dst_index_list = array_utils.slice_sel_to_index_list(dst_slice_selection) + src_index_list = array_utils.translate_index_list( + dst_index_list, shape, arr.shape + ) + src_blocks = self._group_index_lists_by_block( + dst_arr.grid.get_slice_tuples(dst_grid_entry), + arr.grid, + dst_index_list, + src_index_list, + ) + for src_grid_entry in src_blocks: + src_block: BlockBase = arr.blocks[src_grid_entry] + index_pairs = src_blocks[src_grid_entry] + syskwargs = { + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + } + dst_block.oid = km.update_block_by_index( + dst_block.oid, src_block.oid, index_pairs, syskwargs=syskwargs + ) + return dst_arr + + def _block_shape_reshape(self, arr, block_shape): + rarr: BlockArrayBase = type(arr).empty( + arr.shape, block_shape, arr.dtype, arr.km + ) + for grid_entry in rarr.grid.get_entry_iterator(): + grid_entry_slice = rarr.grid.get_slice(grid_entry) + # TODO (hme): This could be less costly. + rarr[grid_entry_slice] = arr[grid_entry_slice] + return rarr + + def _strip_ones(self, shape): + return tuple(filter(lambda x: x != 1, shape)) + + def _check_positions_ones(self, shape, block_shape): + # If a position in the shape is 1, then the corresponding + # position in block_shape should also be 1. + for i in range(len(shape)): + if shape[i] == 1: + if shape[i] != block_shape[i]: + return False + return True + + def _is_simple_reshape(self, arr: BlockArrayBase, shape, block_shape): + # Is the reshape a difference of factors of 1? + # Strip out 1s and compare. + # If a position in the shape is 1, then the corresponding + # position in block_shape should also be 1. 
+ + # If source shape and dest shape are the same or source block_shape and dest block_shape + # are same, this is not a simple reshape. + if shape == arr.shape or block_shape == arr.block_shape: + return False + + # Checks if source shape and dest shape are same & source block_shape and dest + # block_shape are same after stripping ones. + if not ( + self._strip_ones(shape) == self._strip_ones(arr.shape) + and self._strip_ones(block_shape) == self._strip_ones(arr.block_shape) + ): + return False + if not self._check_positions_ones(shape, block_shape): + return False + return True + + def _simple_reshape(self, arr, shape, block_shape): + # Reshape the array of blocks only. + # This is only used when the difference in shape are factors of 1s, + # and the ordering of other factors are maintained. + + # Check assumptions. + assert len(self._strip_ones(arr.shape)) == len(self._strip_ones(shape)) + + # Create new grid, and perform reshape on blocks + # to simplify access to source blocks. + grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__) + src_blocks = arr.blocks.reshape(grid.grid_shape) + if arr.is_dense: + rarr = type(arr)(grid, arr.km) + else: + rarr = type(arr)(grid, arr.km, arr.fill_value) + for grid_entry in grid.get_entry_iterator(): + src_block: BlockBase = src_blocks[grid_entry] + dst_block: BlockBase = rarr.blocks[grid_entry] + syskwargs = {"grid_entry": grid_entry, "grid_shape": grid.grid_shape} + dst_block.oid = arr.km.reshape( + src_block.oid, dst_block.shape, syskwargs=syskwargs + ) + return rarr + + def _validate(self, arr, shape, block_shape): + assert -1 not in shape + assert -1 not in block_shape + assert len(shape) == len(block_shape) + assert np.product(arr.shape) == np.product(shape) + + def __call__(self, arr: BlockArrayBase, shape, block_shape): + self._validate(arr, shape, block_shape) + if arr.shape == shape and arr.block_shape == block_shape: + return arr + elif self._is_simple_reshape(arr, shape, block_shape): + return self._simple_reshape(arr, shape, block_shape) + elif arr.shape == shape and arr.block_shape != block_shape: + return self._block_shape_reshape(arr, block_shape) + elif arr.shape != shape and arr.block_shape == block_shape: + # Just do full reshape for this case as well. + # Though there may be a better solution, we generally expect + # the block shape to change with array shape. 
+ return self._arbitrary_reshape(arr, shape, block_shape) + else: + assert arr.shape != shape and arr.block_shape != block_shape + return self._arbitrary_reshape(arr, shape, block_shape) diff --git a/nums/core/array/blockarray.py b/nums/core/array/blockarray.py index 9fccb971..041747b2 100644 --- a/nums/core/array/blockarray.py +++ b/nums/core/array/blockarray.py @@ -19,7 +19,7 @@ import numpy as np from nums.core.array import utils as array_utils -from nums.core.array.base import BlockArrayBase, Block +from nums.core.array.base import Block, BlockArrayBase from nums.core.array.view import ArrayView from nums.core.grid.grid import ArrayGrid from nums.core.kernel.kernel_manager import KernelManager @@ -161,26 +161,6 @@ def to_single_block(self, replicate=False): ) return res - def reshape(self, *shape, **kwargs): - block_shape = kwargs.get("block_shape", None) - if array_utils.is_int(shape): - shape = (shape,) - elif len(shape) == 0: - shape = self.shape - elif isinstance(shape[0], (tuple, list)): - assert len(shape) == 1 - shape = shape[0] - else: - assert all(np.issubdtype(type(n), int) for n in shape) - shape = Reshape.compute_shape(self.shape, shape) - if block_shape is None: - if shape == self.shape: - # This is a noop. - block_shape = self.block_shape - else: - block_shape = self.km.get_block_shape(shape, self.dtype) - return Reshape()(self, shape, block_shape) - def expand_dims(self, axis): """ This function refers to the numpy implementation of expand_dims. @@ -257,373 +237,6 @@ def transpose(self, defer=False, redistribute=False): ) return rarrT - def __getattr__(self, item): - if item == "__array_priority__" or item == "__array_struct__": - # This is triggered by a numpy array on the LHS. - raise TypeError("Unexpected conversion attempt from BlockArray to ndarray.") - elif item == "ndim": - return len(self.shape) - elif item == "T": - return self.transpose() - else: - raise NotImplementedError(item) - - def _preprocess_subscript(self, item): - if not isinstance(item, tuple): - ss = (item,) - else: - ss = item - # We need to fetch any block arrays. - tmp = [] - for entry in ss: - if isinstance(entry, BlockArray): - val = entry.get() - else: - val = entry - if isinstance(val, list): - val = np.array(val) - if isinstance(val, np.ndarray): - # If this is a Boolean mask, convert it to integers. - if array_utils.is_bool(val.dtype, type_test=True): - val = np.arange(len(val))[val] - if val.shape == (): - val = val.item() - tmp.append(val) - ss = tuple(tmp) - is_handled_advanced = False - array_encountered = False - axis = None - - # Check if this is a supported advanced indexing operation. - for i, entry in enumerate(ss): - if isinstance(entry, slice) and entry.start is None and entry.stop is None: - continue - elif array_utils.is_int(entry) or array_utils.is_uint(entry): - continue - elif array_utils.is_array_like(entry): - if array_encountered: - raise NotImplementedError( - "Advanced indexing is only supported along a single axis." - ) - is_handled_advanced = True - array_encountered = True - axis = i - if not (np.all(0 <= entry) and np.all(entry < self.shape[axis])): - raise IndexError( - "Advanced indexing array along axis %s is out of bounds." % axis - ) - else: - if array_encountered: - raise NotImplementedError( - "Advanced indexing is only supported " - "with full slices and integers along other axes." 
- ) - is_handled_advanced = False - break - - return ss, is_handled_advanced, axis - - def __getitem__(self, item): - ss, is_handled_advanced, axis = self._preprocess_subscript(item) - - if is_handled_advanced: - # Treat this as a shuffle. - return self._advanced_single_array_select(ss, axis=axis) - - av: ArrayView = ArrayView.from_block_array(self) - # TODO (hme): We don't have to create, but do so for now until we need to optimize. - return av[ss].create() - - def _advanced_single_array_select(self, ss: tuple, axis: int = 0): - # Create output array along the axis of the selection operation. - # We don't allocate zeros for output array. Instead, we let the update kernel - # create the initial set of zeros to save some memory. - array = ss[axis] - assert len(array.shape) == 1 - - # TODO: We may encounter block shape incompatability due to this. - block_size = self.block_shape[axis] - self.km.update_block_shape_map(array.shape[0], block_size) - - dst_axis = None - shape = [] - block_shape = [] - for i in range(len(self.shape)): - if i == axis: - dst_axis = len(shape) - shape.append(array.shape[0]) - block_shape.append(block_size) - elif i < len(ss): - if isinstance(ss[i], slice): - shape.append(self.shape[i]) - block_shape.append(self.block_shape[i]) - else: - # It's an index. We drop the indices. - continue - else: - shape.append(self.shape[i]) - block_shape.append(self.block_shape[i]) - - dst_arr = BlockArray( - ArrayGrid( - shape=tuple(shape), - block_shape=tuple(block_shape), - dtype=self.dtype.__name__, - ), - km=self.km, - ) - - src_arr = self - np_ss = ss - ss = self.km.put( - ss, - syskwargs={ - "grid_entry": (0,), - "grid_shape": (1,), - }, - ) - for src_grid_entry in src_arr.grid.get_entry_iterator(): - src_coord: tuple = src_arr.grid.get_entry_coordinates(src_grid_entry) - src_block: Block = src_arr.blocks[src_grid_entry] - - # Make sure index values in subscript are within bounds of src_arr. - # We also prepare dst_grid_entry here. 
- dst_grid_entry_list = [] - skip = False - for curr_axis in range(len(np_ss)): - if curr_axis == axis: - dst_grid_entry_list.append(None) - elif isinstance(np_ss[curr_axis], slice): - dst_grid_entry_list.append(src_grid_entry[curr_axis]) - elif not ( - src_coord[curr_axis] - <= np_ss[curr_axis] - < src_coord[curr_axis] + src_block.shape[curr_axis] - ): - skip = True - break - if skip: - continue - for curr_axis in range(len(np_ss), len(src_grid_entry)): - dst_grid_entry_list.append(src_grid_entry[curr_axis]) - - for j in range(dst_arr.grid.grid_shape[dst_axis]): - dst_grid_entry_list[dst_axis] = j - dst_grid_entry = tuple(dst_grid_entry_list) - dst_block: Block = dst_arr.blocks[dst_grid_entry] - dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) - - if dst_block.oid is None: - dst_arg = (dst_block.shape, dst_block.dtype) - else: - dst_arg = dst_block.oid - dst_block.oid = self.km.advanced_select_block_along_axis( - dst_arg, - src_block.oid, - ss, - dst_axis, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - return dst_arr - - def __setitem__(self, key, value): - value: BlockArray = BlockArray.to_block_array(value, self.km) - ss, is_handled_advanced, axis = self._preprocess_subscript(key) - if is_handled_advanced: - return self._advanced_single_array_assign(ss, value, axis) - av: ArrayView = ArrayView.from_block_array(self) - av[key] = value - - def _advanced_single_array_assign( - self, - ss: tuple, - value, - axis: int, - ): - array = ss[axis] - assert len(array.shape) == 1 - - # The subscript contains a single array. We therefore know one of two things is true: - # 1. value is the same shape as self along axes != axis. - # 2. value is scalar or 1-dimensional. - # We currently don't support the case where value may broadcasted if it has more dims. - # This should be a straight-forward future task. - value: BlockArray = value - mode = None - if len(value.shape) == 0: - # subscripted value per block will broadcast to other dimensions. - mode = "scalar" - elif len(value.shape) == 1: - # assert len(value.shape) == len(ss) - mode = "single-dim" - # Can broadcast if trailing dim matches. - assert len(ss[axis]) == value.shape[0] - - for i in range(len(self.shape)): - if i == axis: - assert len(ss[i]) == value.shape[0] - elif i < axis: - # Nothing to check here. - # These entries are : or integer. - pass - else: - if i < len(ss): - if not isinstance(ss[i], slice): - # ss[i] is an integer. - continue - # If we're here, then the rest of the subscript operator - # will resolve to :, which is not broadcastable. - raise ValueError( - "Cannot broadcast input array " - "from shape %s into shape %s" - % (value.shape, tuple([value.shape[0]] + list(self.shape[i:]))) - ) - elif len(value.shape) == len(self.shape): - mode = "multi-dim" - new_block_shape = [] - for i in range(len(self.shape)): - if i == axis: - new_block_shape.append(value.block_shape[i]) - elif i < len(ss) and ( - array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) - ): - # These entries are : or integer. - # assert array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) - assert value.shape[i] == 1 - new_block_shape.append(1) - else: - assert value.shape[i] == self.shape[i], "Shape mismatch." - new_block_shape.append(self.block_shape[i]) - new_block_shape = tuple(new_block_shape) - if new_block_shape != value.block_shape: - # TODO: This message occurs on X[idx[:n]] = X[idx[n:]] + 0.5, - # even when n is a multiple of block_shape[0]. 
- warnings.warn( - ("Assigned value block shape %s " % str(value.block_shape)) - + ( - "does not match block shape %s of assignee. " - % str(new_block_shape) - ) - + "Applying reshape to assigned value." - ) - value = value.reshape(block_shape=new_block_shape) - - # Like select, iterate over destination blocks along the axis being updated. - # e.g. if self is 2-dim and axis=0, then fix the row and iterate over the columns. - # If value has the same shape as self, then for each destination block, - # iterate over the blocks in value along axis. - # e.g. if self is 2-dim and axis=0, then for the given column, iterate over the rows - # of value. - # If value is scalar, then attempt to assign it to every destination block. - # If value is 1-dim, the just iterate over the dim and assign accordingly. - - dst_arr = self - src_arr = value - src_grid_shape = src_arr.grid.grid_shape - np_ss = ss - ss = self.km.put( - ss, - syskwargs={ - "grid_entry": (0,), - "grid_shape": (1,), - }, - ) - for dst_grid_entry in dst_arr.grid.get_entry_iterator(): - dst_block: Block = dst_arr.blocks[dst_grid_entry] - dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) - - # Make sure index values in subscript are within bounds of dst_arr. - # We don't need to check src_arr: - # 1) The block shapes of dst_arr and src_arr are the same except along axis - # and indices in ss. We are not concerned with axes the indices in ss correspond to, - # because they are of size 1 in src_arr => we only need to check that indices - # fall within bounds of dst_arr. - # 2) For each dst_arr, we test the values - # to assign to dst_arr by traverse the src_arr along axis. - # Thus, size along all other axes are equal or broadcasted. - skip = False - for curr_axis in range(len(np_ss)): - if curr_axis == axis or isinstance(np_ss[curr_axis], slice): - continue - if not ( - dst_coord[curr_axis] - <= np_ss[curr_axis] - < dst_coord[curr_axis] + dst_block.shape[curr_axis] - ): - skip = True - break - if skip: - continue - - if mode == "scalar": - src_block: Block = src_arr.blocks.item() - src_coord: tuple = src_arr.grid.get_entry_coordinates( - src_block.grid_entry - ) - dst_block.oid = self.km.advanced_assign_block_along_axis( - dst_block.oid, - src_block.oid, - ss, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - elif mode == "single-dim": - for src_grid_entry in src_arr.grid.get_entry_iterator(): - src_block: Block = src_arr.blocks[src_grid_entry] - src_coord: tuple = src_arr.grid.get_entry_coordinates( - src_grid_entry - ) - dst_block.oid = self.km.advanced_assign_block_along_axis( - dst_block.oid, - src_block.oid, - ss, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - elif mode == "multi-dim": - for j in range(src_grid_shape[axis]): - # Apply sel from each block along axis of src_arr. - # e.g. for 2 dim array, we fix the column blocks - # given by dst_grid_entry, and iterate over the rows. 
- src_grid_entry = tuple( - list(dst_grid_entry[:axis]) - + [j] - + list(dst_grid_entry[axis + 1 :]) - ) - src_block: Block = src_arr.blocks[src_grid_entry] - src_coord: tuple = src_arr.grid.get_entry_coordinates( - src_grid_entry - ) - dst_block.oid = self.km.advanced_assign_block_along_axis( - dst_block.oid, - src_block.oid, - ss, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - return dst_arr - @staticmethod def to_block_array(obj, km: KernelManager, block_shape=None): if isinstance(obj, BlockArray): @@ -898,197 +511,3 @@ def __inequality__(self, op, other): op, other_block, args={} ) return result - - -class Reshape: - @staticmethod - def compute_shape(shape, input_shape): - size = np.product(shape) - if -1 in input_shape: - new_shape = [] - other_dim_prod = 1 - negative_one_seen = False - for dim in input_shape: - if dim == -1: - if negative_one_seen: - raise Exception("Only one -1 permitted in reshape.") - negative_one_seen = True - continue - other_dim_prod *= dim - if size % other_dim_prod != 0: - raise Exception("Invalid shape.") - for dim in input_shape: - if dim == -1: - new_shape.append(size // other_dim_prod) - else: - new_shape.append(dim) - else: - new_shape = input_shape - assert size == np.product(new_shape) - return new_shape - - def _group_index_lists_by_block( - self, dst_slice_tuples, src_grid: ArrayGrid, dst_index_list, src_index_list - ): - # TODO(hme): Keep this function here until it's needed for greater support of - # selection/assignment operations. - # Block grid entries needed to write to given dst_slice_selection. - src_blocks = {} - dst_slice_np = np.array(dst_slice_tuples).T - dst_index_arr = np.array(dst_index_list) - src_index_arr = np.array(src_index_list) - # Pick the smallest type to represent indices. - # A set of these indices may be transmitted over the network, - # so we want to pick the smallest encoding possible. - index_types = [ - (2**8, np.uint8), - (2**16, np.uint16), - (2**32, np.uint32), - (2**64, np.uint64), - ] - index_type = None - for bound, curr_index_type in index_types: - if np.all(np.array(src_grid.block_shape) < bound) and np.all( - dst_slice_np[1] < bound - ): - index_type = curr_index_type - break - if index_type is None: - raise Exception("Unable to encode block indices, blocks are too large.") - for grid_entry in src_grid.get_entry_iterator(): - src_slice_np = np.array(src_grid.get_slice_tuples(grid_entry)).T - index_pairs = [] - for i in range(src_index_arr.shape[0]): - src_index = src_index_arr[i] - dst_index = dst_index_arr[i] - if np.all( - (src_slice_np[0] <= src_index) & (src_index < src_slice_np[1]) - ): - index_pair = ( - (dst_index - dst_slice_np[0]).astype(index_type), - (src_index - src_slice_np[0]).astype(index_type), - ) - index_pairs.append(index_pair) - if len(index_pairs) > 0: - src_blocks[grid_entry] = index_pairs - return src_blocks - - def _arbitrary_reshape(self, arr: BlockArray, shape, block_shape) -> BlockArray: - # This is the worst-case scenario. - # Generate index mappings per block, and group source indices to minimize - # RPCs and generation of new objects. 
- km = arr.km - dst_arr = BlockArray.empty( - shape=shape, block_shape=block_shape, dtype=arr.dtype, km=km - ) - for dst_grid_entry in dst_arr.grid.get_entry_iterator(): - dst_block: Block = dst_arr.blocks[dst_grid_entry] - dst_slice_selection = dst_arr.grid.get_slice(dst_grid_entry) - dst_index_list = array_utils.slice_sel_to_index_list(dst_slice_selection) - src_index_list = array_utils.translate_index_list( - dst_index_list, shape, arr.shape - ) - src_blocks = self._group_index_lists_by_block( - dst_arr.grid.get_slice_tuples(dst_grid_entry), - arr.grid, - dst_index_list, - src_index_list, - ) - for src_grid_entry in src_blocks: - src_block: Block = arr.blocks[src_grid_entry] - index_pairs = src_blocks[src_grid_entry] - syskwargs = { - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - } - dst_block.oid = km.update_block_by_index( - dst_block.oid, src_block.oid, index_pairs, syskwargs=syskwargs - ) - return dst_arr - - def _block_shape_reshape(self, arr, block_shape): - rarr: BlockArray = BlockArray.empty(arr.shape, block_shape, arr.dtype, arr.km) - for grid_entry in rarr.grid.get_entry_iterator(): - grid_entry_slice = rarr.grid.get_slice(grid_entry) - # TODO (hme): This could be less costly. - rarr[grid_entry_slice] = arr[grid_entry_slice] - return rarr - - def _strip_ones(self, shape): - return tuple(filter(lambda x: x != 1, shape)) - - def _check_positions_ones(self, shape, block_shape): - # If a position in the shape is 1, then the corresponding - # position in block_shape should also be 1. - for i in range(len(shape)): - if shape[i] == 1: - if shape[i] != block_shape[i]: - return False - return True - - def _is_simple_reshape(self, arr: BlockArray, shape, block_shape): - # Is the reshape a difference of factors of 1? - # Strip out 1s and compare. - # If a position in the shape is 1, then the corresponding - # position in block_shape should also be 1. - - # If source shape and dest shape are the same or source block_shape and dest block_shape - # are same, this is not a simple reshape. - if shape == arr.shape or block_shape == arr.block_shape: - return False - - # Checks if source shape and dest shape are same & source block_shape and dest - # block_shape are same after stripping ones. - if not ( - self._strip_ones(shape) == self._strip_ones(arr.shape) - and self._strip_ones(block_shape) == self._strip_ones(arr.block_shape) - ): - return False - if not self._check_positions_ones(shape, block_shape): - return False - return True - - def _simple_reshape(self, arr, shape, block_shape): - # Reshape the array of blocks only. - # This is only used when the difference in shape are factors of 1s, - # and the ordering of other factors are maintained. - - # Check assumptions. - assert len(self._strip_ones(arr.shape)) == len(self._strip_ones(shape)) - - # Create new grid, and perform reshape on blocks - # to simplify access to source blocks. 
- grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__) - src_blocks = arr.blocks.reshape(grid.grid_shape) - rarr = BlockArray(grid, arr.km) - for grid_entry in grid.get_entry_iterator(): - src_block: Block = src_blocks[grid_entry] - dst_block: Block = rarr.blocks[grid_entry] - syskwargs = {"grid_entry": grid_entry, "grid_shape": grid.grid_shape} - dst_block.oid = arr.km.reshape( - src_block.oid, dst_block.shape, syskwargs=syskwargs - ) - return rarr - - def _validate(self, arr, shape, block_shape): - assert -1 not in shape - assert -1 not in block_shape - assert len(shape) == len(block_shape) - assert np.product(arr.shape) == np.product(shape) - - def __call__(self, arr: BlockArray, shape, block_shape): - self._validate(arr, shape, block_shape) - if arr.shape == shape and arr.block_shape == block_shape: - return arr - elif self._is_simple_reshape(arr, shape, block_shape): - return self._simple_reshape(arr, shape, block_shape) - elif arr.shape == shape and arr.block_shape != block_shape: - return self._block_shape_reshape(arr, block_shape) - elif arr.shape != shape and arr.block_shape == block_shape: - # Just do full reshape for this case as well. - # Though there may be a better solution, we generally expect - # the block shape to change with array shape. - return self._arbitrary_reshape(arr, shape, block_shape) - else: - assert arr.shape != shape and arr.block_shape != block_shape - return self._arbitrary_reshape(arr, shape, block_shape) diff --git a/nums/core/array/sparse.py b/nums/core/array/sparse.py index c3e99c6c..0a51947e 100644 --- a/nums/core/array/sparse.py +++ b/nums/core/array/sparse.py @@ -268,6 +268,28 @@ def _get_nbytes(self): def __repr__(self): return f"SparseBlockArray({self.blocks})" + @classmethod + def empty(cls, shape, block_shape, dtype, km: KernelManager): + return SparseBlockArray.create( + "empty", shape, block_shape, dtype, km, fill_value=0 + ) + + @classmethod + def create( + cls, create_op_name, shape, block_shape, dtype, km: KernelManager, fill_value=0 + ): + grid = ArrayGrid(shape=shape, block_shape=block_shape, dtype=dtype.__name__) + grid_meta = grid.to_meta() + arr = SparseBlockArray(grid, km, fill_value) + for grid_entry in grid.get_entry_iterator(): + arr.blocks[grid_entry].oid = km.new_sparse_block( + create_op_name, + grid_entry, + grid_meta, + syskwargs={"grid_entry": grid_entry, "grid_shape": grid.grid_shape}, + ) + return arr + @classmethod def from_np(cls, arr, block_shape, copy, km, fill_value=0): dtype_str = str(arr.dtype) @@ -423,6 +445,7 @@ def to_block_array(obj, km: KernelManager, block_shape=None): raise Exception("Unsupported type %s" % type(obj)) if block_shape is None: block_shape = km.get_block_shape(np_array.shape, np_array.dtype) + # Assume object is dense. 
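+        # Inputs converted here are wrapped as dense BlockArrays via
+        # BlockArray.from_np, even when called from the sparse module.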
return BlockArray.from_np(np_array, block_shape, False, km) def check_or_convert_other(self, other, compute_block_shape=False): diff --git a/nums/core/array/view.py b/nums/core/array/view.py index f2e20ccb..88e79267 100644 --- a/nums/core/array/view.py +++ b/nums/core/array/view.py @@ -19,7 +19,7 @@ from nums.core.array import selection from nums.core.array import utils as array_utils -from nums.core.array.base import Block, BlockArrayBase +from nums.core.array.base import BlockBase, BlockArrayBase from nums.core.array.selection import BasicSelection from nums.core.kernel.kernel_manager import KernelManager from nums.core.grid.grid import ArrayGrid @@ -175,7 +175,7 @@ def create_basic_single_step(self, concrete_cls) -> BlockArrayBase: ] if src_dst_intersection_block.is_empty(): continue - src_block: Block = self._source.blocks[src_grid_entry] + src_block: BlockBase = self._source.blocks[src_grid_entry] src_oids.append(src_block.oid) src_sel_block: BasicSelection = src_sel_arr[src_grid_entry] src_dep_sel_loc = src_dst_intersection_block - src_sel_block.position() @@ -184,7 +184,7 @@ def create_basic_single_step(self, concrete_cls) -> BlockArrayBase: src_dst_intersection_block - dst_sel_offset_block.position() ) dst_params.append((dst_block_sel_loc.selector(), False)) - dst_block: Block = dst_ba.blocks.reshape(dst_grid_bc.grid_shape)[ + dst_block: BlockBase = dst_ba.blocks.reshape(dst_grid_bc.grid_shape)[ dst_grid_entry_bc ] dst_block.oid = km.create_block( @@ -300,8 +300,8 @@ def assign_references(self, dst_sel: BasicSelection, value): ) # This is a reference assignment, and the grid properties between the # two blocks may differ, so retain those properties in the copy. - dst_block: Block = self._source.blocks[dst_grid_entry] - src_block_copy: Block = value._source.blocks[src_grid_entry].copy() + dst_block: BlockBase = self._source.blocks[dst_grid_entry] + src_block_copy: BlockBase = value._source.blocks[src_grid_entry].copy() src_block_copy.grid_entry = dst_block.grid_entry src_block_copy.grid_shape = dst_block.grid_shape self._source.blocks[dst_grid_entry] = src_block_copy @@ -331,8 +331,8 @@ def assign_references(self, dst_sel: BasicSelection, value): ) # This is a reference assignment, and the grid properties between the # two blocks may differ, so retain those properties in the copy. 
- dst_block: Block = self._source.blocks[dst_grid_entry] - src_block_copy: Block = src_ba.blocks[src_grid_entry].copy() + dst_block: BlockBase = self._source.blocks[dst_grid_entry] + src_block_copy: BlockBase = src_ba.blocks[src_grid_entry].copy() src_block_copy.grid_entry = dst_block.grid_entry src_block_copy.grid_shape = dst_block.grid_shape self._source.blocks[dst_grid_entry] = src_block_copy @@ -393,7 +393,7 @@ def basic_assign_single_step(self, dst_sel: BasicSelection, value): src_oids = [] src_params = [] dst_params = [] - dst_block: Block = dst_ba.blocks[dst_grid_entry] + dst_block: BlockBase = dst_ba.blocks[dst_grid_entry] for src_index, src_grid_entry_bc in enumerate( src_inflated_grid.get_entry_iterator() ): @@ -404,7 +404,7 @@ def basic_assign_single_step(self, dst_sel: BasicSelection, value): continue src_grid_entry = src_grid_entry_iterator[src_index] - src_block: Block = src_ba_bc.blocks[src_grid_entry] + src_block: BlockBase = src_ba_bc.blocks[src_grid_entry] src_oids.append(src_block.oid) src_sel_block_offset: BasicSelection = src_sel_offset[src_grid_entry_bc] diff --git a/nums/core/kernel/kernel_interface.py b/nums/core/kernel/kernel_interface.py index 79c2be92..76631799 100644 --- a/nums/core/kernel/kernel_interface.py +++ b/nums/core/kernel/kernel_interface.py @@ -179,6 +179,9 @@ def sparse_nnz(self, arr, syskwargs: Dict): def sparse_nbytes(self, arr, syskwargs: Dict): raise NotImplementedError() + def new_sparse_block(self, op_name, grid_entry, grid_meta, syskwargs: Dict): + raise NotImplementedError() + def sparse_random_block( self, rng_params, diff --git a/nums/core/kernel/numpy_kernel.py b/nums/core/kernel/numpy_kernel.py index ef671a5e..f5fee593 100644 --- a/nums/core/kernel/numpy_kernel.py +++ b/nums/core/kernel/numpy_kernel.py @@ -540,6 +540,16 @@ def sparse_nnz(self, arr): def sparse_nbytes(self, arr): return arr.nbytes + def new_sparse_block(self, op_name, grid_entry, grid_meta): + op_func = sparse.__getattribute__(op_name) + grid = ArrayGrid.from_meta(grid_meta) + block_shape = grid.get_block_shape(grid_entry) + if op_name == "eye": + assert np.all(np.diff(grid_entry) == 0) + return op_func(*block_shape, dtype=grid.dtype) + else: + return op_func(block_shape, dtype=grid.dtype) + def sparse_random_block( self, rng_params, diff --git a/nums/experimental/optimizer/fusion.py b/nums/experimental/optimizer/fusion.py index e20b04e7..1729f4d7 100644 --- a/nums/experimental/optimizer/fusion.py +++ b/nums/experimental/optimizer/fusion.py @@ -5,7 +5,6 @@ import numpy as np from nums.core.array import utils as array_utils -from nums.core.array.base import Block from nums.core.grid.grid import Device from nums.experimental.optimizer.clusterstate import ClusterState diff --git a/nums/experimental/optimizer/graph.py b/nums/experimental/optimizer/graph.py index 5c505340..361e740f 100644 --- a/nums/experimental/optimizer/graph.py +++ b/nums/experimental/optimizer/graph.py @@ -21,7 +21,7 @@ from nums.core.settings import sync_nnz from nums.core.array import utils as array_utils -from nums.core.array.base import Block +from nums.core.array.base import BlockBase, Block from nums.core.grid.grid import Device from nums.core.kernel.kernel_manager import KernelManager from nums.experimental.optimizer.clusterstate import ClusterState @@ -313,7 +313,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: assert isinstance(self.child, Leaf) result = self._collapse(device) new_leaf: Leaf = result[0] - new_block: Block = result[1] + new_block: BlockBase = result[1] 
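         # Record the op's memory cost and the new block's placement in the cluster state.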
self.cluster_state.commit_uop(self._mem_cost(), self.child.block.id, device) # self.cluster_state.add_block(new_block.id, new_block.size(), [device]) self.cluster_state.add_block( @@ -330,12 +330,12 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: def _collapse(self, device: Device): assert isinstance(self.child, Leaf) - block: Block = self.child.block + block: BlockBase = self.child.block op_name, args = self.op_name, {} if op_name == "transpose": - block: Block = block.transpose(defer=True) + block: BlockBase = block.transpose(defer=True) else: - block: Block = block.ufunc(op_name, device=device) + block: BlockBase = block.ufunc(op_name, device=device) leaf: Leaf = Leaf(self.cluster_state) leaf.block = block leaf.tree_node_size = self.child.tree_node_size.uop(op_name) @@ -344,7 +344,7 @@ def _collapse(self, device: Device): def _mem_cost(self): assert isinstance(self.child, Leaf) - block: Block = self.child.block + block: BlockBase = self.child.block if block.is_dense: return np.product(block.shape) if sync_nnz > 1: @@ -434,10 +434,10 @@ def copy(self, cluster_state, parent=None, new_ids=False): def _collapse(self, device: Device): assert isinstance(self.child, Leaf) - child_block: Block = self.child.block + child_block: BlockBase = self.child.block op_name, args = self.op_name, {} - block = Block( + block: BlockBase = Block( grid_entry=self.grid_entry(), grid_shape=self.grid_shape(), shape=self.shape(), @@ -467,7 +467,7 @@ def _collapse(self, device: Device): def _mem_cost(self): assert isinstance(self.child, Leaf) - block: Block = self.child.block + block: BlockBase = self.child.block if block.is_dense: return np.product(self.shape()) if sync_nnz > 1: @@ -661,7 +661,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: assert isinstance(self.left, Leaf) and isinstance(self.right, Leaf) result = self._collapse(device) new_leaf: Leaf = result[0] - new_block: Block = result[1] + new_block: BlockBase = result[1] # This updates load on nodes and channels. # This also updates block states to indicate that they now reside on the provided nodes. # Update the cluster state after computing the leaf, so that transfer costs are properly @@ -690,8 +690,8 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: def _collapse(self, device: Device): assert isinstance(self.left, Leaf) and isinstance(self.right, Leaf) - lblock: Block = self.left.block - rblock: Block = self.right.block + lblock: BlockBase = self.left.block + rblock: BlockBase = self.right.block if self.op_name == "matmul": op_name, args = "tensordot", {"axes": 1} elif self.op_name == "tensordot": @@ -699,7 +699,7 @@ def _collapse(self, device: Device): else: op_name, args = self.op_name, {} assert array_utils.can_broadcast_shapes(lblock.shape, rblock.shape) - block: Block = lblock.bop(op_name, rblock, args=args, device=device) + block: BlockBase = lblock.bop(op_name, rblock, args=args, device=device) leaf: Leaf = Leaf(self.cluster_state) leaf.block = block leaf.tree_node_size = self.left.tree_node_size.bop( @@ -714,8 +714,8 @@ def _mem_cost(self): # Computes the memory required to perform this operation. # We approximate by just computing the memory required to store the result. 
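         # For example, broadcasting a (1000, 1) block against a (1000, 1000) block yields
         # a (1000, 1000) result, i.e. roughly 10**6 elements to store.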
assert isinstance(self.left, Leaf) and isinstance(self.right, Leaf) - lblock: Block = self.left.block - rblock: Block = self.right.block + lblock: BlockBase = self.left.block + rblock: BlockBase = self.right.block if self.op_name == "matmul": op_name, args = "tensordot", {"axes": 1} elif self.op_name == "tensordot": @@ -981,7 +981,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: block_ids = [child.block.id for child in self.children] result = self._collapse(device) new_leaf: Leaf = result[0] - new_block: Block = result[1] + new_block: BlockBase = result[1] # This updates load on nodes and channels. # This also updates block states to indicate that they now reside on the provided nodes. # Update the cluster state after computing the leaf, so that transfer costs are properly @@ -1007,7 +1007,7 @@ def _collapse(self, device: Device): block_oids.append(child.block.oid) if km is None: km = child.block.km - block: Block = Block( + block: BlockBase = Block( self._grid_entry, self._grid_shape, self._shape, self._dtype, False, km ) block._device = device @@ -1175,7 +1175,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: block_ids = [child.block.id for child in self.children] result = self._collapse(device) new_leaf: Leaf = result[0] - new_block: Block = result[1] + new_block: BlockBase = result[1] # This updates load on nodes and channels. # This also updates block states to indicate that they now reside on the provided nodes. # Update the cluster state after computing the leaf, so that transfer costs are properly @@ -1204,7 +1204,7 @@ def _collapse(self, device: Device): block_oids.append(child.block.oid) if km is None: km = child.block.km - block: Block = Block( + block: BlockBase = Block( self.grid_entry(), self.grid_shape(), self.shape(), self.dtype(), False, km ) block._device = device diff --git a/nums/experimental/optimizer/grapharray.py b/nums/experimental/optimizer/grapharray.py index f03a4163..f4a2300b 100644 --- a/nums/experimental/optimizer/grapharray.py +++ b/nums/experimental/optimizer/grapharray.py @@ -23,7 +23,7 @@ from nums.core.settings import sync_nnz from nums.core.array import utils as array_utils -from nums.core.array.base import BlockArrayBase, Block +from nums.core.array.base import BlockBase, BlockArrayBase from nums.core.array.blockarray import BlockArray from nums.core.array.sparse import SparseBlockArray from nums.core.kernel.kernel_manager import KernelManager @@ -50,7 +50,7 @@ def graphs_from_ba( ) -> np.ndarray: graphs = np.empty(shape=ba.grid.grid_shape, dtype=np.object) for grid_entry in ba.grid.get_entry_iterator(): - block: Block = ba.blocks[grid_entry] + block: BlockBase = ba.blocks[grid_entry] # Allocate the block to the node on which it's created. 
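         # Placement follows the kernel manager's device grid for this grid entry.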
km: KernelManager = ba.km device: Device = km.device_grid.get_device( @@ -166,7 +166,7 @@ def iterator(self): yield node def to_blocks(self) -> np.ndarray: - blocks: np.ndarray = np.empty(self.grid.grid_shape, dtype=Block) + blocks: np.ndarray = np.empty(self.grid.grid_shape, dtype=BlockBase) for grid_entry in self.grid.get_entry_iterator(): leaf: TreeNode = self.graphs[grid_entry] assert isinstance(leaf, Leaf), "%s,%s" % (str(leaf), type(leaf)) diff --git a/nums/experimental/optimizer/reduction_ops.py b/nums/experimental/optimizer/reduction_ops.py index 1a363513..5ef10f03 100644 --- a/nums/experimental/optimizer/reduction_ops.py +++ b/nums/experimental/optimizer/reduction_ops.py @@ -20,7 +20,7 @@ import numpy as np from nums.core.settings import sync_nnz -from nums.core.array.base import Block +from nums.core.array.base import BlockBase from nums.core.grid.grid import Device from nums.core.kernel.kernel_manager import KernelManager from nums.experimental.optimizer.clusterstate import ClusterState @@ -215,7 +215,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> TreeNode: assert isinstance(left, Leaf) and isinstance(right, Leaf) result = self._collapse(device, left, right) new_leaf: Leaf = result[0] - new_block: Block = result[1] + new_block: BlockBase = result[1] # Update action leaf queue. assert set(leaf_ids) == {self.action_leaf_q.pop(0), self.action_leaf_q.pop(0)} @@ -257,13 +257,13 @@ def execute_on(self, device: Device, leaf_ids=None) -> TreeNode: return self def _collapse(self, device: Device, left: Leaf, right: Leaf): - lblock: Block = left.block - rblock: Block = right.block + lblock: BlockBase = left.block + rblock: BlockBase = right.block if self.op_name == "matmul": raise ValueError("matmul is not a supported reduction operator.") op_name, args = self.op_name, {} assert lblock.shape == rblock.shape - block: Block = lblock.copy() + block: BlockBase = lblock.copy() block.transposed = False block.dtype = array_utils.get_reduce_output_type(self.op_name, lblock.dtype) block.oid = lblock.km.bop_reduce( @@ -294,12 +294,12 @@ def _mem_cost(self, leafs): shape = None for leaf in leafs: assert leaf.tree_node_id in self.leafs_dict - leaf_block: Block = leaf.block + leaf_block: BlockBase = leaf.block if shape is None: shape = leaf_block.shape else: assert leaf_block.shape == shape - leaf_block: Block = leafs[0].block + leaf_block: BlockBase = leafs[0].block if leaf_block.is_dense: return leaf_block.size() if sync_nnz > 1: diff --git a/tests/core/array/test_sparse.py b/tests/core/array/test_sparse.py index 85f0254b..302bd082 100644 --- a/tests/core/array/test_sparse.py +++ b/tests/core/array/test_sparse.py @@ -171,6 +171,17 @@ def test_tensordot(app_inst: ArrayApplication): assert np.array_equal(np.tensordot(x1, x2, axes=1), y_ba.get()) +# def test_getitem(app_inst: ArrayApplication): +# x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]]) +# x1_sp = sparse.COO.from_numpy(x1, fill_value=2) +# x1_ba = app_inst.array(x1, block_shape=(2, 2)) +# x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=2) +# y_sp = x1_sp[0:2, 1:3] +# y_sba = x1_sba[0:2, 1:3] +# assert y_sba.fill_value == y_sp.fill_value # 2 +# assert np.array_equal(y_sp.todense(), y_sba.to_ba().get()) + + def test_sdtp(app_inst: ArrayApplication): shape = 50, 50, 50 block_shape = 10, 10, 10 From 7bb4470123b70155cce8afc11d8f7dc609e3e2f3 Mon Sep 17 00:00:00 2001 From: Daniel Zou Date: Tue, 16 Aug 2022 08:46:06 -0700 Subject: [PATCH 3/5] Remove fill_value from API --- nums/core/array/base.py | 674 
++-------------------- nums/core/array/blockarray.py | 606 ++++++++++++++++++- nums/core/array/random.py | 10 +- nums/core/array/sparse.py | 191 ++---- nums/core/array/utils.py | 27 +- nums/core/kernel/kernel_interface.py | 3 +- nums/core/kernel/numpy_kernel.py | 14 +- nums/experimental/optimizer/graph.py | 2 +- nums/experimental/optimizer/grapharray.py | 6 +- nums/experimental/optimizer/size.py | 98 ++-- nums/numpy/random.py | 11 +- tests/core/array/test_sparse.py | 54 +- tests/experimental/optimizer/test_size.py | 25 +- 13 files changed, 783 insertions(+), 938 deletions(-) diff --git a/nums/core/array/base.py b/nums/core/array/base.py index 648a4fc5..bec634fe 100644 --- a/nums/core/array/base.py +++ b/nums/core/array/base.py @@ -17,6 +17,7 @@ # pylint: disable=too-many-lines import warnings +from numba.core.errors import NumbaNotImplementedError import numpy as np @@ -52,11 +53,10 @@ def __init__( self.id = Block.block_id_counter # Set if a device id was used to compute this block. self._device = None - self.fill_value = None @property def is_dense(self): - return self.fill_value is None + raise NotImplementedError() @property def nbytes(self): @@ -95,7 +95,31 @@ def true_grid_shape(self): return self.grid_shape def transpose(self, defer=False, redistribute=False): - raise NotImplementedError() + # If defer is True, this operation does not modify the remote object. + # If defer is True and redistribute is False, + # this operation does not move the remote object. + grid_entryT = tuple(reversed(self.grid_entry)) + grid_shapeT = tuple(reversed(self.grid_shape)) + blockT = type(self)( + grid_entry=grid_entryT, + grid_shape=grid_shapeT, + shape=tuple(reversed(self.shape)), + dtype=self.dtype, + transposed=not self.transposed, + km=self.km, + ) + blockT.oid = self.oid + if not defer: + blockT.transposed = False + if redistribute: + syskwargs = {"grid_entry": grid_entryT, "grid_shape": grid_shapeT} + else: + syskwargs = { + "grid_entry": self.grid_entry, + "grid_shape": self.grid_shape, + } + blockT.oid = self.km.transpose(self.oid, syskwargs=syskwargs) + return blockT def device(self): if self._device is not None: @@ -299,6 +323,10 @@ def __init__( ): super().__init__(grid_entry, grid_shape, shape, dtype, transposed, km, id) + @property + def is_dense(self): + return True + @property def nbytes(self): return np.prod(self.shape) * np.dtype(self.dtype).itemsize @@ -319,33 +347,6 @@ def copy(self, shallow=True): block.oid = self.oid return block - def transpose(self, defer=False, redistribute=False): - # If defer is True, this operation does not modify the remote object. - # If defer is True and redistribute is False, - # this operation does not move the remote object. - grid_entryT = tuple(reversed(self.grid_entry)) - grid_shapeT = tuple(reversed(self.grid_shape)) - blockT = Block( - grid_entry=grid_entryT, - grid_shape=grid_shapeT, - shape=tuple(reversed(self.shape)), - dtype=self.dtype, - transposed=not self.transposed, - km=self.km, - ) - blockT.oid = self.oid - if not defer: - blockT.transposed = False - if redistribute: - syskwargs = {"grid_entry": grid_entryT, "grid_shape": grid_shapeT} - else: - syskwargs = { - "grid_entry": self.grid_entry, - "grid_shape": self.grid_shape, - } - blockT.oid = self.km.transpose(self.oid, syskwargs=syskwargs) - return blockT - def map_uop(self, op_name, args=None, kwargs=None, device=None): # This retains transpose. 
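         # The copy below keeps the block's transposed flag, so a deferred transpose
         # still applies to the op's output.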
block = self.copy() @@ -459,11 +460,10 @@ def __init__(self, grid: ArrayGrid, km: KernelManager, blocks: np.ndarray = None self.ndim = len(self.shape) self.dtype = self.grid.dtype self.blocks = blocks - self.fill_value = None @property def is_dense(self): - return self.fill_value is None + raise NotImplementedError() def __repr__(self): return "BlockArray(" + str(self.blocks) + ")" @@ -545,7 +545,29 @@ def to_block_array(obj, km: KernelManager, block_shape=None): raise NotImplementedError() def transpose(self, defer=False, redistribute=False): - raise NotImplementedError() + """ + Transpose this matrix. Only use defer with arithmetic operations. + Setting redistribute to True may significantly impact performance. + :param defer: When true, the transpose operation will be applied + with the next arithmetic operation. + :param redistribute: If defer is false, setting this to true will + redistribute the data according to the device grid (data placement policy). + This parameter has no effect when defer is true. + :return: The transposed matrix. + """ + if defer and redistribute: + warnings.warn("defer is True, redistribute=True will be ignored.") + metaT = self.grid.to_meta() + metaT["shape"] = tuple(reversed(metaT["shape"])) + metaT["block_shape"] = tuple(reversed(metaT["block_shape"])) + gridT = ArrayGrid.from_meta(metaT) + rarrT = type(self)(gridT, self.km) + rarrT.blocks = np.copy(self.blocks.T) + for grid_entry in rarrT.grid.get_entry_iterator(): + rarrT.blocks[grid_entry] = rarrT.blocks[grid_entry].transpose( + defer, redistribute + ) + return rarrT def broadcast_to(self, shape): b = array_utils.broadcast(self.shape, shape) @@ -570,391 +592,6 @@ def broadcast_to(self, shape): result.blocks = broadcast return result - def reshape(self, *shape, **kwargs): - block_shape = kwargs.get("block_shape", None) - if array_utils.is_int(shape): - shape = (shape,) - elif len(shape) == 0: - shape = self.shape - elif isinstance(shape[0], (tuple, list)): - assert len(shape) == 1 - shape = shape[0] - else: - assert all(np.issubdtype(type(n), int) for n in shape) - shape = Reshape.compute_shape(self.shape, shape) - if block_shape is None: - if shape == self.shape: - # This is a noop. - block_shape = self.block_shape - else: - block_shape = self.km.get_block_shape(shape, self.dtype) - return Reshape()(self, shape, block_shape) - - def _preprocess_subscript(self, item): - if not isinstance(item, tuple): - ss = (item,) - else: - ss = item - # We need to fetch any block arrays. - tmp = [] - for entry in ss: - if isinstance(entry, BlockArrayBase): - val = entry.get() - else: - val = entry - if isinstance(val, list): - val = np.array(val) - if isinstance(val, np.ndarray): - # If this is a Boolean mask, convert it to integers. - if array_utils.is_bool(val.dtype, type_test=True): - val = np.arange(len(val))[val] - if val.shape == (): - val = val.item() - tmp.append(val) - ss = tuple(tmp) - is_handled_advanced = False - array_encountered = False - axis = None - - # Check if this is a supported advanced indexing operation. - for i, entry in enumerate(ss): - if isinstance(entry, slice) and entry.start is None and entry.stop is None: - continue - elif array_utils.is_int(entry) or array_utils.is_uint(entry): - continue - elif array_utils.is_array_like(entry): - if array_encountered: - raise NotImplementedError( - "Advanced indexing is only supported along a single axis." 
- ) - is_handled_advanced = True - array_encountered = True - axis = i - if not (np.all(0 <= entry) and np.all(entry < self.shape[axis])): - raise IndexError( - "Advanced indexing array along axis %s is out of bounds." % axis - ) - else: - if array_encountered: - raise NotImplementedError( - "Advanced indexing is only supported " - "with full slices and integers along other axes." - ) - is_handled_advanced = False - break - - return ss, is_handled_advanced, axis - - def __getitem__(self, item): - ss, is_handled_advanced, axis = self._preprocess_subscript(item) - - if is_handled_advanced: - # Treat this as a shuffle. - return self._advanced_single_array_select(ss, axis=axis) - - # This is to deal with circular imports. Little overhead since this happens once per call. - # However, would be better to rearrange modules in the future. - from nums.core.array.view import ArrayView - - av: ArrayView = ArrayView.from_block_array(self) - # TODO (hme): We don't have to create, but do so for now until we need to optimize. - return av[ss].create() - - def _advanced_single_array_select(self, ss: tuple, axis: int = 0): - # Create output array along the axis of the selection operation. - # We don't allocate zeros for output array. Instead, we let the update kernel - # create the initial set of zeros to save some memory. - array = ss[axis] - assert len(array.shape) == 1 - - # TODO: We may encounter block shape incompatability due to this. - block_size = self.block_shape[axis] - self.km.update_block_shape_map(array.shape[0], block_size) - - dst_axis = None - shape = [] - block_shape = [] - for i in range(len(self.shape)): - if i == axis: - dst_axis = len(shape) - shape.append(array.shape[0]) - block_shape.append(block_size) - elif i < len(ss): - if isinstance(ss[i], slice): - shape.append(self.shape[i]) - block_shape.append(self.block_shape[i]) - else: - # It's an index. We drop the indices. - continue - else: - shape.append(self.shape[i]) - block_shape.append(self.block_shape[i]) - - dst_arr = type(self)( - ArrayGrid( - shape=tuple(shape), - block_shape=tuple(block_shape), - dtype=self.dtype.__name__, - ), - km=self.km, - ) - - src_arr = self - np_ss = ss - ss = self.km.put( - ss, - syskwargs={ - "grid_entry": (0,), - "grid_shape": (1,), - }, - ) - for src_grid_entry in src_arr.grid.get_entry_iterator(): - src_coord: tuple = src_arr.grid.get_entry_coordinates(src_grid_entry) - src_block: Block = src_arr.blocks[src_grid_entry] - - # Make sure index values in subscript are within bounds of src_arr. - # We also prepare dst_grid_entry here. 
- dst_grid_entry_list = [] - skip = False - for curr_axis in range(len(np_ss)): - if curr_axis == axis: - dst_grid_entry_list.append(None) - elif isinstance(np_ss[curr_axis], slice): - dst_grid_entry_list.append(src_grid_entry[curr_axis]) - elif not ( - src_coord[curr_axis] - <= np_ss[curr_axis] - < src_coord[curr_axis] + src_block.shape[curr_axis] - ): - skip = True - break - if skip: - continue - for curr_axis in range(len(np_ss), len(src_grid_entry)): - dst_grid_entry_list.append(src_grid_entry[curr_axis]) - - for j in range(dst_arr.grid.grid_shape[dst_axis]): - dst_grid_entry_list[dst_axis] = j - dst_grid_entry = tuple(dst_grid_entry_list) - dst_block: Block = dst_arr.blocks[dst_grid_entry] - dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) - - if dst_block.oid is None: - dst_arg = (dst_block.shape, dst_block.dtype) - else: - dst_arg = dst_block.oid - dst_block.oid = self.km.advanced_select_block_along_axis( - dst_arg, - src_block.oid, - ss, - dst_axis, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - return dst_arr - - def __setitem__(self, key, value): - value: BlockArrayBase = self.to_block_array(value, self.km) - ss, is_handled_advanced, axis = self._preprocess_subscript(key) - if is_handled_advanced: - return self._advanced_single_array_assign(ss, value, axis) - - # This is to deal with circular imports. Little overhead since this happens once per call. - # However, would be better to rearrange modules in the future. - from nums.core.array.view import ArrayView - - av: ArrayView = ArrayView.from_block_array(self) - av[key] = value - - def _advanced_single_array_assign( - self, - ss: tuple, - value, - axis: int, - ): - array = ss[axis] - assert len(array.shape) == 1 - - # The subscript contains a single array. We therefore know one of two things is true: - # 1. value is the same shape as self along axes != axis. - # 2. value is scalar or 1-dimensional. - # We currently don't support the case where value may broadcasted if it has more dims. - # This should be a straight-forward future task. - value: BlockArrayBase = value - mode = None - if len(value.shape) == 0: - # subscripted value per block will broadcast to other dimensions. - mode = "scalar" - elif len(value.shape) == 1: - # assert len(value.shape) == len(ss) - mode = "single-dim" - # Can broadcast if trailing dim matches. - assert len(ss[axis]) == value.shape[0] - - for i in range(len(self.shape)): - if i == axis: - assert len(ss[i]) == value.shape[0] - elif i < axis: - # Nothing to check here. - # These entries are : or integer. - pass - else: - if i < len(ss): - if not isinstance(ss[i], slice): - # ss[i] is an integer. - continue - # If we're here, then the rest of the subscript operator - # will resolve to :, which is not broadcastable. - raise ValueError( - "Cannot broadcast input array " - "from shape %s into shape %s" - % (value.shape, tuple([value.shape[0]] + list(self.shape[i:]))) - ) - elif len(value.shape) == len(self.shape): - mode = "multi-dim" - new_block_shape = [] - for i in range(len(self.shape)): - if i == axis: - new_block_shape.append(value.block_shape[i]) - elif i < len(ss) and ( - array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) - ): - # These entries are : or integer. - # assert array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) - assert value.shape[i] == 1 - new_block_shape.append(1) - else: - assert value.shape[i] == self.shape[i], "Shape mismatch." 
- new_block_shape.append(self.block_shape[i]) - new_block_shape = tuple(new_block_shape) - if new_block_shape != value.block_shape: - # TODO: This message occurs on X[idx[:n]] = X[idx[n:]] + 0.5, - # even when n is a multiple of block_shape[0]. - warnings.warn( - ("Assigned value block shape %s " % str(value.block_shape)) - + ( - "does not match block shape %s of assignee. " - % str(new_block_shape) - ) - + "Applying reshape to assigned value." - ) - value = value.reshape(block_shape=new_block_shape) - - # Like select, iterate over destination blocks along the axis being updated. - # e.g. if self is 2-dim and axis=0, then fix the row and iterate over the columns. - # If value has the same shape as self, then for each destination block, - # iterate over the blocks in value along axis. - # e.g. if self is 2-dim and axis=0, then for the given column, iterate over the rows - # of value. - # If value is scalar, then attempt to assign it to every destination block. - # If value is 1-dim, the just iterate over the dim and assign accordingly. - - dst_arr = self - src_arr = value - src_grid_shape = src_arr.grid.grid_shape - np_ss = ss - ss = self.km.put( - ss, - syskwargs={ - "grid_entry": (0,), - "grid_shape": (1,), - }, - ) - for dst_grid_entry in dst_arr.grid.get_entry_iterator(): - dst_block: BlockBase = dst_arr.blocks[dst_grid_entry] - dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) - - # Make sure index values in subscript are within bounds of dst_arr. - # We don't need to check src_arr: - # 1) The block shapes of dst_arr and src_arr are the same except along axis - # and indices in ss. We are not concerned with axes the indices in ss correspond to, - # because they are of size 1 in src_arr => we only need to check that indices - # fall within bounds of dst_arr. - # 2) For each dst_arr, we test the values - # to assign to dst_arr by traverse the src_arr along axis. - # Thus, size along all other axes are equal or broadcasted. - skip = False - for curr_axis in range(len(np_ss)): - if curr_axis == axis or isinstance(np_ss[curr_axis], slice): - continue - if not ( - dst_coord[curr_axis] - <= np_ss[curr_axis] - < dst_coord[curr_axis] + dst_block.shape[curr_axis] - ): - skip = True - break - if skip: - continue - - if mode == "scalar": - src_block: BlockBase = src_arr.blocks.item() - src_coord: tuple = src_arr.grid.get_entry_coordinates( - src_block.grid_entry - ) - dst_block.oid = self.km.advanced_assign_block_along_axis( - dst_block.oid, - src_block.oid, - ss, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - elif mode == "single-dim": - for src_grid_entry in src_arr.grid.get_entry_iterator(): - src_block: BlockBase = src_arr.blocks[src_grid_entry] - src_coord: tuple = src_arr.grid.get_entry_coordinates( - src_grid_entry - ) - dst_block.oid = self.km.advanced_assign_block_along_axis( - dst_block.oid, - src_block.oid, - ss, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - elif mode == "multi-dim": - for j in range(src_grid_shape[axis]): - # Apply sel from each block along axis of src_arr. - # e.g. for 2 dim array, we fix the column blocks - # given by dst_grid_entry, and iterate over the rows. 
- src_grid_entry = tuple( - list(dst_grid_entry[:axis]) - + [j] - + list(dst_grid_entry[axis + 1 :]) - ) - src_block: BlockBase = src_arr.blocks[src_grid_entry] - src_coord: tuple = src_arr.grid.get_entry_coordinates( - src_grid_entry - ) - dst_block.oid = self.km.advanced_assign_block_along_axis( - dst_block.oid, - src_block.oid, - ss, - axis, - dst_coord, - src_coord, - syskwargs={ - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - }, - ) - return dst_arr - def tree_reduce( self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape ): @@ -1277,204 +914,3 @@ def __ne__(self, other): def __rne__(self, other): other = self.check_or_convert_other(other) return other.__inequality__("ne", self) - - -class Reshape: - @staticmethod - def compute_shape(shape, input_shape): - size = np.product(shape) - if -1 in input_shape: - new_shape = [] - other_dim_prod = 1 - negative_one_seen = False - for dim in input_shape: - if dim == -1: - if negative_one_seen: - raise Exception("Only one -1 permitted in reshape.") - negative_one_seen = True - continue - other_dim_prod *= dim - if size % other_dim_prod != 0: - raise Exception("Invalid shape.") - for dim in input_shape: - if dim == -1: - new_shape.append(size // other_dim_prod) - else: - new_shape.append(dim) - else: - new_shape = input_shape - assert size == np.product(new_shape) - return new_shape - - def _group_index_lists_by_block( - self, dst_slice_tuples, src_grid: ArrayGrid, dst_index_list, src_index_list - ): - # TODO(hme): Keep this function here until it's needed for greater support of - # selection/assignment operations. - # Block grid entries needed to write to given dst_slice_selection. - src_blocks = {} - dst_slice_np = np.array(dst_slice_tuples).T - dst_index_arr = np.array(dst_index_list) - src_index_arr = np.array(src_index_list) - # Pick the smallest type to represent indices. - # A set of these indices may be transmitted over the network, - # so we want to pick the smallest encoding possible. - index_types = [ - (2**8, np.uint8), - (2**16, np.uint16), - (2**32, np.uint32), - (2**64, np.uint64), - ] - index_type = None - for bound, curr_index_type in index_types: - if np.all(np.array(src_grid.block_shape) < bound) and np.all( - dst_slice_np[1] < bound - ): - index_type = curr_index_type - break - if index_type is None: - raise Exception("Unable to encode block indices, blocks are too large.") - for grid_entry in src_grid.get_entry_iterator(): - src_slice_np = np.array(src_grid.get_slice_tuples(grid_entry)).T - index_pairs = [] - for i in range(src_index_arr.shape[0]): - src_index = src_index_arr[i] - dst_index = dst_index_arr[i] - if np.all( - (src_slice_np[0] <= src_index) & (src_index < src_slice_np[1]) - ): - index_pair = ( - (dst_index - dst_slice_np[0]).astype(index_type), - (src_index - src_slice_np[0]).astype(index_type), - ) - index_pairs.append(index_pair) - if len(index_pairs) > 0: - src_blocks[grid_entry] = index_pairs - return src_blocks - - def _arbitrary_reshape( - self, arr: BlockArrayBase, shape, block_shape - ) -> BlockArrayBase: - # This is the worst-case scenario. - # Generate index mappings per block, and group source indices to minimize - # RPCs and generation of new objects. 
- km = arr.km - dst_arr = type(arr).empty( - shape=shape, block_shape=block_shape, dtype=arr.dtype, km=km - ) - for dst_grid_entry in dst_arr.grid.get_entry_iterator(): - dst_block: BlockBase = dst_arr.blocks[dst_grid_entry] - dst_slice_selection = dst_arr.grid.get_slice(dst_grid_entry) - dst_index_list = array_utils.slice_sel_to_index_list(dst_slice_selection) - src_index_list = array_utils.translate_index_list( - dst_index_list, shape, arr.shape - ) - src_blocks = self._group_index_lists_by_block( - dst_arr.grid.get_slice_tuples(dst_grid_entry), - arr.grid, - dst_index_list, - src_index_list, - ) - for src_grid_entry in src_blocks: - src_block: BlockBase = arr.blocks[src_grid_entry] - index_pairs = src_blocks[src_grid_entry] - syskwargs = { - "grid_entry": dst_grid_entry, - "grid_shape": dst_arr.grid.grid_shape, - } - dst_block.oid = km.update_block_by_index( - dst_block.oid, src_block.oid, index_pairs, syskwargs=syskwargs - ) - return dst_arr - - def _block_shape_reshape(self, arr, block_shape): - rarr: BlockArrayBase = type(arr).empty( - arr.shape, block_shape, arr.dtype, arr.km - ) - for grid_entry in rarr.grid.get_entry_iterator(): - grid_entry_slice = rarr.grid.get_slice(grid_entry) - # TODO (hme): This could be less costly. - rarr[grid_entry_slice] = arr[grid_entry_slice] - return rarr - - def _strip_ones(self, shape): - return tuple(filter(lambda x: x != 1, shape)) - - def _check_positions_ones(self, shape, block_shape): - # If a position in the shape is 1, then the corresponding - # position in block_shape should also be 1. - for i in range(len(shape)): - if shape[i] == 1: - if shape[i] != block_shape[i]: - return False - return True - - def _is_simple_reshape(self, arr: BlockArrayBase, shape, block_shape): - # Is the reshape a difference of factors of 1? - # Strip out 1s and compare. - # If a position in the shape is 1, then the corresponding - # position in block_shape should also be 1. - - # If source shape and dest shape are the same or source block_shape and dest block_shape - # are same, this is not a simple reshape. - if shape == arr.shape or block_shape == arr.block_shape: - return False - - # Checks if source shape and dest shape are same & source block_shape and dest - # block_shape are same after stripping ones. - if not ( - self._strip_ones(shape) == self._strip_ones(arr.shape) - and self._strip_ones(block_shape) == self._strip_ones(arr.block_shape) - ): - return False - if not self._check_positions_ones(shape, block_shape): - return False - return True - - def _simple_reshape(self, arr, shape, block_shape): - # Reshape the array of blocks only. - # This is only used when the difference in shape are factors of 1s, - # and the ordering of other factors are maintained. - - # Check assumptions. - assert len(self._strip_ones(arr.shape)) == len(self._strip_ones(shape)) - - # Create new grid, and perform reshape on blocks - # to simplify access to source blocks. 
- grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__) - src_blocks = arr.blocks.reshape(grid.grid_shape) - if arr.is_dense: - rarr = type(arr)(grid, arr.km) - else: - rarr = type(arr)(grid, arr.km, arr.fill_value) - for grid_entry in grid.get_entry_iterator(): - src_block: BlockBase = src_blocks[grid_entry] - dst_block: BlockBase = rarr.blocks[grid_entry] - syskwargs = {"grid_entry": grid_entry, "grid_shape": grid.grid_shape} - dst_block.oid = arr.km.reshape( - src_block.oid, dst_block.shape, syskwargs=syskwargs - ) - return rarr - - def _validate(self, arr, shape, block_shape): - assert -1 not in shape - assert -1 not in block_shape - assert len(shape) == len(block_shape) - assert np.product(arr.shape) == np.product(shape) - - def __call__(self, arr: BlockArrayBase, shape, block_shape): - self._validate(arr, shape, block_shape) - if arr.shape == shape and arr.block_shape == block_shape: - return arr - elif self._is_simple_reshape(arr, shape, block_shape): - return self._simple_reshape(arr, shape, block_shape) - elif arr.shape == shape and arr.block_shape != block_shape: - return self._block_shape_reshape(arr, block_shape) - elif arr.shape != shape and arr.block_shape == block_shape: - # Just do full reshape for this case as well. - # Though there may be a better solution, we generally expect - # the block shape to change with array shape. - return self._arbitrary_reshape(arr, shape, block_shape) - else: - assert arr.shape != shape and arr.block_shape != block_shape - return self._arbitrary_reshape(arr, shape, block_shape) diff --git a/nums/core/array/blockarray.py b/nums/core/array/blockarray.py index 041747b2..db0eb20d 100644 --- a/nums/core/array/blockarray.py +++ b/nums/core/array/blockarray.py @@ -51,6 +51,10 @@ def __init__(self, grid: ArrayGrid, km: KernelManager, blocks: np.ndarray = None km=self.km, ) + @property + def is_dense(self): + return True + @classmethod def empty(cls, shape, block_shape, dtype, km: KernelManager): return BlockArray.create("empty", shape, block_shape, dtype, km) @@ -161,6 +165,26 @@ def to_single_block(self, replicate=False): ) return res + def reshape(self, *shape, **kwargs): + block_shape = kwargs.get("block_shape", None) + if array_utils.is_int(shape): + shape = (shape,) + elif len(shape) == 0: + shape = self.shape + elif isinstance(shape[0], (tuple, list)): + assert len(shape) == 1 + shape = shape[0] + else: + assert all(np.issubdtype(type(n), int) for n in shape) + shape = Reshape.compute_shape(self.shape, shape) + if block_shape is None: + if shape == self.shape: + # This is a noop. + block_shape = self.block_shape + else: + block_shape = self.km.get_block_shape(shape, self.dtype) + return Reshape()(self, shape, block_shape) + def expand_dims(self, axis): """ This function refers to the numpy implementation of expand_dims. @@ -212,30 +236,370 @@ def swapaxes(self, axis1, axis2): rarr_swap = BlockArray(grid_swap, self.km, rarr_src) return rarr_swap - def transpose(self, defer=False, redistribute=False): - """ - Transpose this matrix. Only use defer with arithmetic operations. - Setting redistribute to True may significantly impact performance. - :param defer: When true, the transpose operation will be applied - with the next arithmetic operation. - :param redistribute: If defer is false, setting this to true will - redistribute the data according to the device grid (data placement policy). - This parameter has no effect when defer is true. - :return: The transposed matrix. 
- """ - if defer and redistribute: - warnings.warn("defer is True, redistribute=True will be ignored.") - metaT = self.grid.to_meta() - metaT["shape"] = tuple(reversed(metaT["shape"])) - metaT["block_shape"] = tuple(reversed(metaT["block_shape"])) - gridT = ArrayGrid.from_meta(metaT) - rarrT = BlockArray(gridT, self.km) - rarrT.blocks = np.copy(self.blocks.T) - for grid_entry in rarrT.grid.get_entry_iterator(): - rarrT.blocks[grid_entry] = rarrT.blocks[grid_entry].transpose( - defer, redistribute - ) - return rarrT + def _preprocess_subscript(self, item): + if not isinstance(item, tuple): + ss = (item,) + else: + ss = item + # We need to fetch any block arrays. + tmp = [] + for entry in ss: + if isinstance(entry, BlockArrayBase): + val = entry.get() + else: + val = entry + if isinstance(val, list): + val = np.array(val) + if isinstance(val, np.ndarray): + # If this is a Boolean mask, convert it to integers. + if array_utils.is_bool(val.dtype, type_test=True): + val = np.arange(len(val))[val] + if val.shape == (): + val = val.item() + tmp.append(val) + ss = tuple(tmp) + is_handled_advanced = False + array_encountered = False + axis = None + + # Check if this is a supported advanced indexing operation. + for i, entry in enumerate(ss): + if isinstance(entry, slice) and entry.start is None and entry.stop is None: + continue + elif array_utils.is_int(entry) or array_utils.is_uint(entry): + continue + elif array_utils.is_array_like(entry): + if array_encountered: + raise NotImplementedError( + "Advanced indexing is only supported along a single axis." + ) + is_handled_advanced = True + array_encountered = True + axis = i + if not (np.all(0 <= entry) and np.all(entry < self.shape[axis])): + raise IndexError( + "Advanced indexing array along axis %s is out of bounds." % axis + ) + else: + if array_encountered: + raise NotImplementedError( + "Advanced indexing is only supported " + "with full slices and integers along other axes." + ) + is_handled_advanced = False + break + + return ss, is_handled_advanced, axis + + def __getitem__(self, item): + ss, is_handled_advanced, axis = self._preprocess_subscript(item) + + if is_handled_advanced: + # Treat this as a shuffle. + return self._advanced_single_array_select(ss, axis=axis) + + # This is to deal with circular imports. Little overhead since this happens once per call. + # However, would be better to rearrange modules in the future. + from nums.core.array.view import ArrayView + + av: ArrayView = ArrayView.from_block_array(self) + # TODO (hme): We don't have to create, but do so for now until we need to optimize. + return av[ss].create() + + def _advanced_single_array_select(self, ss: tuple, axis: int = 0): + # Create output array along the axis of the selection operation. + # We don't allocate zeros for output array. Instead, we let the update kernel + # create the initial set of zeros to save some memory. + array = ss[axis] + assert len(array.shape) == 1 + + # TODO: We may encounter block shape incompatability due to this. + block_size = self.block_shape[axis] + self.km.update_block_shape_map(array.shape[0], block_size) + + dst_axis = None + shape = [] + block_shape = [] + for i in range(len(self.shape)): + if i == axis: + dst_axis = len(shape) + shape.append(array.shape[0]) + block_shape.append(block_size) + elif i < len(ss): + if isinstance(ss[i], slice): + shape.append(self.shape[i]) + block_shape.append(self.block_shape[i]) + else: + # It's an index. We drop the indices. 
+ continue + else: + shape.append(self.shape[i]) + block_shape.append(self.block_shape[i]) + + dst_arr = type(self)( + ArrayGrid( + shape=tuple(shape), + block_shape=tuple(block_shape), + dtype=self.dtype.__name__, + ), + km=self.km, + ) + + src_arr = self + np_ss = ss + ss = self.km.put( + ss, + syskwargs={ + "grid_entry": (0,), + "grid_shape": (1,), + }, + ) + for src_grid_entry in src_arr.grid.get_entry_iterator(): + src_coord: tuple = src_arr.grid.get_entry_coordinates(src_grid_entry) + src_block: Block = src_arr.blocks[src_grid_entry] + + # Make sure index values in subscript are within bounds of src_arr. + # We also prepare dst_grid_entry here. + dst_grid_entry_list = [] + skip = False + for curr_axis in range(len(np_ss)): + if curr_axis == axis: + dst_grid_entry_list.append(None) + elif isinstance(np_ss[curr_axis], slice): + dst_grid_entry_list.append(src_grid_entry[curr_axis]) + elif not ( + src_coord[curr_axis] + <= np_ss[curr_axis] + < src_coord[curr_axis] + src_block.shape[curr_axis] + ): + skip = True + break + if skip: + continue + for curr_axis in range(len(np_ss), len(src_grid_entry)): + dst_grid_entry_list.append(src_grid_entry[curr_axis]) + + for j in range(dst_arr.grid.grid_shape[dst_axis]): + dst_grid_entry_list[dst_axis] = j + dst_grid_entry = tuple(dst_grid_entry_list) + dst_block: Block = dst_arr.blocks[dst_grid_entry] + dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) + + if dst_block.oid is None: + dst_arg = (dst_block.shape, dst_block.dtype) + else: + dst_arg = dst_block.oid + dst_block.oid = self.km.advanced_select_block_along_axis( + dst_arg, + src_block.oid, + ss, + dst_axis, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + return dst_arr + + def __setitem__(self, key, value): + value: BlockArrayBase = self.to_block_array(value, self.km) + ss, is_handled_advanced, axis = self._preprocess_subscript(key) + if is_handled_advanced: + return self._advanced_single_array_assign(ss, value, axis) + + # This is to deal with circular imports. Little overhead since this happens once per call. + # However, would be better to rearrange modules in the future. + from nums.core.array.view import ArrayView + + av: ArrayView = ArrayView.from_block_array(self) + av[key] = value + + def _advanced_single_array_assign( + self, + ss: tuple, + value, + axis: int, + ): + array = ss[axis] + assert len(array.shape) == 1 + + # The subscript contains a single array. We therefore know one of two things is true: + # 1. value is the same shape as self along axes != axis. + # 2. value is scalar or 1-dimensional. + # We currently don't support the case where value may broadcasted if it has more dims. + # This should be a straight-forward future task. + value: BlockArrayBase = value + mode = None + if len(value.shape) == 0: + # subscripted value per block will broadcast to other dimensions. + mode = "scalar" + elif len(value.shape) == 1: + # assert len(value.shape) == len(ss) + mode = "single-dim" + # Can broadcast if trailing dim matches. + assert len(ss[axis]) == value.shape[0] + + for i in range(len(self.shape)): + if i == axis: + assert len(ss[i]) == value.shape[0] + elif i < axis: + # Nothing to check here. + # These entries are : or integer. + pass + else: + if i < len(ss): + if not isinstance(ss[i], slice): + # ss[i] is an integer. + continue + # If we're here, then the rest of the subscript operator + # will resolve to :, which is not broadcastable. 
+ raise ValueError( + "Cannot broadcast input array " + "from shape %s into shape %s" + % (value.shape, tuple([value.shape[0]] + list(self.shape[i:]))) + ) + elif len(value.shape) == len(self.shape): + mode = "multi-dim" + new_block_shape = [] + for i in range(len(self.shape)): + if i == axis: + new_block_shape.append(value.block_shape[i]) + elif i < len(ss) and ( + array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) + ): + # These entries are : or integer. + # assert array_utils.is_int(ss[i]) or array_utils.is_uint(ss[i]) + assert value.shape[i] == 1 + new_block_shape.append(1) + else: + assert value.shape[i] == self.shape[i], "Shape mismatch." + new_block_shape.append(self.block_shape[i]) + new_block_shape = tuple(new_block_shape) + if new_block_shape != value.block_shape: + # TODO: This message occurs on X[idx[:n]] = X[idx[n:]] + 0.5, + # even when n is a multiple of block_shape[0]. + warnings.warn( + ("Assigned value block shape %s " % str(value.block_shape)) + + ( + "does not match block shape %s of assignee. " + % str(new_block_shape) + ) + + "Applying reshape to assigned value." + ) + value = value.reshape(block_shape=new_block_shape) + + # Like select, iterate over destination blocks along the axis being updated. + # e.g. if self is 2-dim and axis=0, then fix the row and iterate over the columns. + # If value has the same shape as self, then for each destination block, + # iterate over the blocks in value along axis. + # e.g. if self is 2-dim and axis=0, then for the given column, iterate over the rows + # of value. + # If value is scalar, then attempt to assign it to every destination block. + # If value is 1-dim, the just iterate over the dim and assign accordingly. + + dst_arr = self + src_arr = value + src_grid_shape = src_arr.grid.grid_shape + np_ss = ss + ss = self.km.put( + ss, + syskwargs={ + "grid_entry": (0,), + "grid_shape": (1,), + }, + ) + for dst_grid_entry in dst_arr.grid.get_entry_iterator(): + dst_block: BlockBase = dst_arr.blocks[dst_grid_entry] + dst_coord: tuple = dst_arr.grid.get_entry_coordinates(dst_grid_entry) + + # Make sure index values in subscript are within bounds of dst_arr. + # We don't need to check src_arr: + # 1) The block shapes of dst_arr and src_arr are the same except along axis + # and indices in ss. We are not concerned with axes the indices in ss correspond to, + # because they are of size 1 in src_arr => we only need to check that indices + # fall within bounds of dst_arr. + # 2) For each dst_arr, we test the values + # to assign to dst_arr by traverse the src_arr along axis. + # Thus, size along all other axes are equal or broadcasted. 
+ skip = False + for curr_axis in range(len(np_ss)): + if curr_axis == axis or isinstance(np_ss[curr_axis], slice): + continue + if not ( + dst_coord[curr_axis] + <= np_ss[curr_axis] + < dst_coord[curr_axis] + dst_block.shape[curr_axis] + ): + skip = True + break + if skip: + continue + + if mode == "scalar": + src_block: BlockBase = src_arr.blocks.item() + src_coord: tuple = src_arr.grid.get_entry_coordinates( + src_block.grid_entry + ) + dst_block.oid = self.km.advanced_assign_block_along_axis( + dst_block.oid, + src_block.oid, + ss, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + elif mode == "single-dim": + for src_grid_entry in src_arr.grid.get_entry_iterator(): + src_block: BlockBase = src_arr.blocks[src_grid_entry] + src_coord: tuple = src_arr.grid.get_entry_coordinates( + src_grid_entry + ) + dst_block.oid = self.km.advanced_assign_block_along_axis( + dst_block.oid, + src_block.oid, + ss, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + elif mode == "multi-dim": + for j in range(src_grid_shape[axis]): + # Apply sel from each block along axis of src_arr. + # e.g. for 2 dim array, we fix the column blocks + # given by dst_grid_entry, and iterate over the rows. + src_grid_entry = tuple( + list(dst_grid_entry[:axis]) + + [j] + + list(dst_grid_entry[axis + 1 :]) + ) + src_block: BlockBase = src_arr.blocks[src_grid_entry] + src_coord: tuple = src_arr.grid.get_entry_coordinates( + src_grid_entry + ) + dst_block.oid = self.km.advanced_assign_block_along_axis( + dst_block.oid, + src_block.oid, + ss, + axis, + dst_coord, + src_coord, + syskwargs={ + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + }, + ) + return dst_arr @staticmethod def to_block_array(obj, km: KernelManager, block_shape=None): @@ -511,3 +875,197 @@ def __inequality__(self, op, other): op, other_block, args={} ) return result + + +class Reshape: + @staticmethod + def compute_shape(shape, input_shape): + size = np.product(shape) + if -1 in input_shape: + new_shape = [] + other_dim_prod = 1 + negative_one_seen = False + for dim in input_shape: + if dim == -1: + if negative_one_seen: + raise Exception("Only one -1 permitted in reshape.") + negative_one_seen = True + continue + other_dim_prod *= dim + if size % other_dim_prod != 0: + raise Exception("Invalid shape.") + for dim in input_shape: + if dim == -1: + new_shape.append(size // other_dim_prod) + else: + new_shape.append(dim) + else: + new_shape = input_shape + assert size == np.product(new_shape) + return new_shape + + def _group_index_lists_by_block( + self, dst_slice_tuples, src_grid: ArrayGrid, dst_index_list, src_index_list + ): + # TODO(hme): Keep this function here until it's needed for greater support of + # selection/assignment operations. + # Block grid entries needed to write to given dst_slice_selection. + src_blocks = {} + dst_slice_np = np.array(dst_slice_tuples).T + dst_index_arr = np.array(dst_index_list) + src_index_arr = np.array(src_index_list) + # Pick the smallest type to represent indices. + # A set of these indices may be transmitted over the network, + # so we want to pick the smallest encoding possible. 
+ index_types = [ + (2**8, np.uint8), + (2**16, np.uint16), + (2**32, np.uint32), + (2**64, np.uint64), + ] + index_type = None + for bound, curr_index_type in index_types: + if np.all(np.array(src_grid.block_shape) < bound) and np.all( + dst_slice_np[1] < bound + ): + index_type = curr_index_type + break + if index_type is None: + raise Exception("Unable to encode block indices, blocks are too large.") + for grid_entry in src_grid.get_entry_iterator(): + src_slice_np = np.array(src_grid.get_slice_tuples(grid_entry)).T + index_pairs = [] + for i in range(src_index_arr.shape[0]): + src_index = src_index_arr[i] + dst_index = dst_index_arr[i] + if np.all( + (src_slice_np[0] <= src_index) & (src_index < src_slice_np[1]) + ): + index_pair = ( + (dst_index - dst_slice_np[0]).astype(index_type), + (src_index - src_slice_np[0]).astype(index_type), + ) + index_pairs.append(index_pair) + if len(index_pairs) > 0: + src_blocks[grid_entry] = index_pairs + return src_blocks + + def _arbitrary_reshape(self, arr: BlockArray, shape, block_shape) -> BlockArray: + # This is the worst-case scenario. + # Generate index mappings per block, and group source indices to minimize + # RPCs and generation of new objects. + km = arr.km + dst_arr = type(arr).empty( + shape=shape, block_shape=block_shape, dtype=arr.dtype, km=km + ) + for dst_grid_entry in dst_arr.grid.get_entry_iterator(): + dst_block: Block = dst_arr.blocks[dst_grid_entry] + dst_slice_selection = dst_arr.grid.get_slice(dst_grid_entry) + dst_index_list = array_utils.slice_sel_to_index_list(dst_slice_selection) + src_index_list = array_utils.translate_index_list( + dst_index_list, shape, arr.shape + ) + src_blocks = self._group_index_lists_by_block( + dst_arr.grid.get_slice_tuples(dst_grid_entry), + arr.grid, + dst_index_list, + src_index_list, + ) + for src_grid_entry in src_blocks: + src_block: Block = arr.blocks[src_grid_entry] + index_pairs = src_blocks[src_grid_entry] + syskwargs = { + "grid_entry": dst_grid_entry, + "grid_shape": dst_arr.grid.grid_shape, + } + dst_block.oid = km.update_block_by_index( + dst_block.oid, src_block.oid, index_pairs, syskwargs=syskwargs + ) + return dst_arr + + def _block_shape_reshape(self, arr, block_shape): + rarr: BlockArray = type(arr).empty(arr.shape, block_shape, arr.dtype, arr.km) + for grid_entry in rarr.grid.get_entry_iterator(): + grid_entry_slice = rarr.grid.get_slice(grid_entry) + # TODO (hme): This could be less costly. + rarr[grid_entry_slice] = arr[grid_entry_slice] + return rarr + + def _strip_ones(self, shape): + return tuple(filter(lambda x: x != 1, shape)) + + def _check_positions_ones(self, shape, block_shape): + # If a position in the shape is 1, then the corresponding + # position in block_shape should also be 1. + for i in range(len(shape)): + if shape[i] == 1: + if shape[i] != block_shape[i]: + return False + return True + + def _is_simple_reshape(self, arr: BlockArray, shape, block_shape): + # Is the reshape a difference of factors of 1? + # Strip out 1s and compare. + # If a position in the shape is 1, then the corresponding + # position in block_shape should also be 1. + + # If source shape and dest shape are the same or source block_shape and dest block_shape + # are same, this is not a simple reshape. + if shape == arr.shape or block_shape == arr.block_shape: + return False + + # Checks if source shape and dest shape are same & source block_shape and dest + # block_shape are same after stripping ones. 
+ if not ( + self._strip_ones(shape) == self._strip_ones(arr.shape) + and self._strip_ones(block_shape) == self._strip_ones(arr.block_shape) + ): + return False + if not self._check_positions_ones(shape, block_shape): + return False + return True + + def _simple_reshape(self, arr, shape, block_shape): + # Reshape the array of blocks only. + # This is only used when the difference in shape are factors of 1s, + # and the ordering of other factors are maintained. + + # Check assumptions. + assert len(self._strip_ones(arr.shape)) == len(self._strip_ones(shape)) + + # Create new grid, and perform reshape on blocks + # to simplify access to source blocks. + grid = ArrayGrid(shape, block_shape, dtype=arr.dtype.__name__) + src_blocks = arr.blocks.reshape(grid.grid_shape) + rarr = BlockArray(grid, arr.km) + for grid_entry in grid.get_entry_iterator(): + src_block: Block = src_blocks[grid_entry] + dst_block: Block = rarr.blocks[grid_entry] + syskwargs = {"grid_entry": grid_entry, "grid_shape": grid.grid_shape} + dst_block.oid = arr.km.reshape( + src_block.oid, dst_block.shape, syskwargs=syskwargs + ) + return rarr + + def _validate(self, arr, shape, block_shape): + assert -1 not in shape + assert -1 not in block_shape + assert len(shape) == len(block_shape) + assert np.product(arr.shape) == np.product(shape) + + def __call__(self, arr: BlockArray, shape, block_shape): + self._validate(arr, shape, block_shape) + if arr.shape == shape and arr.block_shape == block_shape: + return arr + elif self._is_simple_reshape(arr, shape, block_shape): + return self._simple_reshape(arr, shape, block_shape) + elif arr.shape == shape and arr.block_shape != block_shape: + return self._block_shape_reshape(arr, block_shape) + elif arr.shape != shape and arr.block_shape == block_shape: + # Just do full reshape for this case as well. + # Though there may be a better solution, we generally expect + # the block shape to change with array shape. 
+ return self._arbitrary_reshape(arr, shape, block_shape) + else: + assert arr.shape != shape and arr.block_shape != block_shape + return self._arbitrary_reshape(arr, shape, block_shape) diff --git a/nums/core/array/random.py b/nums/core/array/random.py index fb17e1d8..45196888 100644 --- a/nums/core/array/random.py +++ b/nums/core/array/random.py @@ -224,7 +224,6 @@ def sparse_randint( shape=None, block_shape=None, p=0.01, - fill_value=0, ): if dtype is None: dtype = np.int64 @@ -236,7 +235,6 @@ def sparse_randint( block_shape, dtype, p, - fill_value, ) def sparse_uniform( @@ -247,7 +245,6 @@ def sparse_uniform( block_shape=None, dtype=None, p=0.01, - fill_value=0, ): return self._sparse_sample_basic( "uniform", @@ -256,7 +253,6 @@ def sparse_uniform( block_shape, dtype, p, - fill_value, ) def sparse_normal( @@ -267,7 +263,6 @@ def sparse_normal( block_shape=None, dtype=None, p=0.01, - fill_value=0, ): return self._sparse_sample_basic( "normal", @@ -276,7 +271,6 @@ def sparse_normal( block_shape, dtype, p, - fill_value, ) def _sparse_sample_basic( @@ -287,7 +281,6 @@ def _sparse_sample_basic( block_shape, dtype, p, - fill_value, ) -> SparseBlockArray: if shape is None: assert block_shape is None @@ -300,7 +293,7 @@ def _sparse_sample_basic( assert isinstance(dtype, type) assert "size" not in rfunc_args grid: ArrayGrid = ArrayGrid(shape, block_shape, dtype=dtype.__name__) - sba: SparseBlockArray = SparseBlockArray(grid, self._km, fill_value) + sba: SparseBlockArray = SparseBlockArray(grid, self._km) for grid_entry in sba.grid.get_entry_iterator(): rng_params = list(self._rng.new_block_rng_params()) this_block_shape = grid.get_block_shape(grid_entry) @@ -316,7 +309,6 @@ def _sparse_sample_basic( this_block_shape, dtype, p, - fill_value, syskwargs=syskwargs, ) block._nnz = sba.km.sparse_nnz(block.oid, syskwargs=syskwargs) diff --git a/nums/core/array/sparse.py b/nums/core/array/sparse.py index 0a51947e..f7dc2e2d 100644 --- a/nums/core/array/sparse.py +++ b/nums/core/array/sparse.py @@ -19,17 +19,19 @@ def __init__( dtype, transposed, km: KernelManager, - fill_value=0, id=None, index_dtype=np.int64, ): super().__init__(grid_entry, grid_shape, shape, dtype, transposed, km, id) - self.fill_value = fill_value self.index_dtype = index_dtype self.oid = None self._nnz: object = None self._nbytes: object = None + @property + def is_dense(self): + return False + @property def nnz(self): if self._nnz is None: @@ -74,30 +76,6 @@ def copy(self, shallow=True): block.oid = self.oid return block - def transpose(self, defer=False, redistribute=False): - grid_entryT = tuple(reversed(self.grid_entry)) - grid_shapeT = tuple(reversed(self.grid_shape)) - blockT = SparseBlock( - grid_entry=grid_entryT, - grid_shape=grid_shapeT, - shape=tuple(reversed(self.shape)), - dtype=self.dtype, - transposed=not self.transposed, - km=self.km, - ) - blockT.oid = self.oid - if not defer: - blockT.transposed = False - if redistribute: - syskwargs = {"grid_entry": grid_entryT, "grid_shape": grid_shapeT} - else: - syskwargs = { - "grid_entry": self.grid_entry, - "grid_shape": self.grid_shape, - } - blockT.oid = self.km.transpose(self.oid, syskwargs=syskwargs) - return blockT - def map_uop(self, op_name, args=None, kwargs=None, device=None): block = self.copy() block.dtype = array_utils.get_uop_output_type(op_name, self.dtype) @@ -113,27 +91,43 @@ def map_uop(self, op_name, args=None, kwargs=None, device=None): ) block._nnz = self.km.sparse_nnz(block.oid, syskwargs=syskwargs) block._nbytes = self.km.sparse_nbytes(block.oid, 
syskwargs=syskwargs) - block.fill_value = np.__getattribute__(op_name)(self.fill_value) return block def block_from_scalar(self, other): assert array_utils.is_scalar(other) - block = SparseBlock( - self.grid_entry, - self.grid_shape, - (1,), - self.dtype, - False, - self.km, - fill_value=other, - ) - block.oid = self.km.sparse_block_from_scalar( - other, - syskwargs={ - "grid_entry": self.grid_entry, - "grid_shape": self.grid_shape, - }, - ) + # Construct sparse block only if value is 0. + if np.isclose(other, 0): + block = SparseBlock( + self.grid_entry, + self.grid_shape, + (1,), + self.dtype, + False, + self.km, + ) + block.oid = self.km.sparse_block_from_scalar( + other, + syskwargs={ + "grid_entry": self.grid_entry, + "grid_shape": self.grid_shape, + }, + ) + else: + block = Block( + self.grid_entry, + self.grid_shape, + (1,), + self.dtype, + False, + self.km, + ) + block.oid = self.km.block_from_scalar( + other, + syskwargs={ + "grid_entry": self.grid_entry, + "grid_shape": self.grid_shape, + }, + ) return block @staticmethod @@ -144,9 +138,6 @@ def init_block(op_name, block1, block2, args, device=None): result_shape, dtype, ) = BlockBase.block_meta(op_name, block1, block2, args) - fill_value = array_utils.get_bop_fill_value( - op_name, block1.fill_value, block2.fill_value - ) # TODO: what happens when different index_dtype? block = SparseBlock( grid_entry=result_grid_entry, @@ -154,7 +145,6 @@ def init_block(op_name, block1, block2, args, device=None): shape=result_shape, dtype=dtype, transposed=False, - fill_value=fill_value, km=block1.km, ) block._device = device @@ -174,9 +164,7 @@ def binary_op(op_name, a: BlockBase, b: BlockBase, args: dict, device=None): if not isinstance(a, BlockBase) or not isinstance(b, BlockBase): raise NotImplementedError() - densify = array_utils.get_sparse_bop_return_type( - op_name, a.fill_value, b.fill_value - ) + densify = array_utils.get_sparse_bop_densify(op_name, a.is_dense, b.is_dense) if densify: block = Block.init_block(op_name, a, b, args, device) else: @@ -206,11 +194,7 @@ def binary_op(op_name, a: BlockBase, b: BlockBase, args: dict, device=None): def bop(self, op_name, other, args: dict, device=None): return self.binary_op(op_name, self, other, args, device) - # TODO: densify when fill_value != 0 def tensordot(self, other, axes): - assert self.fill_value == 0 - if not other.is_dense: - assert other.fill_value == 0 return self.binary_op("tensordot", self, other, args={"axes": axes}) @@ -219,7 +203,6 @@ def __init__( self, grid: ArrayGrid, km: KernelManager, - fill_value=0, blocks: np.ndarray = None, ): if blocks is not None: @@ -237,12 +220,14 @@ def __init__( self.dtype, False, self.km, - fill_value, ) - self.fill_value = fill_value self._nnz = -1 self._nbytes = -1 + @property + def is_dense(self): + return False + @property def nnz(self): return self._get_nnz() @@ -270,17 +255,13 @@ def __repr__(self): @classmethod def empty(cls, shape, block_shape, dtype, km: KernelManager): - return SparseBlockArray.create( - "empty", shape, block_shape, dtype, km, fill_value=0 - ) + return SparseBlockArray.create("empty", shape, block_shape, dtype, km) @classmethod - def create( - cls, create_op_name, shape, block_shape, dtype, km: KernelManager, fill_value=0 - ): + def create(cls, create_op_name, shape, block_shape, dtype, km: KernelManager): grid = ArrayGrid(shape=shape, block_shape=block_shape, dtype=dtype.__name__) grid_meta = grid.to_meta() - arr = SparseBlockArray(grid, km, fill_value) + arr = SparseBlockArray(grid, km) for grid_entry in 
grid.get_entry_iterator(): arr.blocks[grid_entry].oid = km.new_sparse_block( create_op_name, @@ -291,10 +272,10 @@ def create( return arr @classmethod - def from_np(cls, arr, block_shape, copy, km, fill_value=0): + def from_np(cls, arr, block_shape, copy, km): dtype_str = str(arr.dtype) grid = ArrayGrid(arr.shape, block_shape, dtype_str) - rarr = SparseBlockArray(grid, km, fill_value) + rarr = SparseBlockArray(grid, km) grid_entry_iterator = grid.get_entry_iterator() for grid_entry in grid_entry_iterator: grid_slice = grid.get_slice(grid_entry) @@ -302,7 +283,7 @@ def from_np(cls, arr, block_shape, copy, km, fill_value=0): if copy: block = np.copy(block) # TODO: generalize for different kernels - block = sparse.COO.from_numpy(block, fill_value) + block = sparse.COO.from_numpy(block, fill_value=0) rarr.blocks[grid_entry].oid = km.put( block, syskwargs={"grid_entry": grid_entry, "grid_shape": grid.grid_shape}, @@ -333,12 +314,10 @@ def from_sparse(cls, arr, block_shape, copy, km, fill_value=0): def from_scalar(cls, val, km): if not array_utils.is_scalar(val): raise ValueError("%s is not a scalar." % val) - return SparseBlockArray.from_np( - np.array(val), (), copy=False, km=km, fill_value=val - ) + return SparseBlockArray.from_np(np.array(val), (), copy=False, km=km) @classmethod - def from_blocks(cls, arr: np.ndarray, result_shape, km, fill_value): + def from_blocks(cls, arr: np.ndarray, result_shape, km): sample_idx = tuple(0 for _ in arr.shape) if isinstance(arr, SparseBlock): sample_block = arr @@ -353,7 +332,7 @@ def from_blocks(cls, arr: np.ndarray, result_shape, km, fill_value): shape=result_shape, block_shape=result_block_shape, dtype=result_dtype_str ) assert arr.shape == result_grid.grid_shape - result = SparseBlockArray(result_grid, km, fill_value) + result = SparseBlockArray(result_grid, km) for grid_entry in result_grid.get_entry_iterator(): if isinstance(arr, SparseBlock): block: SparseBlock = arr @@ -363,12 +342,12 @@ def from_blocks(cls, arr: np.ndarray, result_shape, km, fill_value): return result @classmethod - def from_ba(cls, ba: BlockArrayBase, fill_value=0): + def from_ba(cls, ba: BlockArrayBase): assert ( ba.shape != () ), "from_ba does not support scalar BlockArray. Use from_scalar." 
grid = ArrayGrid(ba.shape, ba.block_shape, ba.dtype.__name__) - sba = SparseBlockArray(grid, ba.km, fill_value) + sba = SparseBlockArray(grid, ba.km) for grid_entry in grid.get_entry_iterator(): block: Block = ba.blocks[grid_entry] sblock: SparseBlock = sba.blocks[grid_entry] @@ -378,12 +357,10 @@ def from_ba(cls, ba: BlockArrayBase, fill_value=0): } sblock.oid = sba.km.dense_to_sparse( block.oid, - fill_value, syskwargs=syskwargs, ) sblock._nnz = sba.km.sparse_nnz(sblock.oid, syskwargs=syskwargs) sblock._nbytes = sba.km.sparse_nbytes(sblock.oid, syskwargs=syskwargs) - sba.fill_value = fill_value return sba def to_ba(self): @@ -404,7 +381,7 @@ def todense(self) -> BlockArray: def copy(self): grid_copy = self.grid.from_meta(self.grid.to_meta()) - rarr_copy = SparseBlockArray(grid_copy, self.km, self.fill_value) + rarr_copy = SparseBlockArray(grid_copy, self.km) for grid_entry in grid_copy.get_entry_iterator(): rarr_copy.blocks[grid_entry] = self.blocks[grid_entry].copy() return rarr_copy @@ -416,21 +393,6 @@ def astype(self, dtype): result.blocks[grid_entry] = self.blocks[grid_entry].astype(dtype) return result - def transpose(self, defer=False, redistribute=False): - if defer and redistribute: - warnings.warn("defer is True, redistribute=True will be ignored.") - metaT = self.grid.to_meta() - metaT["shape"] = tuple(reversed(metaT["shape"])) - metaT["block_shape"] = tuple(reversed(metaT["block_shape"])) - gridT = ArrayGrid.from_meta(metaT) - rarrT = SparseBlockArray(gridT, self.km, self.fill_value) - rarrT.blocks = np.copy(self.blocks.T) - for grid_entry in rarrT.grid.get_entry_iterator(): - rarrT.blocks[grid_entry] = rarrT.blocks[grid_entry].transpose( - defer, redistribute - ) - return rarrT - @staticmethod def to_block_array(obj, km: KernelManager, block_shape=None): if isinstance(obj, (BlockArray, SparseBlockArray)): @@ -463,8 +425,6 @@ def ufunc(self, op_name): result = self.copy() for grid_entry in self.grid.get_entry_iterator(): result.blocks[grid_entry] = self.blocks[grid_entry].ufunc(op_name) - func = np.__getattribute__(op_name) - result.fill_value = func(self.fill_value) result._nnz = -1 result._nbytes = -1 return result @@ -482,11 +442,7 @@ def elementwise(op_name, a, b): else: raise NotImplementedError() - densify = array_utils.get_sparse_bop_return_type( - op_name, - a.fill_value, - b.fill_value, - ) + densify = array_utils.get_sparse_bop_densify(op_name, a.is_dense, b.is_dense) if a.shape == b.shape and a.block_shape == b.block_shape: return SparseBlockArray._fast_elementwise(op_name, a, b, densify) else: @@ -498,13 +454,9 @@ def elementwise(op_name, a, b): km=a.km, ) else: - fill_value = array_utils.get_bop_fill_value( - op_name, a.fill_value, b.fill_value - ) result = SparseBlockArray.from_blocks( blocks_op(b.blocks), result_shape=None, - fill_value=fill_value, km=a.km, ) return result @@ -515,12 +467,8 @@ def _fast_elementwise(op_name, a, b, densify): dtype = array_utils.get_bop_output_type(op_name, a.dtype, b.dtype) if densify: block_type = Block - fill_value = None else: block_type = SparseBlock - fill_value = array_utils.get_bop_fill_value( - op_name, a.fill_value, b.fill_value - ) blocks = np.empty(shape=a.grid_shape, dtype=block_type) for grid_entry in a.grid.get_entry_iterator(): a_block: BlockBase = a.blocks[grid_entry] @@ -550,7 +498,7 @@ def _fast_elementwise(op_name, a, b, densify): if densify: return BlockArray(grid, a.km, blocks=blocks) else: - return SparseBlockArray(grid, a.km, fill_value, blocks=blocks) + return SparseBlockArray(grid, a.km, blocks=blocks) 
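The elementwise and tensordot paths above all key off a single densify decision. Below is a minimal standalone sketch of that decision, assuming PyData/Sparse and mirroring the `get_sparse_bop_densify` helper this patch adds to `nums/core/array/utils.py`; the name `probe_densify` is illustrative only, and op names are passed here as plain NumPy ufunc names, without the aliasing the real helper applies.

```python
import numpy as np
import sparse


def probe_densify(ufunc_name: str, a_dense: bool, b_dense: bool) -> bool:
    """Return True if a binary op on operands with these density flags
    should produce a dense result, judged by probing PyData/Sparse."""

    def sample(is_dense: bool):
        # Tiny 2x2 probe: a plain ndarray if dense, else a COO with fill_value=0.
        s = np.eye(2)
        return s if is_dense else sparse.COO.from_numpy(s, fill_value=0)

    sa, sb = sample(a_dense), sample(b_dense)
    if ufunc_name == "tensordot":
        result = sparse.tensordot(sa, sb)
    else:
        result = getattr(np, ufunc_name)(sa, sb)
    # Densify only if PyData/Sparse itself did not keep the result sparse.
    return not isinstance(result, sparse.SparseArray)


# Sparse * sparse stays sparse, so the sparse code path is kept.
print(probe_densify("multiply", a_dense=False, b_dense=False))  # expected: False
# Sparse tensordot against a dense operand comes back dense, so densify.
print(probe_densify("tensordot", a_dense=False, b_dense=True))  # expected: True
```

Probing a tiny 2x2 operand pair keeps the decision in sync with whatever promotion rules the installed PyData/Sparse version implements, rather than hard-coding a table of ops per density combination.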
################# # Linear Algebra @@ -572,20 +520,11 @@ def tensordot(a, b, axes=2): else: raise NotImplementedError() - # PyData/Sparse only works with fill_value == 0 - # TODO: densify when fill_value != 0 - if not (a.is_dense or b.is_dense): - assert ( - a.fill_value == 0 and b.fill_value == 0 - ), "Sparse-sparse tensordot with non-zero fill value is not supported." - if array_utils.np_tensordot_param_test(a.shape, a.ndim, b.shape, b.ndim, axes): raise ValueError("shape-mismatch for sum") - densify = array_utils.get_sparse_bop_return_type( - "tensordot", - a.fill_value, - b.fill_value, + densify = array_utils.get_sparse_bop_densify( + "tensordot", a.is_dense, b.is_dense ) if axes > 0: @@ -614,7 +553,7 @@ def tensordot(a, b, axes=2): if densify: result = BlockArray(result_grid, a.km) else: - result = SparseBlockArray(result_grid, a.km, a.fill_value) + result = SparseBlockArray(result_grid, a.km) a_dims = list(itertools.product(*map(range, a_axes))) b_dims = list(itertools.product(*map(range, b_axes))) sum_dims = list(itertools.product(*map(range, a_sum_axes))) @@ -663,13 +602,12 @@ def sdtp(self, *block_arrays: List[BlockArray]): """ Perform a sampled tensor product among an arbitrary number of block arrays. """ - assert np.allclose(self.fill_value, 0) for i, ba in enumerate(block_arrays): assert len(ba.shape) == 1 assert ba.shape[0] == self.shape[i] assert ba.block_shape[0] == self.block_shape[i] # Sparsity of result is same as self. - result: SparseBlockArray = SparseBlockArray(self.grid, self.km, self.fill_value) + result: SparseBlockArray = SparseBlockArray(self.grid, self.km) for grid_entry in self.grid.get_entry_iterator(): dense_oids = [ ba.blocks[grid_entry[i]].oid for i, ba in enumerate(block_arrays) @@ -690,8 +628,6 @@ def sdtd(self, x: BlockArray, y: BlockArray, axes: int): is being performed are not partitioned. The last constraint can be eliminated if we add a sampled element-wise kernel. 
""" - assert np.allclose(self.fill_value, 0) - if array_utils.np_tensordot_param_test(x.shape, x.ndim, y.shape, y.ndim, axes): raise ValueError("shape-mismatch for sum") @@ -716,7 +652,7 @@ def sdtd(self, x: BlockArray, y: BlockArray, axes: int): assert result_grid.grid_shape == tuple(x_axes + y_axes) assert result_grid.grid_shape == self.grid_shape assert result_grid.block_shape == self.block_shape - result: SparseBlockArray = SparseBlockArray(self.grid, self.km, self.fill_value) + result: SparseBlockArray = SparseBlockArray(self.grid, self.km) x_dims = list(itertools.product(*map(range, x_axes))) y_dims = list(itertools.product(*map(range, y_axes))) sum_dims = tuple([0] * axes) @@ -752,10 +688,7 @@ def __inequality__(self, op_name, other): ) dtype = bool.__name__ grid = ArrayGrid(shape, block_shape, dtype) - fill_value = array_utils.get_bop_fill_value( - op_name, self.fill_value, other.fill_value - ) - result = SparseBlockArray(grid, self.km, fill_value) + result = SparseBlockArray(grid, self.km) for grid_entry in result.grid.get_entry_iterator(): other_block: Block = other.blocks.item() result.blocks[grid_entry] = self.blocks[grid_entry].bop( diff --git a/nums/core/array/utils.py b/nums/core/array/utils.py index 872f5d9d..ed01190e 100644 --- a/nums/core/array/utils.py +++ b/nums/core/array/utils.py @@ -453,15 +453,15 @@ def normalize_axis_index(axis, ndim): return axis % ndim -def get_sparse_bop_return_type(op_name, a_fv, b_fv): - def sample_array(fv): +def get_sparse_bop_densify(op_name, a_dense, b_dense): + def sample_array(is_dense): s = np.eye(2) - if fv is not None: - return sparse.COO.from_numpy(s, fill_value=fv) + if not is_dense: + return sparse.COO.from_numpy(s, fill_value=0) return s - sa = sample_array(a_fv) - sb = sample_array(b_fv) + sa = sample_array(a_dense) + sb = sample_array(b_dense) if op_name == "tensordot": result = sparse.tensordot(sa, sb) else: @@ -474,18 +474,3 @@ def sample_array(fv): if isinstance(result, sparse.SparseArray): return False return True - - -def get_bop_fill_value(op_name, a_fv, b_fv): - if a_fv is None and b_fv is None: - return None - if a_fv is None: - return b_fv - if b_fv is None: - return a_fv - op_name = np_ufunc_map.get(op_name, op_name) - try: - func = np.__getattribute__(op_name) - except Exception as _: - func = scipy.special.__getattribute__(op_name) - return func(a_fv, b_fv) diff --git a/nums/core/kernel/kernel_interface.py b/nums/core/kernel/kernel_interface.py index 76631799..f1f16218 100644 --- a/nums/core/kernel/kernel_interface.py +++ b/nums/core/kernel/kernel_interface.py @@ -167,7 +167,7 @@ def any(self, arr, syskwargs: Dict): # Sparse - def dense_to_sparse(self, arr, fill_value, syskwargs: Dict): + def dense_to_sparse(self, arr, syskwargs: Dict): raise NotImplementedError() def sparse_to_dense(self, arr, syskwargs: Dict): @@ -190,7 +190,6 @@ def sparse_random_block( shape, dtype, p, - fill_value, syskwargs: Dict, ): raise NotImplementedError() diff --git a/nums/core/kernel/numpy_kernel.py b/nums/core/kernel/numpy_kernel.py index f5fee593..20523e0a 100644 --- a/nums/core/kernel/numpy_kernel.py +++ b/nums/core/kernel/numpy_kernel.py @@ -527,9 +527,8 @@ def any(self, arr): # Sparse - def dense_to_sparse(self, arr, fill_value): - result = COO.from_numpy(arr, fill_value=fill_value) - return result + def dense_to_sparse(self, arr): + return COO.from_numpy(arr, fill_value=0) def sparse_to_dense(self, arr): return arr.todense() @@ -558,7 +557,6 @@ def sparse_random_block( shape, dtype, p, - fill_value, ): rng = 
block_rng_legacy(*rng_params) rfunc = rng.__getattribute__(rfunc_name) @@ -570,7 +568,7 @@ def sparse_random_block( random_state=rng, data_rvs=lambda s: rfunc(**rfunc_args, size=s), format="coo", - fill_value=fill_value, + fill_value=0, ) return result.astype(dtype) @@ -615,14 +613,14 @@ def sparse_bop(self, op, a1, a2, a1_T, a2_T, axes, densify): def sparse_block_from_scalar(self, x): assert np.isscalar(x) - return sparse.COO.from_numpy(np.array(x), fill_value=x) + return sparse.COO.from_numpy(np.array(x), fill_value=0) def sdtp(self, s: sparse.COO, *dense_arrays): data = np.copy(s.data) for position in range(s.nnz): for axis in range(len(s.shape)): data[position] *= dense_arrays[axis][s.coords[axis][position]] - return sparse.COO(s.coords, data, shape=s.shape, fill_value=s.fill_value) + return sparse.COO(s.coords, data, shape=s.shape, fill_value=0) def sdtd(self, s: sparse.COO, x: np.ndarray, y: np.ndarray, axes: int): # Check some things. @@ -647,4 +645,4 @@ def sdtd(self, s: sparse.COO, x: np.ndarray, y: np.ndarray, axes: int): sx = x[tuple(x_coords)] sy = y[tuple(y_coords)] data[position] *= np.tensordot(sx, sy, axes=axes) - return sparse.COO(s.coords, data, shape=s.shape, fill_value=s.fill_value) + return sparse.COO(s.coords, data, shape=s.shape, fill_value=0) diff --git a/nums/experimental/optimizer/graph.py b/nums/experimental/optimizer/graph.py index 361e740f..2440e622 100644 --- a/nums/experimental/optimizer/graph.py +++ b/nums/experimental/optimizer/graph.py @@ -1215,7 +1215,7 @@ def _collapse(self, device: Device): leaf.block = block # Assume dense for simplicity. leaf.tree_node_size = TreeNodeSize( - self.shape(), np.prod(self.shape()), block.dtype, block.fill_value + self.shape(), np.prod(self.shape()), block.dtype, block.is_dense ) leaf.copy_on_op = self.copy_on_op return leaf, block diff --git a/nums/experimental/optimizer/grapharray.py b/nums/experimental/optimizer/grapharray.py index f4a2300b..4755a574 100644 --- a/nums/experimental/optimizer/grapharray.py +++ b/nums/experimental/optimizer/grapharray.py @@ -70,7 +70,7 @@ def graphs_from_ba( block.shape, nnz, block.dtype, - block.fill_value, + block.is_dense, ) leaf.copy_on_op = copy_on_op graphs[grid_entry] = leaf @@ -110,9 +110,7 @@ def to_ba(self): ), "Cannot convert unsolved GraphArray to BlockArray." if sample_node.block.is_dense: return BlockArray(self.grid.copy(), self.km, self.to_blocks()) - return SparseBlockArray( - self.grid.copy(), self.km, sample_node.block.fill_value, self.to_blocks() - ) + return SparseBlockArray(self.grid.copy(), self.km, self.to_blocks()) def __init__( self, diff --git a/nums/experimental/optimizer/size.py b/nums/experimental/optimizer/size.py index e813259b..089d97d4 100644 --- a/nums/experimental/optimizer/size.py +++ b/nums/experimental/optimizer/size.py @@ -10,11 +10,11 @@ class TreeNodeSize: Sparse binary operations use B(n, p) to estimate nnz as needed. 
""" - def __init__(self, shape, nnz, dtype, fill_value=None, index_dtype=np.int64): + def __init__(self, shape, nnz, dtype, is_dense, index_dtype=np.int64): self.shape = shape self.nnz = nnz self.dtype = dtype - self.fill_value = fill_value + self.is_dense = is_dense self.index_dtype = index_dtype if self.is_dense: assert nnz == np.prod(shape) @@ -44,14 +44,10 @@ def copy(self): self.shape, self.nnz, self.dtype, - self.fill_value, + self.is_dense, self.index_dtype, ) - @property - def is_dense(self): - return self.fill_value is None - @property def nbytes(self): if self.is_dense: @@ -69,17 +65,13 @@ def uop(self, op_name): shape=tuple(reversed(self.shape)), nnz=self.nnz, dtype=self.dtype, - fill_value=self.fill_value, + is_dense=self.is_dense, ) - if self.is_dense: - fill_value = None - else: - fill_value = np.__getattribute__(op_name)(self.fill_value) return TreeNodeSize( shape=self.shape, nnz=self.nnz, dtype=array_utils.get_uop_output_type(op_name, self.dtype), - fill_value=fill_value, + is_dense=self.is_dense, ) def reduce_axis(self, op_name, axis, keepdims, transposed): @@ -97,7 +89,7 @@ def reduce_axis(self, op_name, axis, keepdims, transposed): shape=tuple(shape), nnz=np.prod(shape), dtype=array_utils.get_uop_output_type(op_name, self.dtype), - fill_value=None, + is_dense=True, ) def _nnz_disjunction(self, other, shape): @@ -118,7 +110,6 @@ def _nnz_conjunction(self, other, shape): def _nnz_selection(self, other, shape): # If self element is nonzero, result is nonzero. - assert self.fill_value == 0 n1 = np.prod(self.shape) p1 = self.nnz / n1 return int(p1 * np.prod(shape)) @@ -126,17 +117,17 @@ def _nnz_selection(self, other, shape): def add(self, other): shape = array_utils.broadcast_shape(self.shape, other.shape) dtype = array_utils.get_bop_output_type("add", self.dtype, other.dtype) - is_dense = array_utils.get_sparse_bop_return_type( + is_dense = array_utils.get_sparse_bop_densify( "add", - self.fill_value, - other.fill_value, + self.is_dense, + other.is_dense, ) if is_dense: return TreeNodeSize( shape=shape, nnz=np.prod(shape), dtype=dtype, - fill_value=None, + is_dense=True, ) if self.is_dense or other.is_dense: raise ValueError( @@ -146,9 +137,7 @@ def add(self, other): shape=shape, nnz=self._nnz_disjunction(other, shape), dtype=dtype, - fill_value=array_utils.get_bop_fill_value( - "add", self.fill_value, other.fill_value - ), + is_dense=False, ) __add__ = add @@ -156,30 +145,23 @@ def add(self, other): def mul(self, other): shape = array_utils.broadcast_shape(self.shape, other.shape) dtype = array_utils.get_bop_output_type("mul", self.dtype, other.dtype) - is_dense = array_utils.get_sparse_bop_return_type( + is_dense = array_utils.get_sparse_bop_densify( "mul", - self.fill_value, - other.fill_value, + self.is_dense, + other.is_dense, ) if is_dense: return TreeNodeSize( shape=shape, nnz=np.prod(shape), dtype=dtype, - fill_value=None, + is_dense=True, ) if not self.is_dense and not other.is_dense: - if self.fill_value == 0 and other.fill_value == 0: - nnz = self._nnz_conjunction(other, shape) - elif self.fill_value == 0: - nnz = self._nnz_selection(other, shape) - elif other.fill_value == 0: - nnz = other._nnz_selection(self, shape) - else: - nnz = self._nnz_disjunction(other, shape) - elif self.fill_value == 0 and other.is_dense: + nnz = self._nnz_conjunction(other, shape) + elif not self.is_dense and other.is_dense: nnz = self._nnz_selection(other, shape) - elif self.is_dense and other.fill_value == 0: + elif self.is_dense and not other.is_dense: nnz = 
other._nnz_selection(self, shape) else: raise ValueError( @@ -189,9 +171,7 @@ def mul(self, other): shape=shape, nnz=nnz, dtype=dtype, - fill_value=array_utils.get_bop_fill_value( - "mul", self.fill_value, other.fill_value - ), + is_dense=False, ) __mul__ = mul @@ -199,25 +179,25 @@ def mul(self, other): def truediv(self, other): shape = array_utils.broadcast_shape(self.shape, other.shape) dtype = array_utils.get_bop_output_type("truediv", self.dtype, other.dtype) - is_dense = array_utils.get_sparse_bop_return_type( + is_dense = array_utils.get_sparse_bop_densify( "truediv", - self.fill_value, - other.fill_value, + self.is_dense, + other.is_dense, ) if is_dense: return TreeNodeSize( shape=shape, nnz=np.prod(shape), dtype=dtype, - fill_value=None, + is_dense=True, ) if self.is_dense or other.is_dense: raise ValueError( "TreeNodeSize.__add__ is inconsistent with sparse bop rules." ) - if other.fill_value == 0: # nan + if not other.is_dense: # nan nnz = other._nnz_selection(self, shape) - elif self.fill_value == 0: + elif not self.is_dense: nnz = self._nnz_selection(other, shape) else: nnz = self._nnz_disjunction(other, shape) @@ -225,9 +205,7 @@ def truediv(self, other): shape=shape, nnz=nnz, dtype=dtype, - fill_value=array_utils.get_bop_fill_value( - "truediv", self.fill_value, other.fill_value - ), + is_dense=False, ) __truediv__ = truediv @@ -248,22 +226,17 @@ def tensordot(self, other, axes): shape = tuple(self.shape + other.shape) sum_shape = (1,) dtype = array_utils.get_bop_output_type("tensordot", self.dtype, other.dtype) - is_dense = array_utils.get_sparse_bop_return_type( - "tensordot", self.fill_value, other.fill_value + is_dense = array_utils.get_sparse_bop_densify( + "tensordot", self.is_dense, other.is_dense ) if is_dense: return TreeNodeSize( shape=shape, nnz=np.prod(shape), dtype=dtype, - fill_value=None, + is_dense=True, ) - if ( - self.is_dense - or other.is_dense - or self.fill_value != 0 - or other.fill_value != 0 - ): + if self.is_dense or other.is_dense: raise ValueError( "TreeNodeSize.tensordot is inconsistent with sparse bop rules." 
) @@ -277,20 +250,17 @@ def tensordot(self, other, axes): shape=shape, nnz=int((1 - (1 - p1 * p2) ** k) * m), dtype=dtype, - fill_value=0, + is_dense=False, ) def inequality(self, op_name, other): assert other.shape == () dtype = array_utils.get_bop_output_type(op_name, self.dtype, other.dtype) - fill_value = array_utils.get_bop_fill_value( - op_name, self.fill_value, other.fill_value - ) - return TreeNode( + return TreeNodeSize( shape=self.shape, nnz=self.nnz, dtype=dtype, - fill_value=fill_value, + is_dense=self.is_dense, ) def bop_dense(self, other): @@ -301,7 +271,7 @@ def bop_dense(self, other): shape=shape, nnz=np.prod(shape), dtype=dtype, - fill_value=None, + is_dense=False, ) def bop(self, op_name, other, **kwargs): diff --git a/nums/numpy/random.py b/nums/numpy/random.py index 4f031df5..edd7d6a4 100644 --- a/nums/numpy/random.py +++ b/nums/numpy/random.py @@ -79,25 +79,20 @@ def permutation(self, x): arr_perm = self.rs().permutation(shape[0], block_shape[0]).get() return x[arr_perm] - def sparse_randn(self, density, fill_value, *shape): + def sparse_randn(self, density, *shape): shape, block_shape = self._get_shapes(shape, _np.float64) return self.rs().sparse_normal( shape=shape, block_shape=block_shape, p=density, - fill_value=fill_value, ) - def sparse_randint( - self, low, high=None, size=None, dtype=int, density=0.01, fill_value=0 - ): + def sparse_randint(self, low, high=None, size=None, dtype=int, density=0.01): if high is None: high = low low = 0 shape, block_shape = self._get_shapes(size, dtype) - return self.rs().sparse_randint( - low, high, dtype, shape, block_shape, density, fill_value - ) + return self.rs().sparse_randint(low, high, dtype, shape, block_shape, density) # Default imp. diff --git a/tests/core/array/test_sparse.py b/tests/core/array/test_sparse.py index 302bd082..4563771e 100644 --- a/tests/core/array/test_sparse.py +++ b/tests/core/array/test_sparse.py @@ -11,7 +11,7 @@ def test_sparse_init(app_inst: ArrayApplication): x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 0, 0], [2, 2, 0, 0]]) x_ba = app_inst.array(x1, block_shape=(2, 2)) - x_sba = SparseBlockArray.from_ba(x_ba, fill_value=0) + x_sba = SparseBlockArray.from_ba(x_ba) assert x_sba.nnz == 8 assert x_sba.nbytes == 8 * 4 + 2 * 8 * 8 y_ba = x_sba.to_ba() @@ -40,7 +40,6 @@ def test_sparse_random(app_inst: ArrayApplication): shape=(100, 50), block_shape=(50, 50), p=0.1, - fill_value=0, ) x_ba = x_sba.to_ba() x_np = x_ba.get() @@ -50,12 +49,11 @@ def test_sparse_random(app_inst: ArrayApplication): def test_sparse_uop(app_inst: ArrayApplication): x = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 0, 0], [2, 2, 0, 0]]) - x_sp = sparse.COO.from_numpy(x, fill_value=1) + x_sp = sparse.COO.from_numpy(x, fill_value=0) x_sp = sparse.elemwise(np.negative, x_sp) x_ba = app_inst.array(x, block_shape=(2, 2)) - x_sba = SparseBlockArray.from_ba(x_ba, fill_value=1) + x_sba = SparseBlockArray.from_ba(x_ba) y_sba = -x_sba - assert y_sba.fill_value == x_sp.fill_value # -1 assert y_sba.nnz == x_sp.nnz # 12 y_ba = y_sba.to_ba() assert np.array_equal(np.negative(x), y_ba.get()) @@ -64,15 +62,14 @@ def test_sparse_uop(app_inst: ArrayApplication): def test_sparse_add(app_inst: ArrayApplication): x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 0, 0], [2, 2, 0, 0]]) x2 = x1 * 2 - x1_sp = sparse.COO.from_numpy(x1, fill_value=2) - x2_sp = sparse.COO.from_numpy(x2, fill_value=2) + x1_sp = sparse.COO.from_numpy(x1, fill_value=0) + x2_sp = sparse.COO.from_numpy(x2, fill_value=0) x1_ba = app_inst.array(x1, block_shape=(2, 2)) x2_ba 
= app_inst.array(x2, block_shape=(2, 2)) - x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=2) - x2_sba = SparseBlockArray.from_ba(x2_ba, fill_value=2) + x1_sba = SparseBlockArray.from_ba(x1_ba) + x2_sba = SparseBlockArray.from_ba(x2_ba) y_sp = x1_sp + x2_sp y_sba = x1_sba + x2_sba - assert y_sba.fill_value == y_sp.fill_value # 4 assert y_sba.nnz == y_sp.nnz # 16 y_ba = y_sba.to_ba() assert np.array_equal(x1 + x2, y_ba.get()) @@ -88,13 +85,11 @@ def test_sparse_add(app_inst: ArrayApplication): # Test sparse-scalar. y_sp = x1_sp - 1 # __sub__ y_sba = x1_sba - 1 - assert y_sba.fill_value == y_sp.fill_value # 4 assert y_sba.nnz == y_sp.nnz # 16 y_ba = y_sba.to_ba() assert np.array_equal(x1 - 1, y_ba.get()) y_sp = 1 - x1_sp # __rsub__ y_sba = 1 - x1_sba - assert y_sba.fill_value == y_sp.fill_value # 4 assert y_sba.nnz == y_sp.nnz # 16 y_ba = y_sba.to_ba() assert np.array_equal(1 - x1, y_ba.get()) @@ -103,15 +98,14 @@ def test_sparse_add(app_inst: ArrayApplication): def test_sparse_mul(app_inst: ArrayApplication): x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 0, 0], [2, 2, 0, 0]]) x2 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]]) - x1_sp = sparse.COO.from_numpy(x1, fill_value=2) - x2_sp = sparse.COO.from_numpy(x2, fill_value=2) + x1_sp = sparse.COO.from_numpy(x1, fill_value=0) + x2_sp = sparse.COO.from_numpy(x2, fill_value=0) x1_ba = app_inst.array(x1, block_shape=(2, 2)) x2_ba = app_inst.array(x2, block_shape=(2, 2)) - x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=2) - x2_sba = SparseBlockArray.from_ba(x2_ba, fill_value=2) + x1_sba = SparseBlockArray.from_ba(x1_ba) + x2_sba = SparseBlockArray.from_ba(x2_ba) y_sp = x1_sp * x2_sp y_sba = x1_sba * x2_sba - assert y_sba.fill_value == y_sp.fill_value # 4 assert y_sba.nnz == y_sp.nnz # 16 y_ba = y_sba.to_ba() assert np.array_equal(x1 * x2, y_ba.get()) @@ -119,10 +113,10 @@ def test_sparse_mul(app_inst: ArrayApplication): # Test sparse-dense. rs: NumsRandomState = app_inst.random_state(1337) x1_sba = rs.sparse_randint( - 1, high=5, dtype=int, shape=(100, 50), block_shape=(5, 5), p=0.1, fill_value=0 + 1, high=5, dtype=int, shape=(100, 50), block_shape=(5, 5), p=0.1 ) x2_sba = rs.sparse_randint( - 1, high=5, dtype=int, shape=(1, 50), block_shape=(5, 5), p=0.1, fill_value=0 + 1, high=5, dtype=int, shape=(1, 50), block_shape=(5, 5), p=0.1 ) x1_ba = x1_sba.to_ba() x2_ba = x2_sba.to_ba() @@ -132,7 +126,6 @@ def test_sparse_mul(app_inst: ArrayApplication): x2_sp = sparse.COO.from_numpy(x2, fill_value=0) y_sp = x1_sp * x2 y_sba = x2_ba * x1_sba # __rmul__ - assert y_sba.fill_value == y_sp.fill_value assert y_sba.nnz == y_sp.nnz y_ba = y_sba.to_ba() assert np.array_equal(x1 * x2, y_ba.get()) @@ -140,12 +133,11 @@ def test_sparse_mul(app_inst: ArrayApplication): def test_neq(app_inst: ArrayApplication): x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 0, 0], [2, 2, 0, 0]]) - x1_sp = sparse.COO.from_numpy(x1, fill_value=2) + x1_sp = sparse.COO.from_numpy(x1, fill_value=0) x1_ba = app_inst.array(x1, block_shape=(2, 2)) - x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=2) + x1_sba = SparseBlockArray.from_ba(x1_ba) y_sp = x1_sp > 0 y_sba = x1_sba > 0 - assert y_sba.fill_value == y_sp.fill_value # True assert y_sba.nnz == y_sp.nnz # 8 (nnz changes!) 
y_ba = y_sba.to_ba() assert np.array_equal(x1 > 0, y_ba.get()) @@ -158,11 +150,10 @@ def test_tensordot(app_inst: ArrayApplication): x2_sp = sparse.COO.from_numpy(x2, fill_value=0) x1_ba = app_inst.array(x1, block_shape=(2, 2)) x2_ba = app_inst.array(x2, block_shape=(2, 2)) - x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=0) - x2_sba = SparseBlockArray.from_ba(x2_ba, fill_value=0) + x1_sba = SparseBlockArray.from_ba(x1_ba) + x2_sba = SparseBlockArray.from_ba(x2_ba) y_sp = sparse.tensordot(x1_sp, x2_sp, axes=1) y_sba = x1_sba @ x2_sba - assert y_sba.fill_value == y_sp.fill_value # 0 assert y_sba.nnz == y_sp.nnz # y_ba = y_sba.to_ba() assert np.array_equal(np.tensordot(x1, x2, axes=1), y_ba.get()) @@ -171,17 +162,6 @@ def test_tensordot(app_inst: ArrayApplication): assert np.array_equal(np.tensordot(x1, x2, axes=1), y_ba.get()) -# def test_getitem(app_inst: ArrayApplication): -# x1 = np.array([[0, 0, 1, 1], [0, 0, 1, 1], [2, 2, 2, 2], [2, 2, 2, 2]]) -# x1_sp = sparse.COO.from_numpy(x1, fill_value=2) -# x1_ba = app_inst.array(x1, block_shape=(2, 2)) -# x1_sba = SparseBlockArray.from_ba(x1_ba, fill_value=2) -# y_sp = x1_sp[0:2, 1:3] -# y_sba = x1_sba[0:2, 1:3] -# assert y_sba.fill_value == y_sp.fill_value # 2 -# assert np.array_equal(y_sp.todense(), y_sba.to_ba().get()) - - def test_sdtp(app_inst: ArrayApplication): shape = 50, 50, 50 block_shape = 10, 10, 10 diff --git a/tests/experimental/optimizer/test_size.py b/tests/experimental/optimizer/test_size.py index a5606c6b..340ae163 100644 --- a/tests/experimental/optimizer/test_size.py +++ b/tests/experimental/optimizer/test_size.py @@ -11,7 +11,7 @@ def test_nbytes(): (10, 10), 10, np.int64, - 0, + False, ) assert x1_sp.nbytes == x1_ts.nbytes @@ -21,11 +21,11 @@ def test_uop(): (10, 10), 10, np.int64, - 1, + False, ) y_ts = x1_ts.uop("negative") assert np.allclose(y_ts.nnz, x1_ts.nnz) - assert y_ts.fill_value == -1 + assert not y_ts.is_dense def test_add(): @@ -33,17 +33,17 @@ def test_add(): (10, 10), 10, np.int64, - 1, + False, ) x2_ts = TreeNodeSize( (10, 10), 20, np.int64, - 2, + False, ) y_ts = x1_ts + x2_ts assert np.allclose(y_ts.nnz, int((1 - 0.9 * 0.8) * 100)) - assert y_ts.fill_value == 3 + assert not y_ts.is_dense def test_mul(): @@ -51,13 +51,13 @@ def test_mul(): (10, 10), 10, np.int64, - 0, + False, ) x2_ts = TreeNodeSize( (10, 10), 20, np.int64, - 0, + False, ) y_ts = x1_ts * x2_ts assert np.allclose(y_ts.nnz, int(0.1 * 0.2 * 100)) @@ -66,7 +66,7 @@ def test_mul(): (10, 1), 10, np.int64, - 1, + False, ) y_ts = x1_ts * x2_ts assert np.allclose(y_ts.nnz, int(0.1 * 100)) @@ -75,10 +75,11 @@ def test_mul(): (10, 1), 10, np.int64, - None, + True, ) y_ts = x1_ts * x2_ts assert np.allclose(y_ts.nnz, int(0.1 * 100)) + assert not y_ts.is_dense def test_tensordot(): @@ -86,13 +87,13 @@ def test_tensordot(): (10, 10), 10, np.int64, - 0, + False, ) x2_ts = TreeNodeSize( (10, 10), 20, np.int64, - 0, + False, ) y_ts = x1_ts.tensordot(x2_ts, axes=1) assert np.allclose(y_ts.nnz, int((1 - (1 - 0.1 * 0.2) ** 10) * 100)) From 865e16d966bf864a643c3f319638717ffb5cbad9 Mon Sep 17 00:00:00 2001 From: Daniel Zou Date: Sun, 4 Sep 2022 10:22:11 -0700 Subject: [PATCH 4/5] Integrate sparsity into GraphArray and fusion --- CONTRIBUTING_MAC.md | 17 ++ nums/core/array/base.py | 79 +----- nums/core/array/blockarray.py | 63 ++++- nums/core/array/random.py | 1 + nums/core/array/sparse.py | 208 +++++++++++++++- nums/core/array/utils.py | 40 +-- nums/core/array/view.py | 2 +- nums/core/kernel/kernel_interface.py | 10 +- nums/core/kernel/numpy_kernel.py | 36 
++- nums/experimental/optimizer/fusion.py | 8 +- nums/experimental/optimizer/fusion_utils.py | 18 ++ nums/experimental/optimizer/graph.py | 229 +++++++++++++----- nums/experimental/optimizer/grapharray.py | 41 +++- .../optimizer/{size.py => node_meta.py} | 151 ++++++++---- nums/experimental/optimizer/reduction_ops.py | 60 +++-- tests/core/array/test_sparse.py | 18 +- tests/experimental/optimizer/test_fusion.py | 163 ++++++++++--- .../optimizer/{test_size.py => test_meta.py} | 22 +- tests/experimental/optimizer/test_ops.py | 21 +- 19 files changed, 862 insertions(+), 325 deletions(-) create mode 100644 CONTRIBUTING_MAC.md rename nums/experimental/optimizer/{size.py => node_meta.py} (74%) rename tests/experimental/optimizer/{test_size.py => test_meta.py} (82%) diff --git a/CONTRIBUTING_MAC.md b/CONTRIBUTING_MAC.md new file mode 100644 index 00000000..1fcf6f38 --- /dev/null +++ b/CONTRIBUTING_MAC.md @@ -0,0 +1,17 @@ +# Contributing As a Mac User + +## Installation + +For M1 Macs (Apple Silicon), various [online sources](https://github.com/scipy/scipy/issues/13409) recommend using OpenBLAS as a native (not Rosetta) linear algebra backend for dependencies like NumPy. +```sh +brew install openblas +export OPENBLAS=$(/opt/homebrew/bin/brew --prefix openblas) +``` + +As [documented by Ray](https://docs.ray.io/en/latest/ray-overview/installation.html#m1-mac-apple-silicon-support), one must install miniforge, a community-driven Conda installer, for packages that support `arm64`. In particular, the GRPCIO package from pip will not work with Ray, so it must be uninstalled (`pip uninstall grpcio`) and replaced with the one from miniforge (`conda install grpcio`). + + +## Testing + +`tests/core/backend/test_backend_init.py::test_head_detection` +As [documented by Ray](https://github.com/ray-project/ray/issues/24130), there is different default behavior for fetching node IP addresses. On Linux, a private address (e.g. `192.168.0.1`) is returned, while on Windows and Mac, `127.0.0.1` is returned. As NumS is primarily released for Linux, this test will fail for Mac contributors, but it shouldn't affect development. diff --git a/nums/core/array/base.py b/nums/core/array/base.py index bec634fe..a09bbbb1 100644 --- a/nums/core/array/base.py +++ b/nums/core/array/base.py @@ -13,12 +13,7 @@ # limitations under the License. 
-# pylint: disable = protected-access -# pylint: disable=too-many-lines - import warnings -from numba.core.errors import NumbaNotImplementedError - import numpy as np from nums.core.array import utils as array_utils @@ -26,6 +21,10 @@ from nums.core.grid.grid import ArrayGrid +# pylint: disable=protected-access, redefined-builtin +# pylint: disable=too-many-lines + + class BlockBase: block_id_counter = -1 @@ -68,7 +67,7 @@ def __repr__(self): def size(self): return np.product(self.shape) - def copy(self): + def copy(self, shallow=True): raise NotImplementedError() def get(self): @@ -209,7 +208,7 @@ def _check_bop_implemented(self, other): def binary_op(op_name, a, b, args: dict, device=None): raise NotImplementedError() - def bop(self, op_name, other, args: dict, device=None, **kwargs): + def bop(self, op_name, other, args: dict, device=None): raise NotImplementedError() def tensordot(self, other, axes): @@ -253,7 +252,7 @@ def __truediv__(self, other): def __rtruediv__(self, other): if not self._check_bop_implemented(other): return NotImplemented - return self.binary_op("trudiv", other, self, args={}) + return self.binary_op("truediv", other, self, args={}) def __pow__(self, other): if not self._check_bop_implemented(other): @@ -311,18 +310,6 @@ def __ne__(self, other): class Block(BlockBase): # pylint: disable=redefined-builtin, global-statement - def __init__( - self, - grid_entry, - grid_shape, - shape, - dtype, - transposed, - km: KernelManager, - id=None, - ): - super().__init__(grid_entry, grid_shape, shape, dtype, transposed, km, id) - @property def is_dense(self): return True @@ -592,53 +579,13 @@ def broadcast_to(self, shape): result.blocks = broadcast return result + def reduce_axis(self, op_name, axis, keepdims=False): + raise NotImplementedError() + def tree_reduce( - self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape + self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape, *args ): - """ - Basic tree reduce imp. - Schedules op on same node as left operand. - :param op_name: The reduction op. - :param blocks_or_oids: A list of type Block or a list of tuples. - Tuples must be of the form - (oid, grid_entry, grid_shape, transposed) - :param result_grid_entry: The grid entry of the result block. This will be used - to compute the final reduction step. - :param result_grid_shape: The grid entry of the result block. This will be used - to compute the final reduction step. - :return: The oid of the result. 
- """ - oid_list = blocks_or_oids - if isinstance(blocks_or_oids[0], Block): - oid_list = [ - (b.oid, b.grid_entry, b.grid_shape, b.transposed) - for b in blocks_or_oids - ] - if len(oid_list) == 1: - return oid_list[0][0] - q = oid_list - while len(q) > 1: - a_oid, a_ge, a_gs, a_T = q.pop(0) - b_oid, _, _, b_T = q.pop(0) - ge, gs = ( - (result_grid_entry, result_grid_shape) if len(q) == 0 else (a_ge, a_gs) - ) - c_oid = self.km.bop_reduce( - op_name, - a_oid, - b_oid, - a_T, - b_T, - syskwargs={ - "grid_entry": ge, - "grid_shape": gs, - }, - ) - q.append((c_oid, ge, gs, False)) - r_oid, r_ge, r_gs, _ = q.pop(0) - assert r_ge == result_grid_entry - assert r_gs == result_grid_shape - return r_oid + raise NotImplementedError() def check_or_convert_other(self, other, compute_block_shape=False): raise NotImplementedError() @@ -870,7 +817,7 @@ def __rmatmul__(self, other): # Inequalities ################# - def __inequality__(self, op, other): + def __inequality__(self, op_name, other): raise NotImplementedError() def __ge__(self, other): diff --git a/nums/core/array/blockarray.py b/nums/core/array/blockarray.py index db0eb20d..d6dabd6a 100644 --- a/nums/core/array/blockarray.py +++ b/nums/core/array/blockarray.py @@ -19,7 +19,7 @@ import numpy as np from nums.core.array import utils as array_utils -from nums.core.array.base import Block, BlockArrayBase +from nums.core.array.base import BlockBase, Block, BlockArrayBase from nums.core.array.view import ArrayView from nums.core.grid.grid import ArrayGrid from nums.core.kernel.kernel_manager import KernelManager @@ -298,10 +298,6 @@ def __getitem__(self, item): # Treat this as a shuffle. return self._advanced_single_array_select(ss, axis=axis) - # This is to deal with circular imports. Little overhead since this happens once per call. - # However, would be better to rearrange modules in the future. - from nums.core.array.view import ArrayView - av: ArrayView = ArrayView.from_block_array(self) # TODO (hme): We don't have to create, but do so for now until we need to optimize. return av[ss].create() @@ -410,10 +406,6 @@ def __setitem__(self, key, value): if is_handled_advanced: return self._advanced_single_array_assign(ss, value, axis) - # This is to deal with circular imports. Little overhead since this happens once per call. - # However, would be better to rearrange modules in the future. - from nums.core.array.view import ArrayView - av: ArrayView = ArrayView.from_block_array(self) av[key] = value @@ -714,6 +706,54 @@ def reduce_axis(self, op_name, axis, keepdims=False): ) return result + def tree_reduce( + self, op_name, blocks_or_oids, result_grid_entry, result_grid_shape, *args + ): + """ + Basic tree reduce imp. + Schedules op on same node as left operand. + :param op_name: The reduction op. + :param blocks_or_oids: A list of type Block or a list of tuples. + Tuples must be of the form + (oid, grid_entry, grid_shape, transposed) + :param result_grid_entry: The grid entry of the result block. This will be used + to compute the final reduction step. + :param result_grid_shape: The grid entry of the result block. This will be used + to compute the final reduction step. + :return: The oid of the result. 
+ """ + oid_list = blocks_or_oids + if isinstance(blocks_or_oids[0], Block): + oid_list = [ + (b.oid, b.grid_entry, b.grid_shape, b.transposed) + for b in blocks_or_oids + ] + if len(oid_list) == 1: + return oid_list[0][0] + q = oid_list + while len(q) > 1: + a_oid, a_ge, a_gs, a_T = q.pop(0) + b_oid, _, _, b_T = q.pop(0) + ge, gs = ( + (result_grid_entry, result_grid_shape) if len(q) == 0 else (a_ge, a_gs) + ) + c_oid = self.km.bop_reduce( + op_name, + a_oid, + b_oid, + a_T, + b_T, + syskwargs={ + "grid_entry": ge, + "grid_shape": gs, + }, + ) + q.append((c_oid, ge, gs, False)) + r_oid, r_ge, r_gs, _ = q.pop(0) + assert r_ge == result_grid_entry + assert r_gs == result_grid_shape + return r_oid + ################# # Arithmetic ################# @@ -827,6 +867,7 @@ def tensordot(a, b, axes=2): for k in sum_dims: a_block: Block = a.blocks[tuple(i + k)] b_block: Block = b.blocks[tuple(k + j)] + # pylint: disable=protected-access dot_grid_args = a._compute_tensordot_syskwargs(a_block, b_block) dotted_oid = a.km.bop( "tensordot", @@ -852,7 +893,7 @@ def tensordot(a, b, axes=2): # Inequalities ################# - def __inequality__(self, op, other): + def __inequality__(self, op_name, other): other = self.check_or_convert_other(other) if other is NotImplemented: return NotImplemented @@ -872,7 +913,7 @@ def __inequality__(self, op, other): else: other_block: Block = other.blocks[grid_entry] result.blocks[grid_entry] = self.blocks[grid_entry].bop( - op, other_block, args={} + op_name, other_block, args={} ) return result diff --git a/nums/core/array/random.py b/nums/core/array/random.py index 45196888..5e417270 100644 --- a/nums/core/array/random.py +++ b/nums/core/array/random.py @@ -311,5 +311,6 @@ def _sparse_sample_basic( p, syskwargs=syskwargs, ) + # pylint: disable=protected-access block._nnz = sba.km.sparse_nnz(block.oid, syskwargs=syskwargs) return sba diff --git a/nums/core/array/sparse.py b/nums/core/array/sparse.py index f7dc2e2d..37939b68 100644 --- a/nums/core/array/sparse.py +++ b/nums/core/array/sparse.py @@ -1,13 +1,17 @@ from typing import List +import itertools +import warnings +import numpy as np +import sparse + from nums.core.array import utils as array_utils from nums.core.array.base import BlockBase, Block, BlockArrayBase from nums.core.array.blockarray import BlockArray from nums.core.kernel.kernel_manager import KernelManager from nums.core.grid.grid import ArrayGrid -import numpy as np -import itertools -import warnings -import sparse + + +# pylint: disable=protected-access, redefined-builtin class SparseBlock(BlockBase): @@ -77,7 +81,18 @@ def copy(self, shallow=True): return block def map_uop(self, op_name, args=None, kwargs=None, device=None): - block = self.copy() + densify = array_utils.get_sparse_uop_densify(op_name) + if densify: + block = Block( + self.grid_entry, + self.grid_shape, + self.shape, + self.dtype, + self.transposed, + self.km, + ) + else: + block = self.copy() block.dtype = array_utils.get_uop_output_type(op_name, self.dtype) args = () if args is None else args kwargs = {} if kwargs is None else kwargs @@ -87,7 +102,7 @@ def map_uop(self, op_name, args=None, kwargs=None, device=None): syskwargs = {"device": device} block._device = device block.oid = self.km.sparse_map_uop( - op_name, self.oid, args, kwargs, syskwargs=syskwargs + op_name, self.oid, args, kwargs, densify, syskwargs=syskwargs ) block._nnz = self.km.sparse_nnz(block.oid, syskwargs=syskwargs) block._nbytes = self.km.sparse_nbytes(block.oid, syskwargs=syskwargs) @@ -312,8 +327,12 @@ def 
from_sparse(cls, arr, block_shape, copy, km, fill_value=0): @classmethod def from_scalar(cls, val, km): - if not array_utils.is_scalar(val): - raise ValueError("%s is not a scalar." % val) + # Only create SparseBlockArray with 0s. Other scalars should use dense BlockArray. + if not np.isclose(val, 0): + warnings.warn( + "%s cannot fill SparseBlockArray. Converting to BlockArray." % val + ) + return BlockArray.from_np(np.array(val), (), copy=False, km=km) return SparseBlockArray.from_np(np.array(val), (), copy=False, km=km) @classmethod @@ -422,13 +441,178 @@ def _check_bop_implemented(self, other): return False def ufunc(self, op_name): - result = self.copy() + densify = array_utils.get_sparse_uop_densify(op_name) + if densify: + result = BlockArray(self.grid, self.km) + else: + result = self.copy() for grid_entry in self.grid.get_entry_iterator(): result.blocks[grid_entry] = self.blocks[grid_entry].ufunc(op_name) result._nnz = -1 result._nbytes = -1 return result + def reduce_axis(self, op_name, axis, keepdims=False): + if not (axis is None or isinstance(axis, (int, np.int32, np.int64))): + raise NotImplementedError("Only integer axis is currently supported.") + if 0 in self.shape: + return SparseBlockArray.create("zeros", (), (), float, self.km) + block_reduced_oids = np.empty_like(self.blocks, dtype=tuple) + for grid_entry in self.grid.get_entry_iterator(): + block = self.blocks[grid_entry] + block_oid = self.km.sparse_reduce_axis( + op_name=op_name, + arr=block.oid, + axis=axis, + keepdims=keepdims, + transposed=block.transposed, + syskwargs={ + "grid_entry": block.grid_entry, + "grid_shape": block.grid_shape, + }, + ) + block_reduced_oids[grid_entry] = ( + block_oid, + block.grid_entry, + block.grid_shape, + False, + ) + result_shape = [] + result_block_shape = [] + for curr_axis in range(len(self.shape)): + axis_size, axis_block_size = ( + self.shape[curr_axis], + self.block_shape[curr_axis], + ) + if curr_axis == axis or axis is None: + if keepdims: + axis_size, axis_block_size = 1, 1 + else: + continue + result_shape.append(axis_size) + result_block_shape.append(axis_block_size) + result_shape = tuple(result_shape) + result_block_shape = tuple(result_block_shape) + result_dtype = array_utils.get_reduce_output_type(op_name, self.dtype) + result_grid = ArrayGrid( + shape=result_shape, + block_shape=result_block_shape, + dtype=result_dtype.__name__, + ) + result = BlockArray(result_grid, self.km) + + if axis is None: + if result.shape == (): + result_block: Block = result.blocks[()] + else: + result_block: Block = result.blocks[:].item() + result_block.oid = self.tree_reduce( + op_name, + block_reduced_oids.flatten().tolist(), + result_block.grid_entry, + result_block.grid_shape, + False, + ) + else: + for result_grid_entry in result_grid.get_entry_iterator(): + block_reduced_oids_axis = [] + for sum_dim in range(self.grid.grid_shape[axis]): + grid_entry = list(result_grid_entry) + if keepdims: + grid_entry[axis] = sum_dim + else: + grid_entry = grid_entry[:axis] + [sum_dim] + grid_entry[axis:] + grid_entry = tuple(grid_entry) + block_reduced_oids_axis.append(block_reduced_oids[grid_entry]) + result_block: Block = result.blocks[result_grid_entry] + result_block.oid = self.tree_reduce( + op_name, + block_reduced_oids_axis, + result_block.grid_entry, + result_block.grid_shape, + False, + ) + return result + + # pylint: disable=arguments-differ + def tree_reduce( + self, + op_name, + blocks_or_oids, + result_grid_entry, + result_grid_shape, + densify, + *args, + ): + """ + Basic tree 
reduce imp. + Schedules op on same node as left operand. + :param op_name: The reduction op. + :param blocks_or_oids: A list of type Block or a list of tuples. + Tuples must be of the form + (oid, grid_entry, grid_shape, transposed) + :param result_grid_entry: The grid entry of the result block. This will be used + to compute the final reduction step. + :param result_grid_shape: The grid entry of the result block. This will be used + to compute the final reduction step. + :return: The oid of the result. + """ + oid_list = blocks_or_oids + if isinstance(blocks_or_oids[0], Block): + oid_list = [ + (b.oid, b.grid_entry, b.grid_shape, b.transposed) + for b in blocks_or_oids + ] + if len(oid_list) == 1: + return oid_list[0][0] + q = oid_list + if densify: + while len(q) > 1: + a_oid, a_ge, a_gs, a_T = q.pop(0) + b_oid, _, _, b_T = q.pop(0) + ge, gs = ( + (result_grid_entry, result_grid_shape) + if len(q) == 0 + else (a_ge, a_gs) + ) + c_oid = self.km.bop_reduce( + op_name, + a_oid, + b_oid, + a_T, + b_T, + syskwargs={ + "grid_entry": ge, + "grid_shape": gs, + }, + ) + q.append((c_oid, ge, gs, False)) + else: + while len(q) > 1: + a_oid, a_ge, a_gs, a_T = q.pop(0) + b_oid, _, _, b_T = q.pop(0) + ge, gs = ( + (result_grid_entry, result_grid_shape) + if len(q) == 0 + else (a_ge, a_gs) + ) + c_oid = self.km.sparse_bop_reduce( + op_name, + a_oid, + b_oid, + a_T, + b_T, + syskwargs={ + "grid_entry": ge, + "grid_shape": gs, + }, + ) + q.append((c_oid, ge, gs, False)) + r_oid, r_ge, r_gs, _ = q.pop(0) + assert r_ge == result_grid_entry + assert r_gs == result_grid_shape + return r_oid + ################# # Arithmetic ################# @@ -583,7 +767,11 @@ def tensordot(a, b, axes=2): (dotted_oid, dot_grid_args[0], dot_grid_args[1], False) ) result_block.oid = a.tree_reduce( - "sum", sum_oids, result_block.grid_entry, result_block.grid_shape + "sum", + sum_oids, + result_block.grid_entry, + result_block.grid_shape, + densify, ) if not densify: syskwargs = { diff --git a/nums/core/array/utils.py b/nums/core/array/utils.py index ed01190e..356b78f7 100644 --- a/nums/core/array/utils.py +++ b/nums/core/array/utils.py @@ -18,7 +18,6 @@ import numpy as np import scipy.special -import sparse from nums.core.settings import np_ufunc_map from nums.core.array.errors import AxisError @@ -453,24 +452,25 @@ def normalize_axis_index(axis, ndim): return axis % ndim -def get_sparse_bop_densify(op_name, a_dense, b_dense): - def sample_array(is_dense): - s = np.eye(2) - if not is_dense: - return sparse.COO.from_numpy(s, fill_value=0) - return s - - sa = sample_array(a_dense) - sb = sample_array(b_dense) - if op_name == "tensordot": - result = sparse.tensordot(sa, sb) - else: - op_name = np_ufunc_map.get(op_name, op_name) - try: - ufunc = np.__getattribute__(op_name) - except Exception as _: - ufunc = scipy.special.__getattribute__(op_name) - result = sparse.elemwise(ufunc, sa, sb) - if isinstance(result, sparse.SparseArray): +def get_sparse_uop_densify(op_name): + ufunc = np.__getattribute__(op_name) + if ufunc(0) == 0: return False return True + + +sd_bop_sparse_ops = [ + "mul", +] + + +def get_sparse_bop_densify(op_name, a_dense: bool, b_dense: bool): + if a_dense and b_dense: + return True + + if a_dense or b_dense: + if op_name in sd_bop_sparse_ops: + return False + return True + + return False diff --git a/nums/core/array/view.py b/nums/core/array/view.py index 88e79267..9fa40b7e 100644 --- a/nums/core/array/view.py +++ b/nums/core/array/view.py @@ -13,7 +13,7 @@ # limitations under the License. 
-from typing import Tuple, Type +from typing import Tuple import numpy as np diff --git a/nums/core/kernel/kernel_interface.py b/nums/core/kernel/kernel_interface.py index f1f16218..0314011c 100644 --- a/nums/core/kernel/kernel_interface.py +++ b/nums/core/kernel/kernel_interface.py @@ -194,12 +194,20 @@ def sparse_random_block( ): raise NotImplementedError() - def sparse_map_uop(self, op_name, arr, args, kwargs, syskwargs: Dict): + def sparse_map_uop(self, op_name, arr, args, kwargs, densify, syskwargs: Dict): raise NotImplementedError() def sparse_bop(self, op, a1, a2, a1_T, a2_T, axes, densify, syskwargs: Dict): raise NotImplementedError() + def sparse_reduce_axis( + self, op_name, arr, axis, keepdims, transposed, syskwargs: Dict + ): + raise NotImplementedError() + + def sparse_bop_reduce(self, op, a1, a2, a1_T, a2_T, syskwargs: Dict): + raise NotImplementedError() + def sparse_block_from_scalar(self, x, syskwargs: Dict): raise NotImplementedError() diff --git a/nums/core/kernel/numpy_kernel.py b/nums/core/kernel/numpy_kernel.py index 20523e0a..1789622f 100644 --- a/nums/core/kernel/numpy_kernel.py +++ b/nums/core/kernel/numpy_kernel.py @@ -435,7 +435,6 @@ def bop(self, op, a1, a2, a1_T, a2_T, axes): ufunc = scipy.special.__getattribute__(op) return ufunc(a1, a2) - # Works for sparse too. def bop_reduce(self, op, a1, a2, a1_T, a2_T): if a1_T: a1 = a1.T @@ -572,7 +571,7 @@ def sparse_random_block( ) return result.astype(dtype) - def sparse_map_uop(self, op_name, arr, args, kwargs): + def sparse_map_uop(self, op_name, arr, args, kwargs, densify): """ Args: func: types.Callable @@ -583,6 +582,10 @@ def sparse_map_uop(self, op_name, arr, args, kwargs): args = list(args) args.insert(0, arr) result = sparse.elemwise(ufunc, *args, **kwargs) + if densify and isinstance(result, sparse.SparseArray): + result = result.todense() + elif not densify: + assert isinstance(result, sparse.SparseArray) return result def sparse_bop(self, op, a1, a2, a1_T, a2_T, axes, densify): @@ -611,6 +614,35 @@ def sparse_bop(self, op, a1, a2, a1_T, a2_T, axes, densify): assert isinstance(result, sparse.SparseArray) return result + def sparse_reduce_axis(self, op_name, arr, axis, keepdims, transposed): + assert isinstance(arr, sparse.COO) + op_func = np.__getattribute__(op_name) + if transposed: + arr = arr.T + return arr.reduce(op_func, axis=axis, keepdims=keepdims) + + def sparse_bop_reduce(self, op, a1, a2, a1_T, a2_T): + assert isinstance(a1, sparse.COO) and isinstance(a2, sparse.COO) + if a1_T: + a1 = a1.T + if a2_T: + a2 = a2.T + + # These are faster. + if op == "sum": + r = a1 + a2 + elif op == "prod": + r = a1 * a2 + else: + a = sparse.stack([a1, a2], axis=0) + r = a.reduce(np.__getattribute__(op), axis=0, keepdims=False) + + if a1 is np.nan or a2 is np.nan or r is np.nan: + assert np.isscalar(a1) and np.isscalar(a2) and np.isscalar(r) + else: + assert a1.shape == a2.shape == r.shape + return r + def sparse_block_from_scalar(self, x): assert np.isscalar(x) return sparse.COO.from_numpy(np.array(x), fill_value=0) diff --git a/nums/experimental/optimizer/fusion.py b/nums/experimental/optimizer/fusion.py index 1729f4d7..40373351 100644 --- a/nums/experimental/optimizer/fusion.py +++ b/nums/experimental/optimizer/fusion.py @@ -48,7 +48,7 @@ def traverse(self, node: TreeNode, fuseable_nodes): ) if child_leafs is None: # This branch has been pruned. - return None, None, None + return None, None, 0 if len(child_block_id_set) <= self.max_args: # If it's a frontier node, then there's no point in fusing it. 
# If it's a leaf, we still want to count toward node_leafs, @@ -102,11 +102,15 @@ def update_graph(self, root, fuseable_nodes): def fuse_node(self, node: TreeNode): result = FunctionNode(node.cluster_state) - result.op_func, result.children = node.fuse(result, self.km) + result.op_func, result.children, result.tree_node_meta = node.fuse( + result, self.km + ) result.set_shape(node.shape()) result.set_grid_entry(node.grid_entry()) result.set_grid_shape(node.grid_shape()) result.set_dtype(node.dtype()) + assert result.tree_node_meta.shape == result.shape() + assert result.tree_node_meta.dtype == result.dtype() expression_str = node.expression() result.set_expression(expression_str) diff --git a/nums/experimental/optimizer/fusion_utils.py b/nums/experimental/optimizer/fusion_utils.py index cc4e8134..c47aedd1 100644 --- a/nums/experimental/optimizer/fusion_utils.py +++ b/nums/experimental/optimizer/fusion_utils.py @@ -40,3 +40,21 @@ def set_using_marker(node, inputs): node.update_child([child], [inputs[child.marker]]) inputs[child.marker].parent = node return node + + +def print_graph(root: TreeNode): + """ + Recursively print string representations of tree nodes using BFS. + """ + print("print_graph") + nodes = [root] + while len(nodes) > 0: + node = nodes.pop(0) + if hasattr(node, "child"): + nodes.append(node.child) + if hasattr(node, "left"): + nodes.append(node.left) + nodes.append(node.right) + if hasattr(node, "children"): + nodes.extend(node.children) + print(node) diff --git a/nums/experimental/optimizer/graph.py b/nums/experimental/optimizer/graph.py index 2440e622..2ffc7a51 100644 --- a/nums/experimental/optimizer/graph.py +++ b/nums/experimental/optimizer/graph.py @@ -22,10 +22,11 @@ from nums.core.settings import sync_nnz from nums.core.array import utils as array_utils from nums.core.array.base import BlockBase, Block +from nums.core.array.sparse import SparseBlock from nums.core.grid.grid import Device from nums.core.kernel.kernel_manager import KernelManager from nums.experimental.optimizer.clusterstate import ClusterState -from nums.experimental.optimizer.size import TreeNodeSize +from nums.experimental.optimizer.node_meta import TreeNodeMeta, LeafMeta def subsample(total_items, max_items, rs: np.random.RandomState): @@ -52,6 +53,13 @@ def __init__(self, cluster_state: ClusterState, tree_node_id=None): self._dtype = None self._expression = None + # Type LeafMeta for leaves: shape, nnz, dtype, and whether output is_dense. + # Type TreeNodeMeta for other nodes: dtype and whether output is_dense. + self.tree_node_meta: TreeNodeMeta = None + # Whether this operation invokes dense or sparse kernel. + # Need to explicitly set is_dense property after initialization. 
+ self.dense_kernel = True + def get_root(self): if self.parent is None: return self @@ -123,6 +131,12 @@ def make_bop(self, op_name, other, args=None): assert self.parent is None and other.parent is None bop.left, bop.right = self, other bop.left.parent, bop.right.parent = bop, bop + bop.tree_node_meta = bop.left.tree_node_meta.bop_partial( + op_name, bop.right.tree_node_meta + ) + bop.dense_kernel = ( + bop.left.tree_node_meta.is_dense and bop.right.tree_node_meta.is_dense + ) return bop def tensordot(self, other, axes): @@ -154,7 +168,6 @@ def __init__(self, cluster_state: ClusterState, tree_node_id=None): super().__init__(cluster_state, tree_node_id) self.block = None self.marker = -1 - self.tree_node_size = None def get_children(self): return [] @@ -186,8 +199,8 @@ def copy(self, cluster_state, parent=None, new_ids=False): # This property is only used for fusion. leaf.marker = self.marker - # This property is for experimental output size estimation. - leaf.tree_node_size = self.tree_node_size + leaf.tree_node_meta = self.tree_node_meta + leaf.dense_kernel = self.dense_kernel return leaf def get_leafs(self): @@ -229,7 +242,7 @@ def expression(self): def fuse(self, func_node, km: KernelManager): f = km.get_fuseable("identity") - return f, [self.copy(self.cluster_state, func_node)] + return f, [self.copy(self.cluster_state, func_node)], self.tree_node_meta def is_scalar(self): return self.block.size() == 1 @@ -242,10 +255,11 @@ def __init__(self, cluster_state: ClusterState, tree_node_id=None): self.op_name = None def __repr__(self): - return "UnaryOp(name=%s, id=%s, child=%s)" % ( + return "UnaryOp(name=%s, id=%s, child=%s, dense_kernel=%s)" % ( self.op_name, str(self.tree_node_id), str(self.child.tree_node_id), + self.dense_kernel, ) def get_children(self): @@ -264,6 +278,9 @@ def copy(self, cluster_state, parent=None, new_ids=False): uop.child = self.child.copy(cluster_state, parent=uop, new_ids=new_ids) uop.op_name = self.op_name uop.copy_on_op = self.copy_on_op + + uop.tree_node_meta = self.tree_node_meta + uop.dense_kernel = self.dense_kernel return uop def update_child(self, old_children, new_children): @@ -317,7 +334,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: self.cluster_state.commit_uop(self._mem_cost(), self.child.block.id, device) # self.cluster_state.add_block(new_block.id, new_block.size(), [device]) self.cluster_state.add_block( - new_block.id, new_leaf.tree_node_size.nbytes, [device] + new_block.id, new_leaf.tree_node_meta.nbytes, [device] ) if not self.cluster_state.created_on_only: assert self.cluster_state.blocks_local( @@ -338,7 +355,8 @@ def _collapse(self, device: Device): block: BlockBase = block.ufunc(op_name, device=device) leaf: Leaf = Leaf(self.cluster_state) leaf.block = block - leaf.tree_node_size = self.child.tree_node_size.uop(op_name) + assert isinstance(self.child.tree_node_meta, LeafMeta) + leaf.tree_node_meta = self.child.tree_node_meta.uop(op_name) leaf.copy_on_op = self.copy_on_op return leaf, block @@ -348,8 +366,9 @@ def _mem_cost(self): if block.is_dense: return np.product(block.shape) if sync_nnz > 1: - self.child.tree_node_size.nnz = block.nnz # Blocking fetch - return self.child.tree_node_size.uop(self.op_name).nbytes + self.child.tree_node_meta.nnz = block.nnz # Blocking fetch + assert isinstance(self.child.tree_node_meta, LeafMeta) + return self.child.tree_node_meta.uop(self.op_name).nbytes def shape(self): if self._shape is None: @@ -387,24 +406,35 @@ def dtype(self): def expression(self): if self._expression is None: - 
self._expression = "UnaryOp(op=%s, x=%s)" % ( + self._expression = "UnaryOp(op=%s, dense_kernel=%s, x=%s)" % ( self.op_name, + self.dense_kernel, self.child.expression(), ) return self._expression def fuse(self, func_node, km: KernelManager): - child_op, child_args = self.child.fuse(func_node, km) + child_op, child_args, child_meta = self.child.fuse(func_node, km) if self.op_name == "transpose": self_op = km.get_fuseable("transpose") else: - self_op = km.get_fuseable("map_uop") - self_op = partial(self_op, op_name=self.op_name, args=(), kwargs={}) + if self.dense_kernel: + self_op = km.get_fuseable("map_uop") + self_op = partial(self_op, op_name=self.op_name, args=(), kwargs={}) + else: + self_op = km.get_fuseable("sparse_map_uop") + self_op = partial( + self_op, + op_name=self.op_name, + args=(), + kwargs={}, + densify=self.tree_node_meta.is_dense, + ) def fused(*args): return self_op(arr=child_op(*args)) - return fused, child_args + return fused, child_args, child_meta.uop(self.op_name) class ReduceAxis(UnaryOp): @@ -430,6 +460,9 @@ def copy(self, cluster_state, parent=None, new_ids=False): ra.axis = self.axis ra.keepdims = self.keepdims ra.copy_on_op = self.copy_on_op + + ra.tree_node_meta = self.tree_node_meta + ra.dense_kernel = self.dense_kernel return ra def _collapse(self, device: Device): @@ -445,22 +478,32 @@ def _collapse(self, device: Device): transposed=False, km=child_block.km, ) - block.oid = child_block.km.reduce_axis( - op_name=op_name, - arr=child_block.oid, - axis=self.axis, - keepdims=self.keepdims, - transposed=child_block.transposed, - syskwargs={"device": device}, - ) + if self.dense_kernel: + block.oid = child_block.km.reduce_axis( + op_name=op_name, + arr=child_block.oid, + axis=self.axis, + keepdims=self.keepdims, + transposed=child_block.transposed, + syskwargs={"device": device}, + ) + else: + block.oid = child_block.km.sparse_reduce_axis( + op_name=op_name, + arr=child_block.oid, + axis=self.axis, + keepdims=self.keepdims, + transposed=child_block.transposed, + syskwargs={"device": device}, + ) block._device = device leaf: Leaf = Leaf(self.cluster_state) leaf.block = block - leaf.tree_node_size = self.child.tree_node_size.reduce_axis( + assert isinstance(self.child.tree_node_meta, LeafMeta) + leaf.tree_node_meta = self.child.tree_node_meta.reduce_axis( self.op_name, self.axis, self.keepdims, - self.child.block.transposed, ) leaf.copy_on_op = self.copy_on_op return leaf, block @@ -471,12 +514,12 @@ def _mem_cost(self): if block.is_dense: return np.product(self.shape()) if sync_nnz > 1: - self.child.tree_node_size.nnz = block.nnz # Blocking fetch - return self.child.tree_node_size.reduce_axis( + self.child.tree_node_meta.nnz = block.nnz # Blocking fetch + assert isinstance(self.child.tree_node_meta, LeafMeta) + return self.child.tree_node_meta.reduce_axis( self.op_name, self.axis, self.keepdims, - block.transposed, ).nbytes def update_tuple_property(self, val, keep_dim_val: Union[int, tuple] = 1): @@ -519,18 +562,25 @@ def dtype(self): def expression(self): if self._expression is None: - self._expression = "ReduceAxis(op=%s, x=%s, axis=%s, keepdims=%s)" % ( - self.op_name, - self.child.expression(), - str(self.axis), - str(self.keepdims), + self._expression = ( + "ReduceAxis(op=%s, axis=%s, keepdims=%s, dense_kernel=%s, x=%s)" + % ( + self.op_name, + str(self.axis), + str(self.keepdims), + self.dense_kernel, + self.child.expression(), + ) ) return self._expression def fuse(self, func_node, km: KernelManager): - child_op, child_args = self.child.fuse(func_node, 
km) + child_op, child_args, child_meta = self.child.fuse(func_node, km) - self_op = km.get_fuseable("reduce_axis") + if self.dense_kernel: + self_op = km.get_fuseable("reduce_axis") + else: + self_op = km.get_fuseable("sparse_reduce_axis") kwargs = { "op_name": self.op_name, "axis": self.axis, @@ -543,7 +593,11 @@ def fuse(self, func_node, km: KernelManager): def fused(*args): return self_op(arr=child_op(*args)) - return fused, child_args + return ( + fused, + child_args, + child_meta.reduce_axis(self.op_name, self.axis, self.keepdims), + ) class BinaryOp(TreeNode): @@ -563,11 +617,12 @@ def __repr__(self): "matmul": "@", "tensordot": "@", }[self.op_name] - return "BOp(id=%s, op=%s%s%s)" % ( + return "BOp(id=%s, op=%s%s%s, dense_kernel=%s)" % ( self.tree_node_id, str(self.left.tree_node_id), bop_symbol, str(self.right.tree_node_id), + self.dense_kernel, ) def get_children(self): @@ -591,6 +646,9 @@ def copy(self, cluster_state, parent=None, new_ids=False): bop.left = self.left.copy(cluster_state, bop, new_ids=new_ids) bop.right = self.right.copy(cluster_state, bop, new_ids=new_ids) bop.copy_on_op = self.copy_on_op + + bop.tree_node_meta = self.tree_node_meta + bop.dense_kernel = self.dense_kernel return bop def update_child(self, old_children, new_children): @@ -672,7 +730,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: # Update cluster state with new block. # self.cluster_state.add_block(new_block.id, new_block.size(), [device]) self.cluster_state.add_block( - new_block.id, new_leaf.tree_node_size.nbytes, [device] + new_block.id, new_leaf.tree_node_meta.nbytes, [device] ) if not self.cluster_state.created_on_only: assert self.cluster_state.blocks_local( @@ -702,9 +760,11 @@ def _collapse(self, device: Device): block: BlockBase = lblock.bop(op_name, rblock, args=args, device=device) leaf: Leaf = Leaf(self.cluster_state) leaf.block = block - leaf.tree_node_size = self.left.tree_node_size.bop( + assert isinstance(self.left.tree_node_meta, LeafMeta) + assert isinstance(self.right.tree_node_meta, LeafMeta) + leaf.tree_node_meta = self.left.tree_node_meta.bop( op_name, - self.right.tree_node_size, + self.right.tree_node_meta, **args, ) leaf.copy_on_op = self.copy_on_op @@ -726,11 +786,13 @@ def _mem_cost(self): if lblock.is_dense and rblock.is_dense: return np.product(self.shape()) if sync_nnz > 1: - self.left.tree_node_size.nnz = lblock.nnz # Blocking fetch - self.right.tree_node_size.nnz = rblock.nnz # Blocking fetch - return self.left.tree_node_size.bop( + self.left.tree_node_meta.nnz = lblock.nnz # Blocking fetch + self.right.tree_node_meta.nnz = rblock.nnz # Blocking fetch + assert isinstance(self.left.tree_node_meta, LeafMeta) + assert isinstance(self.right.tree_node_meta, LeafMeta) + return self.left.tree_node_meta.bop( op_name, - self.right.tree_node_size, + self.right.tree_node_meta, **args, ).nbytes @@ -801,27 +863,44 @@ def expression(self): if self._expression is None: if self.op_name == "matmul" or self.op_name == "tensordot": axes = self.args.get("axes", 1) - self._expression = "BinaryOp(op=%s, x=%s, y=%s, axes=%s)" % ( - self.op_name, - self.left.expression(), - self.right.expression(), - axes, + self._expression = ( + "BinaryOp(op=%s, axes=%s, dense_kernel=%s, x=%s, y=%s)" + % ( + self.op_name, + axes, + self.dense_kernel, + self.left.expression(), + self.right.expression(), + ) ) - self._expression = "BinaryOp(op=%s, x=%s, y=%s)" % ( + self._expression = "BinaryOp(op=%s, dense_kernel=%s, x=%s, y=%s)" % ( self.op_name, + self.dense_kernel, 
self.left.expression(), self.right.expression(), ) return self._expression def fuse(self, func_node, km: KernelManager): - left_op, left_args = self.left.fuse(func_node, km) - right_op, right_args = self.right.fuse(func_node, km) - - self_op = km.get_fuseable("bop") + left_op, left_args, left_meta = self.left.fuse(func_node, km) + right_op, right_args, right_meta = self.right.fuse(func_node, km) axes = 1 if self.args is None else self.args.get("axes", 1) - self_op = partial(self_op, op=self.op_name, a1_T=False, a2_T=False, axes=axes) + if self.dense_kernel: + self_op = km.get_fuseable("bop") + self_op = partial( + self_op, op=self.op_name, a1_T=False, a2_T=False, axes=axes + ) + else: + self_op = km.get_fuseable("sparse_bop") + self_op = partial( + self_op, + op=self.op_name, + a1_T=False, + a2_T=False, + axes=axes, + densify=self.tree_node_meta.is_dense, + ) num_left = len(left_args) # Combine the left and right args. args = left_args + right_args @@ -836,7 +915,21 @@ def fused(*args): args2 = args[num_left:] return self_op(a1=left_op(*args1), a2=right_op(*args2)) - return fused, args + if self.op_name == "matmul": + op_name, extra_args = "tensordot", {"axes": 1} + elif self.op_name == "tensordot": + op_name, extra_args = "tensordot", self.args + else: + op_name, extra_args = self.op_name, {} + return ( + fused, + args, + left_meta.bop( + op_name, + right_meta, + **extra_args, + ), + ) class FunctionNode(TreeNode): @@ -867,7 +960,7 @@ def finalize(self, km: KernelManager): km.register(self.op_hash, self.op_func, {}) def __repr__(self): - return "Function(id=%s, op=%s, args=%s" % ( + return "Function(id=%s, op=%s, args=%s)" % ( self.tree_node_id, self.op_hash, len(self.children), @@ -895,11 +988,13 @@ def copy(self, cluster_state, parent=None, new_ids=False): fnode.parent = parent fnode.op_hash = self.op_hash fnode.op_func = self.op_func - fnode.op_expression = self.op_expression fnode.children = [ child.copy(cluster_state, fnode, new_ids=new_ids) for child in self.children ] fnode.copy_on_op = self.copy_on_op + + fnode.tree_node_meta = self.tree_node_meta + fnode.dense_kernel = self.dense_kernel return fnode def update_child(self, old_children, new_children): @@ -1007,7 +1102,11 @@ def _collapse(self, device: Device): block_oids.append(child.block.oid) if km is None: km = child.block.km - block: BlockBase = Block( + if self.tree_node_meta.is_dense: + block_type = Block + else: + block_type = SparseBlock + block: BlockBase = block_type( self._grid_entry, self._grid_shape, self._shape, self._dtype, False, km ) block._device = device @@ -1016,6 +1115,7 @@ def _collapse(self, device: Device): ) leaf: Leaf = Leaf(self.cluster_state) leaf.block = block + leaf.tree_node_meta = self.tree_node_meta leaf.copy_on_op = self.copy_on_op return leaf, block @@ -1063,7 +1163,7 @@ def __init__(self, cluster_state: ClusterState, tree_node_id=None): self.children: List[TreeNode] = None def __repr__(self): - return "Einsum(id=%s, subscript=%s, operands=%s" % ( + return "Einsum(id=%s, subscript=%s, operands=%s)" % ( self.tree_node_id, self.subscript, len(self.children), @@ -1184,7 +1284,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> Leaf: # Update cluster state with new block. 
# self.cluster_state.add_block(new_block.id, new_block.size(), [device]) self.cluster_state.add_block( - new_block.id, new_leaf.tree_node_size.nbytes, [device] + new_block.id, new_leaf.tree_node_meta.nbytes, [device] ) if not self.cluster_state.created_on_only: for block_id in block_ids: @@ -1214,8 +1314,11 @@ def _collapse(self, device: Device): leaf: Leaf = Leaf(self.cluster_state) leaf.block = block # Assume dense for simplicity. - leaf.tree_node_size = TreeNodeSize( - self.shape(), np.prod(self.shape()), block.dtype, block.is_dense + leaf.tree_node_meta = LeafMeta( + self.shape(), + np.prod(self.shape()), + block.dtype, + block.is_dense, ) leaf.copy_on_op = self.copy_on_op return leaf, block diff --git a/nums/experimental/optimizer/grapharray.py b/nums/experimental/optimizer/grapharray.py index 4755a574..07de2b6f 100644 --- a/nums/experimental/optimizer/grapharray.py +++ b/nums/experimental/optimizer/grapharray.py @@ -35,12 +35,13 @@ Leaf, UnaryOp, ReduceAxis, + FunctionNode, Einsum, ) from nums.experimental.optimizer.reduction_ops import TreeReductionOp from nums.experimental.optimizer.fusion import FuseGraph from nums.experimental.optimizer.fusion_utils import set_using_marker, traverse_marker -from nums.experimental.optimizer.size import TreeNodeSize +from nums.experimental.optimizer.node_meta import LeafMeta class GraphArray(object): @@ -66,17 +67,18 @@ def graphs_from_ba( nnz = block.nnz # Blocking fetch else: nnz = np.prod(block.shape) - leaf.tree_node_size = TreeNodeSize( + leaf.tree_node_meta = LeafMeta( block.shape, nnz, block.dtype, block.is_dense, ) + leaf.dense_kernel = block.is_dense leaf.copy_on_op = copy_on_op graphs[grid_entry] = leaf cluster_state.add_block( - block.id, leaf.tree_node_size.nbytes, devices=[device] + block.id, leaf.tree_node_meta.nbytes, devices=[device] ) cluster_state.init_mem_load(device, block.id) return graphs @@ -224,6 +226,9 @@ def tensordot(a, b, axes=2): # tree structure here is never exposed. dot_node.parent = rop rop.add_child(dot_node) + rop.tree_node_meta = dot_node.tree_node_meta + # Assume children are all dense or all sparse. + rop.dense_kernel = dot_node.tree_node_meta.is_dense result_graphs[grid_entry] = rop return GraphArray( @@ -369,6 +374,8 @@ def _add_uop(self, op_name, grid_entry, old_arr, new_arr): assert old_root.parent is None uop.child = old_root old_root.parent = uop + uop.tree_node_meta = old_root.tree_node_meta.uop_partial(op_name) + uop.dense_kernel = old_root.tree_node_meta.is_dense new_arr[grid_entry] = uop def reduce_axis(self, op_name, axis, keepdims): @@ -388,6 +395,12 @@ def reduce_axis(self, op_name, axis, keepdims): reduced_tnode.op_name = op_name reduced_tnode.axis = axis reduced_tnode.keepdims = keepdims + reduced_tnode.tree_node_meta = tnode.tree_node_meta.reduce_axis_partial( + op_name, + axis, + keepdims, + ) + reduced_tnode.dense_kernel = tnode.tree_node_meta.is_dense reduced_graphs[grid_entry] = reduced_tnode # Compute output GraphArray properties. @@ -442,6 +455,8 @@ def reduce_axis(self, op_name, axis, keepdims): rop.add_child(child) assert child.parent is None child.parent = rop + rop.tree_node_meta = child.tree_node_meta + rop.dense_kernel = child.tree_node_meta.is_dense if result_grid.shape == (): # keepdims = False. 
result_graphs[()] = rop @@ -469,6 +484,8 @@ def reduce_axis(self, op_name, axis, keepdims): rop.add_child(child) assert child.parent is None child.parent = rop + rop.tree_node_meta = child.tree_node_meta + rop.dense_kernel = child.tree_node_meta.is_dense result_graphs[result_grid_entry] = rop return GraphArray( @@ -485,16 +502,22 @@ def sum(self, axis=None, keepdims=False): def compile(self, max_args: int): result_graphs = np.empty_like(self.graphs, dtype=self.graphs.dtype) counter = 0 + first_grid_entry = (0,) * len(self.grid.shape) for grid_entry in self.grid.get_entry_iterator(): graph = self.graphs[grid_entry] _, leaf_inputs = traverse_marker(graph, 0) - if grid_entry == (0,) or grid_entry == (0, 0): # generic - result_graphs[grid_entry] = FuseGraph( - graph, self.km, max_args=max_args - )() - fused_graph = result_graphs[grid_entry] - fused_graph.op_expression = fused_graph._expression + if grid_entry == first_grid_entry: + fused_graph = FuseGraph(graph, self.km, max_args=max_args)() + if not isinstance(fused_graph, FunctionNode): + # Stopgap as this function currently assumes root is FunctionNode. + return self + result_graphs[grid_entry] = fused_graph + # result_graphs[grid_entry] = FuseGraph( + # graph, self.km, max_args=max_args + # )() + # fused_graph = result_graphs[grid_entry] else: + # TODO: support subtree fusion in this section. FuseGraph already does. fused_graph_copy = fused_graph.copy(self.cluster_state, new_ids=True) fused_graph_copy = set_using_marker(fused_graph_copy, leaf_inputs) fused_graph_copy.set_grid_entry(grid_entry) diff --git a/nums/experimental/optimizer/size.py b/nums/experimental/optimizer/node_meta.py similarity index 74% rename from nums/experimental/optimizer/size.py rename to nums/experimental/optimizer/node_meta.py index 089d97d4..236fe580 100644 --- a/nums/experimental/optimizer/size.py +++ b/nums/experimental/optimizer/node_meta.py @@ -3,44 +3,65 @@ from nums.core.array import utils as array_utils -# TODO: integrate this class with FuseGraph. -class TreeNodeSize: +class TreeNodeMeta: """ - Encapsulated by each TreeNode to keep track of estimated or observed sizes of blocks. + Encapsulated by each TreeNode to keep track of node metadata like shape and output density. + """ + + def __init__(self, dtype, is_dense): + self.dtype = dtype + self.is_dense = is_dense + + def copy(self): + return TreeNodeMeta( + self.dtype, + self.is_dense, + ) + + def uop_partial(self, op_name): + """ + Does not update nnz, as it can't be estimated for non-leaf nodes. + """ + is_dense = self.is_dense or array_utils.get_sparse_uop_densify(op_name) + return TreeNodeMeta( + dtype=array_utils.get_uop_output_type(op_name, self.dtype), + is_dense=is_dense, + ) + + def reduce_axis_partial(self, op_name, axis, keepdims): + return TreeNodeMeta( + dtype=array_utils.get_uop_output_type(op_name, self.dtype), + is_dense=True, + ) + + def bop_partial(self, op_name, other, **kwargs): + dtype = array_utils.get_bop_output_type(op_name, self.dtype, other.dtype) + is_dense = array_utils.get_sparse_bop_densify( + op_name, + self.is_dense, + other.is_dense, + ) + return TreeNodeMeta(dtype, is_dense) + + +class LeafMeta(TreeNodeMeta): + """ + Encapsulated by each Leaf to keep track of sizes of blocks in addition to TreeNodeMeta. Sparse binary operations use B(n, p) to estimate nnz as needed. 
""" def __init__(self, shape, nnz, dtype, is_dense, index_dtype=np.int64): + super().__init__(dtype, is_dense) self.shape = shape self.nnz = nnz - self.dtype = dtype - self.is_dense = is_dense self.index_dtype = index_dtype if self.is_dense: assert nnz == np.prod(shape) else: assert index_dtype is not None - self.bop_estimation_map = { - "add": self.add, - "sum": self.add, - "sub": self.add, - "mul": self.mul, - "prod": self.mul, - "truediv": self.truediv, - "pow": self.pow, - "matmul": lambda other: self.tensordot(other, axes=1), - "tensordot": self.tensordot, - "lt": lambda other: self.inequality("lt", other), - "le": lambda other: self.inequality("le", other), - "gt": lambda other: self.inequality("gt", other), - "ge": lambda other: self.inequality("ge", other), - "eq": lambda other: self.inequality("eq", other), - "ne": lambda other: self.inequality("ne", other), - } - def copy(self): - return TreeNodeSize( + return LeafMeta( self.shape, self.nnz, self.dtype, @@ -61,36 +82,49 @@ def nbytes(self): def uop(self, op_name): if op_name == "transpose": - return TreeNodeSize( + return LeafMeta( shape=tuple(reversed(self.shape)), nnz=self.nnz, dtype=self.dtype, is_dense=self.is_dense, ) - return TreeNodeSize( + nnz = self.nnz + is_dense = self.is_dense + if not is_dense and array_utils.get_sparse_uop_densify(op_name): + nnz = np.prod(self.shape) + is_dense = True + return LeafMeta( shape=self.shape, - nnz=self.nnz, + nnz=nnz, dtype=array_utils.get_uop_output_type(op_name, self.dtype), - is_dense=self.is_dense, + is_dense=is_dense, ) - def reduce_axis(self, op_name, axis, keepdims, transposed): - # Assume dense for now + def reduce_axis(self, op_name, axis, keepdims): shape = list(self.shape) - if transposed: - shape.reverse() if axis is None: shape = [] elif keepdims: shape[axis] = 1 else: shape.pop(axis) - return TreeNodeSize( - shape=tuple(shape), - nnz=np.prod(shape), - dtype=array_utils.get_uop_output_type(op_name, self.dtype), - is_dense=True, - ) + if self.is_dense: + return LeafMeta( + shape=tuple(shape), + nnz=np.prod(shape), + dtype=array_utils.get_uop_output_type(op_name, self.dtype), + is_dense=True, + ) + else: + # If any element in reduced axis is nonzero, result is nonzero. + p1 = self.nnz / np.prod(self.shape) + nnz = int((1 - (1 - p1) ** self.shape[axis]) * np.prod(shape)) + return LeafMeta( + shape=tuple(shape), + nnz=nnz, + dtype=array_utils.get_uop_output_type(op_name, self.dtype), + is_dense=False, + ) def _nnz_disjunction(self, other, shape): # If either element is nonzero, result is nonzero. @@ -123,7 +157,7 @@ def add(self, other): other.is_dense, ) if is_dense: - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=np.prod(shape), dtype=dtype, @@ -133,7 +167,7 @@ def add(self, other): raise ValueError( "TreeNodeSize.__add__ is inconsistent with sparse bop rules." ) - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=self._nnz_disjunction(other, shape), dtype=dtype, @@ -151,7 +185,7 @@ def mul(self, other): other.is_dense, ) if is_dense: - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=np.prod(shape), dtype=dtype, @@ -167,7 +201,7 @@ def mul(self, other): raise ValueError( "TreeNodeSize.__mul__ is inconsistent with sparse bop rules." 
) - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=nnz, dtype=dtype, @@ -185,7 +219,7 @@ def truediv(self, other): other.is_dense, ) if is_dense: - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=np.prod(shape), dtype=dtype, @@ -201,7 +235,7 @@ def truediv(self, other): nnz = self._nnz_selection(other, shape) else: nnz = self._nnz_disjunction(other, shape) - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=nnz, dtype=dtype, @@ -230,7 +264,7 @@ def tensordot(self, other, axes): "tensordot", self.is_dense, other.is_dense ) if is_dense: - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=np.prod(shape), dtype=dtype, @@ -246,7 +280,7 @@ def tensordot(self, other, axes): p2 = other.nnz / n2 m = np.prod(shape) k = np.prod(sum_shape) - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=int((1 - (1 - p1 * p2) ** k) * m), dtype=dtype, @@ -256,18 +290,18 @@ def tensordot(self, other, axes): def inequality(self, op_name, other): assert other.shape == () dtype = array_utils.get_bop_output_type(op_name, self.dtype, other.dtype) - return TreeNodeSize( + return LeafMeta( shape=self.shape, nnz=self.nnz, dtype=dtype, is_dense=self.is_dense, ) - def bop_dense(self, other): + def bop_dense(self, other, **kwargs): # NOTE: as catch-all fallback, this may be wrong for unknown non-elementwise binary ops. shape = array_utils.broadcast_shape(self.shape, other.shape) dtype = array_utils.get_bop_output_type("add", self.dtype, other.dtype) - return TreeNodeSize( + return LeafMeta( shape=shape, nnz=np.prod(shape), dtype=dtype, @@ -275,4 +309,21 @@ def bop_dense(self, other): ) def bop(self, op_name, other, **kwargs): - return self.bop_estimation_map.get(op_name, self.bop_dense)(other, **kwargs) + bop_estimation_map = { + "add": self.add, + "sum": self.add, + "sub": self.add, + "mul": self.mul, + "prod": self.mul, + "truediv": self.truediv, + "pow": self.pow, + "matmul": lambda other: self.tensordot(other, axes=1), + "tensordot": self.tensordot, + "lt": lambda other: self.inequality("lt", other), + "le": lambda other: self.inequality("le", other), + "gt": lambda other: self.inequality("gt", other), + "ge": lambda other: self.inequality("ge", other), + "eq": lambda other: self.inequality("eq", other), + "ne": lambda other: self.inequality("ne", other), + } + return bop_estimation_map.get(op_name, self.bop_dense)(other, **kwargs) diff --git a/nums/experimental/optimizer/reduction_ops.py b/nums/experimental/optimizer/reduction_ops.py index 5ef10f03..50228ad3 100644 --- a/nums/experimental/optimizer/reduction_ops.py +++ b/nums/experimental/optimizer/reduction_ops.py @@ -41,10 +41,11 @@ def __init__(self, cluster_state: ClusterState, tree_node_id=None, seed=1337): self.action_leaf_q = [] def __repr__(self): - return "Reduc(id=%s, op=%s, in=%d)" % ( + return "Reduc(id=%s, op=%s, in=%d, dense_kernel=%s)" % ( str(self.tree_node_id), self.op_name, len(self.children_dict), + self.dense_kernel, ) def get_children(self): @@ -83,6 +84,9 @@ def copy(self, cluster_state, parent=None, new_ids=False): if child.tree_node_id in self.leafs_dict: rop.leafs_dict[child_copy.tree_node_id] = child_copy # TODO (hme): How do we properly copy random state? + + rop.tree_node_meta = self.tree_node_meta + rop.dense_kernel = self.dense_kernel return rop def add_child(self, child: TreeNode): @@ -231,7 +235,7 @@ def execute_on(self, device: Device, leaf_ids=None) -> TreeNode: # Update cluster state with new block. 
# self.cluster_state.add_block(new_block.id, new_block.size(), [device]) self.cluster_state.add_block( - new_block.id, new_leaf.tree_node_size.nbytes, [device] + new_block.id, new_leaf.tree_node_meta.nbytes, [device] ) if not self.cluster_state.created_on_only: assert self.cluster_state.blocks_local(left.block.id, right.block.id) @@ -266,21 +270,33 @@ def _collapse(self, device: Device, left: Leaf, right: Leaf): block: BlockBase = lblock.copy() block.transposed = False block.dtype = array_utils.get_reduce_output_type(self.op_name, lblock.dtype) - block.oid = lblock.km.bop_reduce( - op_name, - lblock.oid, - rblock.oid, - lblock.transposed, - rblock.transposed, - syskwargs={"device": device}, - ) + if self.dense_kernel: + assert lblock.is_dense and rblock.is_dense + block.oid = lblock.km.bop_reduce( + op_name, + lblock.oid, + rblock.oid, + lblock.transposed, + rblock.transposed, + syskwargs={"device": device}, + ) + else: + assert not lblock.is_dense or not rblock.is_dense + block.oid = lblock.km.sparse_bop_reduce( + op_name, + lblock.oid, + rblock.oid, + lblock.transposed, + rblock.transposed, + syskwargs={"device": device}, + ) block._device = device leaf: Leaf = Leaf(self.cluster_state) leaf.block = block - leaf.tree_node_size = left.tree_node_size.bop( + leaf.tree_node_meta = left.tree_node_meta.bop( op_name, - right.tree_node_size, + right.tree_node_meta, **args, ) leaf.copy_on_op = self.copy_on_op @@ -303,13 +319,13 @@ def _mem_cost(self, leafs): if leaf_block.is_dense: return leaf_block.size() if sync_nnz > 1: - leafs[0].tree_node_size.nnz = leafs[0].block.nnz # Blocking fetch - leafs[1].tree_node_size.nnz = leafs[1].block.nnz # Blocking fetch + leafs[0].tree_node_meta.nnz = leafs[0].block.nnz # Blocking fetch + leafs[1].tree_node_meta.nnz = leafs[1].block.nnz # Blocking fetch return ( leafs[0] - .tree_node_size.bop( + .tree_node_meta.bop( self.op_name, - leafs[1].tree_node_size, + leafs[1].tree_node_meta, ) .nbytes ) @@ -350,10 +366,14 @@ def expression(self): # This will force a different hash for large fused reductions, # if this is, for whatever reason, needed. size = len(self.children_dict) - self._expression = "TreeReductionOp(op=%s, size=%s, id=%s)" % ( - self.op_name, - str(size), - self.tree_node_id, + self._expression = ( + "TreeReductionOp(op=%s, size=%s, id=%s, dense_kernel=%s)" + % ( + self.op_name, + str(size), + self.tree_node_id, + self.dense_kernel, + ) ) return self._expression diff --git a/tests/core/array/test_sparse.py b/tests/core/array/test_sparse.py index 4563771e..5a554799 100644 --- a/tests/core/array/test_sparse.py +++ b/tests/core/array/test_sparse.py @@ -1,9 +1,7 @@ import numpy as np import sparse -import pytest from nums.core.array.application import ArrayApplication -from nums.core.array.blockarray import BlockArray from nums.core.array.sparse import SparseBlockArray from nums.core.array.random import NumsRandomState @@ -83,16 +81,12 @@ def test_sparse_add(app_inst: ArrayApplication): assert np.array_equal(x2 - x1_sp, y_ba.get()) # Test sparse-scalar. 
- y_sp = x1_sp - 1 # __sub__ - y_sba = x1_sba - 1 - assert y_sba.nnz == y_sp.nnz # 16 - y_ba = y_sba.to_ba() - assert np.array_equal(x1 - 1, y_ba.get()) - y_sp = 1 - x1_sp # __rsub__ - y_sba = 1 - x1_sba - assert y_sba.nnz == y_sp.nnz # 16 - y_ba = y_sba.to_ba() - assert np.array_equal(1 - x1, y_ba.get()) + y = (x1_sp - 1).todense() # __sub__ + y_ba = x1_sba - 1 + assert np.array_equal(y, y_ba.get()) + y = (1 - x1_sp).todense() # __rsub__ + y_ba = 1 - x1_sba + assert np.array_equal(y, y_ba.get()) def test_sparse_mul(app_inst: ArrayApplication): diff --git a/tests/experimental/optimizer/test_fusion.py b/tests/experimental/optimizer/test_fusion.py index 7661fdae..4ced2e78 100644 --- a/tests/experimental/optimizer/test_fusion.py +++ b/tests/experimental/optimizer/test_fusion.py @@ -26,12 +26,14 @@ from nums.core.array.application import BlockArray from nums.core.array.base import BlockArrayBase +from nums.core.array.sparse import SparseBlockArray from nums.experimental.optimizer.clusterstate import ClusterState from nums.experimental.optimizer.grapharray import ( GraphArray, ) from nums.experimental.optimizer.tree_search import RandomTS from nums.experimental.optimizer.fusion import FuseGraph +from nums.experimental.optimizer.fusion_utils import print_graph from nums.experimental.optimizer.graph import TreeNode, Leaf, FunctionNode import conftest @@ -45,12 +47,8 @@ def fusion2(app, x, y): return x @ y -def fusion3(app, s, q, p): - return s * (q @ p.T) - - def ga_op( - app, func, x: BlockArray, y: BlockArray, copy_on_op=True, max_args=2 + app, func, x: BlockArrayBase, y: BlockArrayBase, copy_on_op=True, max_args=2 ) -> BlockArray: cluster_state: ClusterState = ClusterState(x.km.devices()) x_ga: GraphArray = GraphArray.from_ba(x, cluster_state, copy_on_op=copy_on_op) @@ -69,27 +67,6 @@ def ga_op( return BlockArray(result_ga.grid, x.km, result_ga.to_blocks()) -def ga_op_sampled_dense_dense( - app, func, s: BlockArray, p: BlockArray, q: BlockArray, copy_on_op=True, max_args=3 -) -> BlockArray: - cluster_state: ClusterState = ClusterState(s.km.devices()) - s_ga: GraphArray = GraphArray.from_ba(s, cluster_state, copy_on_op=copy_on_op) - p_ga: GraphArray = GraphArray.from_ba(p, cluster_state, copy_on_op=copy_on_op) - q_ga: GraphArray = GraphArray.from_ba(q, cluster_state, copy_on_op=copy_on_op) - op_ga: GraphArray = func(app, s_ga, p_ga, q_ga) - start_time = time.time() - fused_ga: GraphArray = op_ga.compile(max_args) - end_time = time.time() - result_ga: GraphArray = RandomTS( - seed=conftest.rs, - max_samples_per_step=1, - max_reduction_pairs=1, - force_final_action=True, - ).solve(fused_ga) - - return BlockArray(result_ga.grid, s.km, result_ga.to_blocks()) - - def test_fusion(app_inst_mock_none): app = app_inst_mock_none x_shape, x_block_shape = (10,), (5,) @@ -140,24 +117,132 @@ def test_tensordot(app_inst_mock_none): assert block.dtype == opt_z.dtype -def test_sparse_array(app_inst_mock_none): +def ga_op_sparse_2( + app, func, x: BlockArrayBase, y: BlockArrayBase, copy_on_op=True, max_args=2 +) -> SparseBlockArray: + cluster_state: ClusterState = ClusterState(x.km.devices()) + x_ga: GraphArray = GraphArray.from_ba(x, cluster_state, copy_on_op=copy_on_op) + y_ga: GraphArray = GraphArray.from_ba(y, cluster_state, copy_on_op=copy_on_op) + op_ga: GraphArray = func(app, x_ga, y_ga) + start_time = time.time() + fused_ga: GraphArray = op_ga.compile(max_args) + end_time = time.time() + result_ga: GraphArray = RandomTS( + seed=conftest.rs, + max_samples_per_step=1, + max_reduction_pairs=1, + 
force_final_action=True, + ).solve(fused_ga) + + return SparseBlockArray(result_ga.grid, x.km, result_ga.to_blocks()) + + +def spmm(app, p, q): + return p @ q + + +def test_spmm(app_inst_mock_none): app = app_inst_mock_none - q_shape, q_block_shape = (10, 2), (2, 2) - p_shape, p_block_shape = (20, 2), (2, 2) - s_shape, s_block_shape = (10, 20), (2, 2) - real_q = np.random.random(np.product(q_shape)).reshape(q_shape) + p_shape, p_block_shape = (10, 4), (2, 2) + q_shape, q_block_shape = (4, 10), (2, 2) + p: SparseBlockArray = app.random.sparse_normal( + shape=p_shape, block_shape=p_block_shape, p=0.1 + ) + q: SparseBlockArray = app.random.sparse_normal( + shape=q_shape, block_shape=q_block_shape, p=0.1 + ) + real_p = p.to_ba().get() + real_q = q.to_ba().get() + z: SparseBlockArray = spmm(app, p, q) + start_time = time.time() + opt_z: SparseBlockArray = ga_op_sparse_2(app, spmm, p, q) + end_time = time.time() + print(end_time - start_time) + assert z.nnz == opt_z.nnz + assert np.allclose(z.to_ba().get(), spmm(np, real_p, real_q)) + assert app.allclose(z.to_ba(), opt_z.to_ba()).get() + + +def ga_op_sparse_3( + app, + func, + s: BlockArrayBase, + p: BlockArrayBase, + q: BlockArrayBase, + copy_on_op=True, + max_args=3, +) -> SparseBlockArray: + cluster_state: ClusterState = ClusterState(s.km.devices()) + s_ga: GraphArray = GraphArray.from_ba(s, cluster_state, copy_on_op=copy_on_op) + p_ga: GraphArray = GraphArray.from_ba(p, cluster_state, copy_on_op=copy_on_op) + q_ga: GraphArray = GraphArray.from_ba(q, cluster_state, copy_on_op=copy_on_op) + op_ga: GraphArray = func(app, s_ga, p_ga, q_ga) + start_time = time.time() + fused_ga: GraphArray = op_ga.compile(max_args) + end_time = time.time() + print(fused_ga.graphs[0, 0]) + result_ga: GraphArray = RandomTS( + seed=conftest.rs, + max_samples_per_step=1, + max_reduction_pairs=1, + force_final_action=True, + ).solve(fused_ga) + + return SparseBlockArray(result_ga.grid, s.km, result_ga.to_blocks()) + + +def sparse_fusion(app, s, p, q): + return (s + s) * p * q + + +def test_sparse_fusion(app_inst_mock_none): + app = app_inst_mock_none + s_shape, s_block_shape = (10, 4), (2, 2) + p_shape, p_block_shape = (10, 4), (2, 2) + q_shape, q_block_shape = (10, 4), (2, 2) real_p = np.random.random(np.product(p_shape)).reshape(p_shape) - real_s = np.random.random(np.product(s_shape)).reshape(s_shape) + real_q = np.random.random(np.product(q_shape)).reshape(q_shape) + s: SparseBlockArray = app.random.sparse_normal( + shape=s_shape, block_shape=s_block_shape, p=0.1 + ) + p: BlockArray = app.array(real_p, p_block_shape) q: BlockArray = app.array(real_q, q_block_shape) + real_s = s.to_ba().get() + z: SparseBlockArray = sparse_fusion(app, s, p, q) + start_time = time.time() + opt_z: SparseBlockArray = ga_op_sparse_3(app, sparse_fusion, s, p, q) + end_time = time.time() + print(end_time - start_time) + assert z.nnz == opt_z.nnz + assert np.allclose(z.to_ba().get(), sparse_fusion(np, real_s, real_p, real_q)) + assert app.allclose(z.to_ba(), opt_z.to_ba()).get() + + +def sddmm(app, s, p, q): + return s * (p @ q.T) + + +def test_sddmm(app_inst_mock_none): + app = app_inst_mock_none + s_shape, s_block_shape = (20, 10), (2, 2) + p_shape, p_block_shape = (20, 4), (2, 2) + q_shape, q_block_shape = (10, 4), (2, 2) + real_p = np.random.random(np.product(p_shape)).reshape(p_shape) + real_q = np.random.random(np.product(q_shape)).reshape(q_shape) + s: SparseBlockArray = app.random.sparse_normal( + shape=s_shape, block_shape=s_block_shape, p=0.1 + ) p: BlockArray = 
app.array(real_p, p_block_shape) - s: BlockArray = app.array(real_s, s_block_shape) - z: BlockArray = fusion3(app, s, q, p) + q: BlockArray = app.array(real_q, q_block_shape) + real_s = s.to_ba().get() + z: SparseBlockArray = sddmm(app, s, p, q) start_time = time.time() - opt_z: BlockArray = ga_op_sampled_dense_dense(app, fusion3, s, q, p) + opt_z: SparseBlockArray = ga_op_sparse_3(app, sddmm, s, p, q) end_time = time.time() print(end_time - start_time) - assert np.allclose(z.get(), fusion3(np, real_s, real_q, real_p)) - assert app.allclose(z, opt_z).get() + assert z.nnz == opt_z.nnz + assert np.allclose(z.to_ba().get(), sddmm(np, real_s, real_p, real_q)) + assert app.allclose(z.to_ba(), opt_z.to_ba()).get() if __name__ == "__main__": @@ -165,8 +250,8 @@ def test_sparse_array(app_inst_mock_none): app = conftest.mock_ray_cluster((1, 1)) # test_sparse_array(app) - # test_fusion(app) - test_tensordot_variant2(app) + test_fusion(app) + # test_tensordot_variant2(app) conftest.destroy_mock_cluster(app) # app = conftest.mock_cluster((10, 1)) diff --git a/tests/experimental/optimizer/test_size.py b/tests/experimental/optimizer/test_meta.py similarity index 82% rename from tests/experimental/optimizer/test_size.py rename to tests/experimental/optimizer/test_meta.py index 340ae163..2b06ba78 100644 --- a/tests/experimental/optimizer/test_size.py +++ b/tests/experimental/optimizer/test_meta.py @@ -1,13 +1,13 @@ import numpy as np import sparse -from nums.experimental.optimizer.size import TreeNodeSize +from nums.experimental.optimizer.node_meta import LeafMeta def test_nbytes(): x1 = np.eye(10) x1_sp = sparse.COO.from_numpy(x1, fill_value=0) - x1_ts = TreeNodeSize( + x1_ts = LeafMeta( (10, 10), 10, np.int64, @@ -17,7 +17,7 @@ def test_nbytes(): def test_uop(): - x1_ts = TreeNodeSize( + x1_ts = LeafMeta( (10, 10), 10, np.int64, @@ -29,13 +29,13 @@ def test_uop(): def test_add(): - x1_ts = TreeNodeSize( + x1_ts = LeafMeta( (10, 10), 10, np.int64, False, ) - x2_ts = TreeNodeSize( + x2_ts = LeafMeta( (10, 10), 20, np.int64, @@ -47,13 +47,13 @@ def test_add(): def test_mul(): - x1_ts = TreeNodeSize( + x1_ts = LeafMeta( (10, 10), 10, np.int64, False, ) - x2_ts = TreeNodeSize( + x2_ts = LeafMeta( (10, 10), 20, np.int64, @@ -62,7 +62,7 @@ def test_mul(): y_ts = x1_ts * x2_ts assert np.allclose(y_ts.nnz, int(0.1 * 0.2 * 100)) - x2_ts = TreeNodeSize( + x2_ts = LeafMeta( (10, 1), 10, np.int64, @@ -71,7 +71,7 @@ def test_mul(): y_ts = x1_ts * x2_ts assert np.allclose(y_ts.nnz, int(0.1 * 100)) - x2_ts = TreeNodeSize( + x2_ts = LeafMeta( (10, 1), 10, np.int64, @@ -83,13 +83,13 @@ def test_mul(): def test_tensordot(): - x1_ts = TreeNodeSize( + x1_ts = LeafMeta( (10, 10), 10, np.int64, False, ) - x2_ts = TreeNodeSize( + x2_ts = LeafMeta( (10, 10), 20, np.int64, diff --git a/tests/experimental/optimizer/test_ops.py b/tests/experimental/optimizer/test_ops.py index 2098a692..156c9651 100644 --- a/tests/experimental/optimizer/test_ops.py +++ b/tests/experimental/optimizer/test_ops.py @@ -200,21 +200,24 @@ def compute_graph_array(app: ArrayApplication, ga) -> BlockArray: app = app_inst_mock_small cluster_state = ClusterState(app.km.devices()) - X = app.random.sparse_normal(shape=(10, 3), block_shape=(5, 3)) + X = app.random.sparse_normal(shape=(10, 6), block_shape=(5, 3), p=0.1) Xc = X - theta: SparseBlockArray = SparseBlockArray.from_ba( - app.zeros((Xc.shape[1],), (Xc.block_shape[1],), dtype=Xc.dtype) - ) + print(Xc.nnz) + theta: BlockArray = app.ones((Xc.shape[1],), (Xc.block_shape[1],), dtype=Xc.dtype) X_ga: 
GraphArray = GraphArray.from_ba(Xc, cluster_state) theta_ga: GraphArray = GraphArray.from_ba(theta, cluster_state) Z_ga: GraphArray = X_ga @ theta_ga Z_ga: GraphArray = collapse_graph_array(app, Z_ga) + # one_ga: GraphArray = GraphArray.from_ba( + # BlockArray.from_scalar(1, app.km), cluster_state + # ) one_ga: GraphArray = GraphArray.from_ba( - SparseBlockArray.from_scalar(1, app.km), cluster_state + app.ones((Xc.shape[0],), (Xc.block_shape[0],), dtype=Xc.dtype), cluster_state ) mu_ga: GraphArray = collapse_graph_array(app, one_ga / (one_ga + app.exp(-Z_ga))) - mu_ba: SparseBlockArray = compute_graph_array(app, mu_ga) - print(mu_ba.todense().get()) + mu_ba: BlockArray = compute_graph_array(app, mu_ga) + print(mu_ba.get()) + assert np.allclose(1 / (1 + np.exp(-(X.to_ba().get() @ theta.get()))), mu_ba.get()) def test_sparse_bop_2(app_inst_mock_small): @@ -256,7 +259,9 @@ def compute_graph_array(app: ArrayApplication, ga) -> BlockArray: P = P_sba.to_ba().get() Q = Q_sba.to_ba().get() S = S_sba.to_ba().get() - assert np.allclose(S - P @ Q.T, X_sba.to_ba().get()) + X = X_sba.to_ba().get() + print(X) + assert np.allclose(S - P @ Q.T, X) if __name__ == "__main__": From 27b29b8799147868e0c1bdb498ed861d78ed5c7c Mon Sep 17 00:00:00 2001 From: Daniel Zou Date: Sat, 17 Sep 2022 11:52:18 -0700 Subject: [PATCH 5/5] Merge from_sparse --- nums/core/array/application.py | 6 +++++- nums/core/array/sparse.py | 4 ++-- nums/numpy/api/creation.py | 2 +- tests/core/array/test_sparse.py | 4 +--- 4 files changed, 9 insertions(+), 7 deletions(-) diff --git a/nums/core/array/application.py b/nums/core/array/application.py index ce148994..9bc47aae 100644 --- a/nums/core/array/application.py +++ b/nums/core/array/application.py @@ -230,7 +230,11 @@ def loadtxt( def scalar(self, value): return BlockArray.from_scalar(value, self.km) - def array(self, array: Union[np.ndarray, sparse.COO, List[float]], block_shape: tuple = None): + def array( + self, + array: Union[np.ndarray, sparse.COO, List[float]], + block_shape: tuple = None, + ): if not isinstance(array, (np.ndarray, sparse.COO)): if array_utils.is_array_like(array): array = np.array(array) diff --git a/nums/core/array/sparse.py b/nums/core/array/sparse.py index 37939b68..118edcdc 100644 --- a/nums/core/array/sparse.py +++ b/nums/core/array/sparse.py @@ -307,10 +307,10 @@ def from_np(cls, arr, block_shape, copy, km): return rarr @classmethod - def from_sparse(cls, arr, block_shape, copy, km, fill_value=0): + def from_sparse(cls, arr, block_shape, copy, km): dtype_str = str(arr.dtype) grid = ArrayGrid(arr.shape, block_shape, dtype_str) - rarr = SparseBlockArray(grid, km, fill_value) + rarr = SparseBlockArray(grid, km) grid_entry_iterator = grid.get_entry_iterator() for grid_entry in grid_entry_iterator: grid_slice = grid.get_slice(grid_entry) diff --git a/nums/numpy/api/creation.py b/nums/numpy/api/creation.py index a76e815c..5257e38b 100644 --- a/nums/numpy/api/creation.py +++ b/nums/numpy/api/creation.py @@ -227,7 +227,7 @@ def array(object, dtype=None, copy=True, order="K", ndmin=0, subok=False) -> Blo def from_coo(a: sparse.COO): - assert(isinstance(a, sparse.COO)) + assert isinstance(a, sparse.COO) dtype = np.__getattribute__(str(a.dtype)) shape = a.shape app = _instance() diff --git a/tests/core/array/test_sparse.py b/tests/core/array/test_sparse.py index 5a554799..b03c8472 100644 --- a/tests/core/array/test_sparse.py +++ b/tests/core/array/test_sparse.py @@ -11,7 +11,6 @@ def test_sparse_init(app_inst: ArrayApplication): x_ba = app_inst.array(x1, 
block_shape=(2, 2)) x_sba = SparseBlockArray.from_ba(x_ba) assert x_sba.nnz == 8 - assert x_sba.nbytes == 8 * 4 + 2 * 8 * 8 y_ba = x_sba.to_ba() assert np.array_equal(x1, y_ba.get()) @@ -19,12 +18,11 @@ def test_sparse_init(app_inst: ArrayApplication): def test_from_coo(app_inst: ArrayApplication): row_coords = [0, 1, 0, 1, 2, 2, 3, 3] col_coords = [3, 2, 2, 3, 0, 1, 0, 1] - values = [1, 1, 1, 1, 2, 2, 2, 2] + values = [1, 1, 1, 1, 2, 2, 2, 2] x_sp = sparse.COO([row_coords, col_coords], values) x_de = x_sp.todense() x_sba = app_inst.array(x_sp, block_shape=(2, 2)) assert x_sba.nnz == 8 - assert x_sba.nbytes == 8 * 4 + 2 * 8 * 8 y_ba = x_sba.to_ba() assert np.array_equal(x_de, y_ba.get())