diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 05e8b56d..d8ea226a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -28,35 +28,17 @@ repos:
   - id: requirements-txt-fixer
   - id: trailing-whitespace
 
-- repo: https://github.com/timothycrosley/isort
-  rev: 5.10.1
+- repo: https://github.com/astral-sh/ruff-pre-commit
+  # Ruff version.
+  rev: v0.6.1
   hooks:
-  - id: isort
+  # Run the linter.
+  - id: ruff
+    args: [ --fix ]
+  # Run the formatter.
+  - id: ruff-format
 
-- repo: https://github.com/pre-commit/mirrors-mypy
-  rev: v0.971
+- repo: https://github.com/RobertCraigie/pyright-python
+  rev: v1.1.376
   hooks:
-  - id: mypy
-    exclude: ^(docs/)|(project/)|(assignments/)|(project/interface/)
-
-
-# Black, the code formatter, natively supports pre-commit
-- repo: https://github.com/psf/black
-  rev: 22.6.0
-  hooks:
-  - id: black
-
-# Flake8 also supports pre-commit natively (same author)
-- repo: https://github.com/PyCQA/flake8
-  rev: 5.0.4
-  hooks:
-  - id: flake8
-    additional_dependencies:
-    - pep8-naming
-    exclude: ^(docs/)|(assignments/)|(project/interface/)
-
-# Doc linters
-- repo: https://github.com/terrencepreilly/darglint
-  rev: v1.8.1
-  hooks:
-  - id: darglint
+  - id: pyright
diff --git a/files_to_sync.txt b/files_to_sync.txt
index ac9fb875..727a323e 100644
--- a/files_to_sync.txt
+++ b/files_to_sync.txt
@@ -4,5 +4,6 @@ minitorch/autodiff.py
 minitorch/scalar.py
 minitorch/scalar_functions.py
 minitorch/module.py
+minitorch/datasets.py
 project/run_manual.py
-project/run_scalar.py
\ No newline at end of file
+project/run_scalar.py
diff --git a/minitorch/__init__.py b/minitorch/__init__.py
index 552b73ad..4e67014d 100644
--- a/minitorch/__init__.py
+++ b/minitorch/__init__.py
@@ -8,4 +8,5 @@
 from .testing import *  # noqa: F401,F403
 from .module import *  # noqa: F401,F403
 from .autodiff import *  # noqa: F401,F403
+from .scalar import *  # noqa: F401,F403
 from .module import *  # noqa: F401,F403
diff --git a/minitorch/autodiff.py b/minitorch/autodiff.py
index f7fa3b36..5ddba9d3 100644
--- a/minitorch/autodiff.py
+++ b/minitorch/autodiff.py
@@ -1,26 +1,29 @@
+from __future__ import annotations
+
 from dataclasses import dataclass
-from typing import Any, Iterable, List, Tuple
+from typing import Any, Iterable, List, Tuple, Protocol
 
-from typing_extensions import Protocol
 
 # ## Task 1.1
 # Central Difference calculation
 
 
 def central_difference(f: Any, *vals: Any, arg: int = 0, epsilon: float = 1e-6) -> Any:
-    r"""
-    Computes an approximation to the derivative of `f` with respect to one arg.
+    r"""Computes an approximation to the derivative of `f` with respect to one arg.
 
     See :doc:`derivative` or https://en.wikipedia.org/wiki/Finite_difference for more details.
 
     Args:
+    ----
        f : arbitrary function from n-scalar args to one value
        *vals : n-float values $x_0 \ldots x_{n-1}$
        arg : the number $i$ of the arg to compute the derivative
        epsilon : a small constant
 
    Returns:
+    -------
        An approximation of $f'_i(x_0, \ldots, x_{n-1})$
+
    """
    raise NotImplementedError("Need to include this file from past assignment.")
@@ -29,65 +32,60 @@ def central_difference(f: Any, *vals: Any, arg: int = 0, epsilon: float = 1e-6)
 
 
 class Variable(Protocol):
-    def accumulate_derivative(self, x: Any) -> None:
-        pass
+    def accumulate_derivative(self, x: Any) -> None: ...
 
     @property
-    def unique_id(self) -> int:
-        pass
+    def unique_id(self) -> int: ...
 
-    def is_leaf(self) -> bool:
-        pass
+    def is_leaf(self) -> bool: ...
 
-    def is_constant(self) -> bool:
-        pass
+    def is_constant(self) -> bool: ...
 
     @property
-    def parents(self) -> Iterable["Variable"]:
-        pass
+    def parents(self) -> Iterable[Variable]: ...
 
-    def chain_rule(self, d_output: Any) -> Iterable[Tuple["Variable", Any]]:
-        pass
+    def chain_rule(self, d_output: Any) -> Iterable[Tuple[Variable, Any]]: ...
 
 
 def topological_sort(variable: Variable) -> Iterable[Variable]:
-    """
-    Computes the topological order of the computation graph.
+    """Computes the topological order of the computation graph.
 
     Args:
+    ----
        variable: The right-most variable
 
    Returns:
+    -------
        Non-constant Variables in topological order starting from the right.
+
    """
    raise NotImplementedError("Need to include this file from past assignment.")
 
 
 def backpropagate(variable: Variable, deriv: Any) -> None:
-    """
-    Runs backpropagation on the computation graph in order to
+    """Runs backpropagation on the computation graph in order to
     compute derivatives for the leave nodes.
 
     Args:
+    ----
        variable: The right-most variable
        deriv : Its derivative that we want to propagate backward to the leaves.
 
    No return. Should write to its results to the derivative values of each leaf through `accumulate_derivative`.
+
    """
    raise NotImplementedError("Need to include this file from past assignment.")
 
 
 @dataclass
 class Context:
-    """
-    Context class is used by `Function` to store information during the forward pass.
-    """
+    """Context class is used by `Function` to store information during the forward pass."""
 
     no_grad: bool = False
     saved_values: Tuple[Any, ...] = ()
 
     def save_for_backward(self, *values: Any) -> None:
-        "Store the given `values` if they need to be used during backpropagation."
+        """Store the given `values` if they need to be used during backpropagation."""
         if self.no_grad:
             return
         self.saved_values = values
diff --git a/minitorch/module.py b/minitorch/module.py
index 11fc1f39..4782cb4f 100644
--- a/minitorch/module.py
+++ b/minitorch/module.py
@@ -4,11 +4,11 @@
 
 
 class Module:
-    """
-    Modules form a tree that store parameters and other
+    """Modules form a tree that store parameters and other
     submodules. They make up the basis of neural network stacks.
 
-    Attributes:
+    Attributes
+    ----------
         _modules : Storage of the child modules
         _parameters : Storage of the module's parameters
         training : Whether the module is in training mode or evaluation mode
@@ -25,42 +25,44 @@ def __init__(self) -> None:
         self.training = True
 
     def modules(self) -> Sequence[Module]:
-        "Return the direct child modules of this module."
+        """Return the direct child modules of this module."""
         m: Dict[str, Module] = self.__dict__["_modules"]
         return list(m.values())
 
     def train(self) -> None:
-        "Set the mode of this module and all descendent modules to `train`."
+        """Set the mode of this module and all descendent modules to `train`."""
         raise NotImplementedError("Need to include this file from past assignment.")
 
     def eval(self) -> None:
-        "Set the mode of this module and all descendent modules to `eval`."
+        """Set the mode of this module and all descendent modules to `eval`."""
         raise NotImplementedError("Need to include this file from past assignment.")
 
     def named_parameters(self) -> Sequence[Tuple[str, Parameter]]:
-        """
-        Collect all the parameters of this module and its descendents.
-
-        Returns:
+        """Collect all the parameters of this module and its descendents.
 
+        Returns
+        -------
             The name and `Parameter` of each ancestor parameter.
+
         """
         raise NotImplementedError("Need to include this file from past assignment.")
 
     def parameters(self) -> Sequence[Parameter]:
-        "Enumerate over all the parameters of this module and its descendents."
+        """Enumerate over all the parameters of this module and its descendents."""
         raise NotImplementedError("Need to include this file from past assignment.")
 
     def add_parameter(self, k: str, v: Any) -> Parameter:
-        """
-        Manually add a parameter. Useful helper for scalar parameters.
+        """Manually add a parameter. Useful helper for scalar parameters.
 
         Args:
+        ----
             k: Local name of the parameter.
             v: Value for the parameter.
 
         Returns:
+        -------
             Newly created parameter.
+
         """
         val = Parameter(v, k)
         self.__dict__["_parameters"][k] = val
@@ -114,8 +116,7 @@ def _addindent(s_: str, numSpaces: int) -> str:
 
 
 class Parameter:
-    """
-    A Parameter is a special container stored in a `Module`.
+    """A Parameter is a special container stored in a `Module`.
 
     It is designed to hold a `Variable`, but we allow it to hold
     any value for testing.
@@ -130,7 +131,7 @@ def __init__(self, x: Any, name: Optional[str] = None) -> None:
                 self.value.name = self.name
 
     def update(self, x: Any) -> None:
-        "Update the parameter value."
+        """Update the parameter value."""
         self.value = x
         if hasattr(x, "requires_grad_"):
             self.value.requires_grad_(True)
diff --git a/minitorch/operators.py b/minitorch/operators.py
index 895ae82d..0be83e33 100644
--- a/minitorch/operators.py
+++ b/minitorch/operators.py
@@ -1,185 +1,52 @@
-"""
-Collection of the core mathematical operators used throughout the code base.
-"""
+"""Collection of the core mathematical operators used throughout the code base."""
 
 import math
-from typing import Callable, Iterable
 
 # ## Task 0.1
+from typing import Callable, Iterable
+
 #
 # Implementation of a prelude of elementary functions.
 
-
-def mul(x: float, y: float) -> float:
-    "$f(x, y) = x * y$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def id(x: float) -> float:
-    "$f(x) = x$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def add(x: float, y: float) -> float:
-    "$f(x, y) = x + y$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def neg(x: float) -> float:
-    "$f(x) = -x$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def lt(x: float, y: float) -> float:
-    "$f(x) =$ 1.0 if x is less than y else 0.0"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def eq(x: float, y: float) -> float:
-    "$f(x) =$ 1.0 if x is equal to y else 0.0"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def max(x: float, y: float) -> float:
-    "$f(x) =$ x if x is greater than y else y"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def is_close(x: float, y: float) -> float:
-    "$f(x) = |x - y| < 1e-2$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def sigmoid(x: float) -> float:
-    r"""
-    $f(x) = \frac{1.0}{(1.0 + e^{-x})}$
-
-    (See https://en.wikipedia.org/wiki/Sigmoid_function )
-
-    Calculate as
-
-    $f(x) = \frac{1.0}{(1.0 + e^{-x})}$ if x >=0 else $\frac{e^x}{(1.0 + e^{x})}$
-
-    for stability.
-    """
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def relu(x: float) -> float:
-    """
-    $f(x) =$ x if x is greater than 0, else 0
-
-    (See https://en.wikipedia.org/wiki/Rectifier_(neural_networks) .)
-    """
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-EPS = 1e-6
-
-
-def log(x: float) -> float:
-    "$f(x) = log(x)$"
-    return math.log(x + EPS)
-
-
-def exp(x: float) -> float:
-    "$f(x) = e^{x}$"
-    return math.exp(x)
-
-
-def log_back(x: float, d: float) -> float:
-    r"If $f = log$ as above, compute $d \times f'(x)$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def inv(x: float) -> float:
-    "$f(x) = 1/x$"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def inv_back(x: float, d: float) -> float:
-    r"If $f(x) = 1/x$ compute $d \times f'(x)$"
-    raise NotImplementedError("Need to include this file from past assignment.")
+# Mathematical functions:
+# - mul
+# - id
+# - add
+# - neg
+# - lt
+# - eq
+# - max
+# - is_close
+# - sigmoid
+# - relu
+# - log
+# - exp
+# - log_back
+# - inv
+# - inv_back
+# - relu_back
+#
+# For sigmoid calculate as:
+# $f(x) = \frac{1.0}{(1.0 + e^{-x})}$ if x >=0 else $\frac{e^x}{(1.0 + e^{x})}$
+# For is_close:
+# $f(x) = |x - y| < 1e-2$
 
 
-def relu_back(x: float, d: float) -> float:
-    r"If $f = relu$ compute $d \times f'(x)$"
-    raise NotImplementedError("Need to include this file from past assignment.")
 
 
 # ## Task 0.3
 # Small practice library of elementary higher-order functions.
 
-
-def map(fn: Callable[[float], float]) -> Callable[[Iterable[float]], Iterable[float]]:
-    """
-    Higher-order map.
-
-    See https://en.wikipedia.org/wiki/Map_(higher-order_function)
-
-    Args:
-        fn: Function from one value to one value.
-
-    Returns:
-        A function that takes a list, applies `fn` to each element, and returns a
-         new list
-    """
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def negList(ls: Iterable[float]) -> Iterable[float]:
-    "Use `map` and `neg` to negate each element in `ls`"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def zipWith(
-    fn: Callable[[float, float], float]
-) -> Callable[[Iterable[float], Iterable[float]], Iterable[float]]:
-    """
-    Higher-order zipwith (or map2).
-
-    See https://en.wikipedia.org/wiki/Map_(higher-order_function)
-
-    Args:
-        fn: combine two values
-
-    Returns:
-        Function that takes two equally sized lists `ls1` and `ls2`, produce a new list by
-        applying fn(x, y) on each pair of elements.
-
-    """
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def addLists(ls1: Iterable[float], ls2: Iterable[float]) -> Iterable[float]:
-    "Add the elements of `ls1` and `ls2` using `zipWith` and `add`"
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def reduce(
-    fn: Callable[[float, float], float], start: float
-) -> Callable[[Iterable[float]], float]:
-    r"""
-    Higher-order reduce.
-
-    Args:
-        fn: combine two values
-        start: start value $x_0$
-
-    Returns:
-        Function that takes a list `ls` of elements
-        $x_1 \ldots x_n$ and computes the reduction :math:`fn(x_3, fn(x_2,
-        fn(x_1, x_0)))`
-    """
-    raise NotImplementedError("Need to include this file from past assignment.")
-
-
-def sum(ls: Iterable[float]) -> float:
-    "Sum up a list using `reduce` and `add`."
-    raise NotImplementedError("Need to include this file from past assignment.")
+# Implement the following core functions
+# - map
+# - zipWith
+# - reduce
+#
+# Use these to implement
+# - negList : negate a list
+# - addLists : add two lists together
+# - sum: sum lists
+# - prod: take the product of lists
 
 
-def prod(ls: Iterable[float]) -> float:
-    "Product of a list using `reduce` and `mul`."
-    raise NotImplementedError("Need to include this file from past assignment.")
diff --git a/minitorch/optim.py b/minitorch/optim.py
index 4150b57f..21c9dde2 100644
--- a/minitorch/optim.py
+++ b/minitorch/optim.py
@@ -1,6 +1,7 @@
 from typing import Sequence
 
 from .module import Parameter
+from .scalar import Scalar
 
 
 class Optimizer:
@@ -28,6 +29,9 @@ def step(self) -> None:
         for p in self.parameters:
             if p.value is None:
                 continue
+            if hasattr(p.value, "derivative"):
+                if p.value.derivative is not None:
+                    p.update(Scalar(p.value.data - self.lr * p.value.derivative))
             elif hasattr(p.value, "grad"):
                 if p.value.grad is not None:
                     p.update(p.value - self.lr * p.value.grad)
diff --git a/minitorch/scalar.py b/minitorch/scalar.py
index 3c853a2e..79c49683 100644
--- a/minitorch/scalar.py
+++ b/minitorch/scalar.py
@@ -5,6 +5,7 @@
 
 import numpy as np
 
+from dataclasses import field
 from .autodiff import Context, Variable, backpropagate, central_difference
 from .scalar_functions import (
     EQ,
@@ -25,11 +26,11 @@
 
 @dataclass
 class ScalarHistory:
-    """
-    `ScalarHistory` stores the history of `Function` operations that was
+    """`ScalarHistory` stores the history of `Function` operations that was
     used to construct the current Variable.
 
-    Attributes:
+    Attributes
+    ----------
         last_fn : The last Function that was called.
         ctx : The context for that Function.
         inputs : The inputs that were given when `last_fn.forward` was called.
@@ -47,40 +48,30 @@ class ScalarHistory:
 _var_count = 0
 
 
+@dataclass
 class Scalar:
-    """
-    A reimplementation of scalar values for autodifferentiation
+    """A reimplementation of scalar values for autodifferentiation
     tracking.  Scalar Variables behave as close as possible to standard
     Python numbers while also tracking the operations that led to the
     number's creation. They can only be manipulated by
     `ScalarFunction`.
     """
 
-    history: Optional[ScalarHistory]
-    derivative: Optional[float]
     data: float
-    unique_id: int
-    name: str
-
-    def __init__(
-        self,
-        v: float,
-        back: ScalarHistory = ScalarHistory(),
-        name: Optional[str] = None,
-    ):
+    history: Optional[ScalarHistory] = field(default_factory=ScalarHistory)
+    derivative: Optional[float] = None
+    name: str = field(default="")
+    unique_id: int = field(default=0)
+
+    def __post_init__(self):
         global _var_count
         _var_count += 1
-        self.unique_id = _var_count
-        self.data = float(v)
-        self.history = back
-        self.derivative = None
-        if name is not None:
-            self.name = name
-        else:
-            self.name = str(self.unique_id)
+        object.__setattr__(self, "unique_id", _var_count)
+        object.__setattr__(self, "name", str(self.unique_id))
+        object.__setattr__(self, "data", float(self.data))
 
     def __repr__(self) -> str:
-        return "Scalar(%f)" % self.data
+        return f"Scalar({self.data})"
 
     def __mul__(self, b: ScalarLike) -> Scalar:
         return Mul.apply(self, b)
@@ -91,62 +82,33 @@ def __truediv__(self, b: ScalarLike) -> Scalar:
         return Mul.apply(self, Inv.apply(b))
 
     def __rtruediv__(self, b: ScalarLike) -> Scalar:
         return Mul.apply(b, Inv.apply(self))
 
-    def __add__(self, b: ScalarLike) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
     def __bool__(self) -> bool:
         return bool(self.data)
 
-    def __lt__(self, b: ScalarLike) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def __gt__(self, b: ScalarLike) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def __eq__(self, b: ScalarLike) -> Scalar:  # type: ignore[override]
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def __sub__(self, b: ScalarLike) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def __neg__(self) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
     def __radd__(self, b: ScalarLike) -> Scalar:
         return self + b
 
     def __rmul__(self, b: ScalarLike) -> Scalar:
         return self * b
 
-    def log(self) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def exp(self) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def sigmoid(self) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
-    def relu(self) -> Scalar:
-        raise NotImplementedError("Need to include this file from past assignment.")
-
     # Variable elements for backprop
     def accumulate_derivative(self, x: Any) -> None:
-        """
-        Add `val` to the the derivative accumulated on this variable.
+        """Add `val` to the derivative accumulated on this variable.
         Should only be called during autodifferentiation on leaf variables.
 
         Args:
+        ----
             x: value to be accumulated
+
         """
         assert self.is_leaf(), "Only leaf variables can have derivatives."
         if self.derivative is None:
-            self.derivative = 0.0
-        self.derivative += x
+            self.__setattr__("derivative", 0.0)
+        self.__setattr__("derivative", self.derivative + x)
 
     def is_leaf(self) -> bool:
-        "True if this variable created by the user (no `last_fn`)"
+        """True if this variable created by the user (no `last_fn`)"""
         return self.history is not None and self.history.last_fn is None
 
     def is_constant(self) -> bool:
@@ -166,26 +128,30 @@ def chain_rule(self, d_output: Any) -> Iterable[Tuple[Variable, Any]]:
         raise NotImplementedError("Need to include this file from past assignment.")
 
     def backward(self, d_output: Optional[float] = None) -> None:
-        """
-        Calls autodiff to fill in the derivatives for the history of this object.
+        """Calls autodiff to fill in the derivatives for the history of this object.
 
         Args:
+        ----
             d_output (number, opt): starting derivative to backpropagate through the model
                                     (typically left out, and assumed to be 1.0).
+
         """
         if d_output is None:
             d_output = 1.0
         backpropagate(self, d_output)
+        raise NotImplementedError("Need to include this file from past assignment.")
+
 
 def derivative_check(f: Any, *scalars: Scalar) -> None:
-    """
-    Checks that autodiff works on a python function.
+    """Checks that autodiff works on a python function.
     Asserts False if derivative is incorrect.
 
-    Parameters:
+    Parameters
+    ----------
         f : function from n-scalars to 1-scalar.
         *scalars  : n input scalar values.
+
     """
     out = f(*scalars)
     out.backward()
diff --git a/minitorch/tensor.py b/minitorch/tensor.py
index c675699b..cfea66c9 100644
--- a/minitorch/tensor.py
+++ b/minitorch/tensor.py
@@ -1,6 +1,4 @@
-"""
-Implementation of the core Tensor object for autodifferentiation.
-"""
+"""Implementation of the core Tensor object for autodifferentiation."""
 
 from __future__ import annotations
 
@@ -47,8 +45,7 @@
 
 @dataclass
 class History:
-    """
-    `History` stores the history of `Function` operations that was
+    """`History` stores the history of `Function` operations that was
     used to construct the current Variable.
     """
 
@@ -61,8 +58,7 @@ class History:
 
 
 class Tensor:
-    """
-    Tensor is a generalization of Scalar in that it is a Variable that
+    """Tensor is a generalization of Scalar in that it is a Variable that
     handles multidimensional arrays.
     """
 
@@ -103,39 +99,14 @@ def requires_grad(self) -> bool:
         return self.history is not None
 
     def to_numpy(self) -> npt.NDArray[np.float64]:
-        """
-        Returns:
-             Converted to numpy array
-        """
-        return self.contiguous()._tensor._storage.reshape(self.shape)
-
-    # Properties
-    @property
-    def shape(self) -> UserShape:
-        """
-        Returns:
-             shape of the tensor
-        """
-        return self._tensor.shape
-
-    @property
-    def size(self) -> int:
-        """
-        Returns:
-             int : size of the tensor
-        """
-        return self._tensor.size
+        """Returns
+        Converted to numpy array
 
-    @property
-    def dims(self) -> int:
         """
-        Returns:
-             int : dimensionality of the tensor
-        """
-        return self._tensor.dims
+        return self.contiguous()._tensor._storage.reshape(self.shape)
 
     def _ensure_tensor(self, b: TensorLike) -> Tensor:
-        "Turns a python number into a tensor with the same backend."
+        """Turns a python number into a tensor with the same backend."""
         if isinstance(b, (int, float)):
             c = Tensor.make([b], (1,), backend=self.backend)
         else:
@@ -143,93 +114,14 @@ def _ensure_tensor(self, b: TensorLike) -> Tensor:
             c = b
         return c
 
-    # Functions
-    def __add__(self, b: TensorLike) -> Tensor:
-        return Add.apply(self, self._ensure_tensor(b))
-
-    def __sub__(self, b: TensorLike) -> Tensor:
-        return Add.apply(self, -self._ensure_tensor(b))
-
-    def __mul__(self, b: TensorLike) -> Tensor:
-        return Mul.apply(self, self._ensure_tensor(b))
-
-    def __truediv__(self, b: TensorLike) -> Tensor:
-        return Mul.apply(self, Inv.apply(self._ensure_tensor(b)))
-
-    def __rtruediv__(self, b: TensorLike) -> Tensor:
-        return Mul.apply(self._ensure_tensor(b), Inv.apply(self))
-
-    def __matmul__(self, b: Tensor) -> Tensor:
-        "Not used until Module 3"
-        return MatMul.apply(self, b)
-
-    def __lt__(self, b: TensorLike) -> Tensor:
-        return LT.apply(self, self._ensure_tensor(b))
-
-    def __eq__(self, b: TensorLike) -> Tensor:  # type: ignore[override]
-        return EQ.apply(self, self._ensure_tensor(b))
-
-    def __gt__(self, b: TensorLike) -> Tensor:
-        return LT.apply(self._ensure_tensor(b), self)
-
-    def __neg__(self) -> Tensor:
-        return Neg.apply(self)
-
-    def __radd__(self, b: TensorLike) -> Tensor:
-        return self + b
-
-    def __rmul__(self, b: TensorLike) -> Tensor:
-        return self * b
-
-    def all(self, dim: Optional[int] = None) -> Tensor:
-        if dim is None:
-            return All.apply(self.view(self.size), self._ensure_tensor(0))
-        else:
-            return All.apply(self, self._ensure_tensor(dim))
-
-    def is_close(self, y: Tensor) -> Tensor:
-        return IsClose.apply(self, y)
-
-    def sigmoid(self) -> Tensor:
-        return Sigmoid.apply(self)
-
-    def relu(self) -> Tensor:
-        return ReLU.apply(self)
-
-    def log(self) -> Tensor:
-        return Log.apply(self)
-
-    def exp(self) -> Tensor:
-        return Exp.apply(self)
-
     def item(self) -> float:
+        """Convert a 1-element tensor to a float"""
         assert self.size == 1
-        return self[0]
-
-    def sum(self, dim: Optional[int] = None) -> Tensor:
-        "Compute the sum over dimension `dim`"
-        if dim is None:
-            return Sum.apply(self.contiguous().view(self.size), self._ensure_tensor(0))
-        else:
-            return Sum.apply(self, self._ensure_tensor(dim))
-
-    def mean(self, dim: Optional[int] = None) -> Tensor:
-        "Compute the mean over dimension `dim`"
-        if dim is not None:
-            return self.sum(dim) / self.shape[dim]
-        else:
-            return self.sum() / self.size
-
-    def permute(self, *order: int) -> Tensor:
-        "Permute tensor dimensions to *order"
-        return Permute.apply(self, tensor(list(order)))
-
-    def view(self, *shape: int) -> Tensor:
-        "Change the shape of the tensor to a new shape with the same size"
-        return View.apply(self, tensor(list(shape)))
+        x: float = self._tensor._storage[0]
+        return x
 
     def contiguous(self) -> Tensor:
-        "Return a contiguous tensor with the same data"
+        """Return a contiguous tensor with the same data"""
         return Copy.apply(self)
 
     def __repr__(self) -> str:
@@ -259,24 +151,24 @@ def make(
         strides: Optional[UserStrides] = None,
         backend: Optional[TensorBackend] = None,
     ) -> Tensor:
-        "Create a new tensor from data"
+        """Create a new tensor from data"""
         return Tensor(TensorData(storage, shape, strides), backend=backend)
 
     def expand(self, other: Tensor) -> Tensor:
-        """
-        Method used to allow for backprop over broadcasting.
+        """Method used to allow for backprop over broadcasting.
 
         This method is called when the output of `backward`
         is a different size than the input of `forward`.
 
-        Parameters:
+        Args:
+        ----
            other : backward tensor (must broadcast with self)
 
        Returns:
+        -------
            Expanded version of `other` with the right derivatives
 
        """
-        # Case 1: Both the same shape.
         if self.shape == other.shape:
             return other
@@ -313,30 +205,35 @@ def zero(shape: UserShape) -> Tensor:
         return out
 
     def tuple(self) -> Tuple[Storage, Shape, Strides]:
+        """Get the tensor data info as a tuple."""
         return self._tensor.tuple()
 
     def detach(self) -> Tensor:
+        """Detach from backprop"""
         return Tensor(self._tensor, backend=self.backend)
 
     # Variable elements for backprop
     def accumulate_derivative(self, x: Any) -> None:
-        """
-        Add `val` to the the derivative accumulated on this variable.
+        """Add `val` to the derivative accumulated on this variable.
         Should only be called during autodifferentiation on leaf variables.
 
         Args:
+        ----
            x : value to be accumulated
+
         """
         assert self.is_leaf(), "Only leaf variables can have derivatives."
         if self.grad is None:
             self.grad = Tensor.make(
-                [0] * int(operators.prod(self.shape)), self.shape, backend=self.backend
+                [0.0] * int(operators.prod(self.shape)),
+                self.shape,
+                backend=self.backend,
             )
         self.grad += x
 
     def is_leaf(self) -> bool:
-        "True if this variable created by the user (no `last_fn`)"
+        """True if this variable created by the user (no `last_fn`)"""
         return self.history is not None and self.history.last_fn is None
 
     def is_constant(self) -> bool:
@@ -366,8 +263,6 @@ def backward(self, grad_output: Optional[Tensor] = None) -> None:
             grad_output = Tensor.make([1.0], (1,), backend=self.backend)
         backpropagate(self, grad_output)
 
-    def zero_grad_(self) -> None:  # pragma: no cover
-        """
-        Reset the derivative on this variable.
-        """
-        self.grad = None
+    # Functions
+    # TODO: Implement for Task 2.3.
+    raise NotImplementedError("Need to implement for Task 2.3")
diff --git a/minitorch/tensor_data.py b/minitorch/tensor_data.py
index 452b7904..e9d1156b 100644
--- a/minitorch/tensor_data.py
+++ b/minitorch/tensor_data.py
@@ -4,6 +4,7 @@
 from typing import Iterable, Optional, Sequence, Tuple, Union
 
 import numba
+import numba.cuda
 import numpy as np
 import numpy.typing as npt
 from numpy import array, float64
@@ -15,7 +16,8 @@
 
 
 class IndexingError(RuntimeError):
-    "Exception raised for indexing errors."
+    """Exception raised for indexing errors."""
+
     pass
 
 
@@ -31,30 +33,31 @@ class IndexingError(RuntimeError):
 
 
 def index_to_position(index: Index, strides: Strides) -> int:
-    """
-    Converts a multidimensional tensor `index` into a single-dimensional position in
+    """Converts a multidimensional tensor `index` into a single-dimensional position in
     storage based on strides.
 
     Args:
+    ----
        index : index tuple of ints
        strides : tensor strides
 
    Returns:
+    -------
        Position in storage
-    """
 
+    """
    # TODO: Implement for Task 2.1.
    raise NotImplementedError("Need to implement for Task 2.1")
 
 
 def to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None:
-    """
-    Convert an `ordinal` to an index in the `shape`.
+    """Convert an `ordinal` to an index in the `shape`.
 
     Should ensure that enumerating position 0 ... size of a tensor produces
     every index exactly once. It may not be the inverse of `index_to_position`.
 
     Args:
+    ----
        ordinal: ordinal position to convert.
        shape : tensor shape.
        out_index : return index corresponding to position.
@@ -67,45 +70,51 @@ def to_index(ordinal: int, shape: Shape, out_index: OutIndex) -> None:
 def broadcast_index(
     big_index: Index, big_shape: Shape, shape: Shape, out_index: OutIndex
 ) -> None:
-    """
-    Convert a `big_index` into `big_shape` to a smaller `out_index`
+    """Convert a `big_index` into `big_shape` to a smaller `out_index`
     into `shape` following broadcasting rules. In this case
     it may be larger or with more dimensions than the `shape`
     given. Additional dimensions may need to be mapped to 0 or
     removed.
 
     Args:
+    ----
        big_index : multidimensional index of bigger tensor
        big_shape : tensor shape of bigger tensor
        shape : tensor shape of smaller tensor
        out_index : multidimensional index of smaller tensor
 
    Returns:
+    -------
        None
+
    """
    # TODO: Implement for Task 2.2.
    raise NotImplementedError("Need to implement for Task 2.2")
 
 
 def shape_broadcast(shape1: UserShape, shape2: UserShape) -> UserShape:
-    """
-    Broadcast two shapes to create a new union shape.
+    """Broadcast two shapes to create a new union shape.
 
     Args:
+    ----
        shape1 : first shape
        shape2 : second shape
 
    Returns:
+    -------
        broadcasted shape
 
    Raises:
+    ------
        IndexingError : if cannot broadcast
+
    """
    # TODO: Implement for Task 2.2.
    raise NotImplementedError("Need to implement for Task 2.2")
 
 
 def strides_from_shape(shape: UserShape) -> UserStrides:
+    "Return a contiguous stride for a shape"
     layout = [1]
     offset = 1
     for s in reversed(shape):
@@ -153,11 +162,12 @@ def to_cuda_(self) -> None:  # pragma: no cover
             self._storage = numba.cuda.to_device(self._storage)
 
     def is_contiguous(self) -> bool:
-        """
-        Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions.
-
-        Returns:
+        """Check that the layout is contiguous, i.e. outer dimensions have bigger strides than inner dimensions.
 
+        Returns
+        -------
            bool : True if contiguous
+
        """
        last = 1e9
        for stride in self._strides:
@@ -173,9 +183,14 @@ def shape_broadcast(shape_a: UserShape, shape_b: UserShape) -> UserShape:
     def index(self, index: Union[int, UserIndex]) -> int:
         if isinstance(index, int):
             aindex: Index = array([index])
-        if isinstance(index, tuple):
+        else:  # if isinstance(index, tuple):
             aindex = array(index)
 
+        # Pretend 0-dim shape is 1-dim shape of singleton
+        shape = self.shape
+        if len(shape) == 0 and len(aindex) != 0:
+            shape = (1,)
+
         # Check for errors
         if aindex.shape[0] != len(self.shape):
             raise IndexingError(f"Index {aindex} must be size of {self.shape}.")
@@ -209,14 +224,16 @@ def tuple(self) -> Tuple[Storage, Shape, Strides]:
         return (self._storage, self._shape, self._strides)
 
     def permute(self, *order: int) -> TensorData:
-        """
-        Permute the dimensions of the tensor.
+        """Permute the dimensions of the tensor.
 
         Args:
-            order (list): a permutation of the dimensions
+        ----
+            *order: a permutation of the dimensions
 
         Returns:
+        -------
            New `TensorData` with the same storage and a new dimension order.
+
        """
        assert list(sorted(order)) == list(
            range(len(self.shape))
diff --git a/minitorch/tensor_functions.py b/minitorch/tensor_functions.py
index 86db01a1..57131b2c 100644
--- a/minitorch/tensor_functions.py
+++ b/minitorch/tensor_functions.py
@@ -1,6 +1,4 @@
-"""
-Implementation of the autodifferentiation Functions for Tensor.
-"""
+"""Implementation of the autodifferentiation Functions for Tensor."""
 
 from __future__ import annotations
 
@@ -22,8 +20,8 @@
     from .tensor_data import UserIndex, UserShape
 
 
-def wrap_tuple(x):  # type: ignore
-    "Turn a possible value into a tuple"
+def wrap_tuple(x: Any) -> tuple:  # type: ignore
+    """Turn a possible value into a tuple"""
     if isinstance(x, tuple):
         return x
     return (x,)
@@ -41,6 +39,7 @@ def _forward(cls, ctx: Context, *inps: Tensor) -> Tensor:
 
     @classmethod
     def apply(cls, *vals: Tensor) -> Tensor:
+        """Call the forward function and track history"""
         raw_vals = []
         need_grad = False
         for v in vals:
@@ -96,128 +95,7 @@ def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
         return grad_output, grad_output
 
 
-class Mul(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, b: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class Sigmoid(Function):
-    @staticmethod
-    def forward(ctx: Context, t1: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class ReLU(Function):
-    @staticmethod
-    def forward(ctx: Context, t1: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class Log(Function):
-    @staticmethod
-    def forward(ctx: Context, t1: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class Exp(Function):
-    @staticmethod
-    def forward(ctx: Context, t1: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class Sum(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, dim: Tensor) -> Tensor:
-        ctx.save_for_backward(a.shape, dim)
-        return a.f.add_reduce(a, int(dim.item()))
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, float]:
-        a_shape, dim = ctx.saved_values
-        return grad_output, 0.0
-
-
-class All(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, dim: Tensor) -> Tensor:
-        if dim is not None:
-            return a.f.mul_reduce(a, int(dim.item()))
-        else:
-            return a.f.mul_reduce(a.contiguous().view(int(operators.prod(a.shape))), 0)
-
-
-class LT(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, b: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class EQ(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, b: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
-
-
-class IsClose(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, b: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-
-class Permute(Function):
-    @staticmethod
-    def forward(ctx: Context, a: Tensor, order: Tensor) -> Tensor:
-        # TODO: Implement for Task 2.3.
-        raise NotImplementedError("Need to implement for Task 2.3")
-
-    @staticmethod
-    def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, float]:
-        # TODO: Implement for Task 2.4.
-        raise NotImplementedError("Need to implement for Task 2.4")
+# TODO: Implement for Task 2.3.
 
 
 class View(Function):
@@ -232,6 +110,7 @@ def forward(ctx: Context, a: Tensor, shape: Tensor) -> Tensor:
 
     @staticmethod
     def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, float]:
+        """Matrix Multiply backward (module 3)"""
         (original,) = ctx.saved_values
         return (
             minitorch.Tensor.make(
@@ -244,21 +123,25 @@ def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, float]:
 
 class Copy(Function):
     @staticmethod
     def forward(ctx: Context, a: Tensor) -> Tensor:
+        """Id function makes contiguous"""
         return a.f.id_map(a)
 
     @staticmethod
     def backward(ctx: Context, grad_output: Tensor) -> Tensor:
+        """Undo"""
         return grad_output
 
 
 class MatMul(Function):
     @staticmethod
     def forward(ctx: Context, t1: Tensor, t2: Tensor) -> Tensor:
+        """Matrix Multiply Forward (module 3)"""
         ctx.save_for_backward(t1, t2)
         return t1.f.matrix_multiply(t1, t2)
 
     @staticmethod
     def backward(ctx: Context, grad_output: Tensor) -> Tuple[Tensor, Tensor]:
+        """Matrix Multiply backward (module 3)"""
         t1, t2 = ctx.saved_values
 
         def transpose(a: Tensor) -> Tensor:
@@ -274,18 +157,20 @@ def transpose(a: Tensor) -> Tensor:
 
 # Helpers for Constructing tensors
 def zeros(shape: UserShape, backend: TensorBackend = SimpleBackend) -> Tensor:
-    """
-    Produce a zero tensor of size `shape`.
+    """Produce a zero tensor of size `shape`.
 
     Args:
+    ----
        shape : shape of tensor
        backend : tensor backend
 
    Returns:
+    -------
        new tensor
+
    """
    return minitorch.Tensor.make(
-        [0] * int(operators.prod(shape)), shape, backend=backend
+        [0.0] * int(operators.prod(shape)), shape, backend=backend
    )
 
 
@@ -294,16 +179,18 @@ def rand(
     backend: TensorBackend = SimpleBackend,
     requires_grad: bool = False,
 ) -> Tensor:
-    """
-    Produce a random tensor of size `shape`.
+    """Produce a random tensor of size `shape`.
 
     Args:
+    ----
        shape : shape of tensor
        backend : tensor backend
        requires_grad : turn on autodifferentiation
 
    Returns:
+    -------
        :class:`Tensor` : new tensor
+
    """
    vals = [random.random() for _ in range(int(operators.prod(shape)))]
    tensor = minitorch.Tensor.make(vals, shape, backend=backend)
@@ -317,17 +204,19 @@ def _tensor(
     backend: TensorBackend = SimpleBackend,
     requires_grad: bool = False,
 ) -> Tensor:
-    """
-    Produce a tensor with data ls and shape `shape`.
+    """Produce a tensor with data ls and shape `shape`.
 
     Args:
+    ----
        ls: data for tensor
        shape: shape of tensor
        backend: tensor backend
        requires_grad: turn on autodifferentiation
 
    Returns:
+    -------
        new tensor
+
    """
    tensor = minitorch.Tensor.make(ls, shape, backend=backend)
    tensor.requires_grad_(requires_grad)
@@ -337,16 +226,18 @@ def tensor(
     ls: Any, backend: TensorBackend = SimpleBackend, requires_grad: bool = False
 ) -> Tensor:
-    """
-    Produce a tensor with data and shape from ls
+    """Produce a tensor with data and shape from ls
 
     Args:
+    ----
        ls: data for tensor
        backend : tensor backend
        requires_grad : turn on autodifferentiation
 
    Returns:
+    -------
        :class:`Tensor` : new tensor
+
    """
 
    def shape(ls: Any) -> List[int]:
@@ -383,6 +274,7 @@ def grad_central_difference(
 
 
 def grad_check(f: Any, *vals: Tensor) -> None:
+    """Check whether autodiff matches central difference."""
     for x in vals:
         x.requires_grad_(True)
         x.zero_grad_()
diff --git a/minitorch/tensor_ops.py b/minitorch/tensor_ops.py
index 96411b42..365d6c1f 100644
--- a/minitorch/tensor_ops.py
+++ b/minitorch/tensor_ops.py
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Callable, Optional, Type
+from typing import TYPE_CHECKING, Callable, Optional, Type
 
 import numpy as np
 from typing_extensions import Protocol
@@ -20,31 +20,26 @@
 
 
 class MapProto(Protocol):
-    def __call__(self, x: Tensor, out: Optional[Tensor] = ..., /) -> Tensor:
-        ...
+    def __call__(self, x: Tensor, out: Optional[Tensor] = ..., /) -> Tensor: ...
 
 
 class TensorOps:
     @staticmethod
-    def map(fn: Callable[[float], float]) -> MapProto:
-        pass
-
-    @staticmethod
-    def cmap(fn: Callable[[float], float]) -> Callable[[Tensor, Tensor], Tensor]:
-        pass
+    def map(fn: Callable[[float], float]) -> MapProto: ...
 
     @staticmethod
-    def zip(fn: Callable[[float, float], float]) -> Callable[[Tensor, Tensor], Tensor]:
-        pass
+    def zip(
+        fn: Callable[[float, float], float],
+    ) -> Callable[[Tensor, Tensor], Tensor]: ...
 
     @staticmethod
     def reduce(
         fn: Callable[[float, float], float], start: float = 0.0
-    ) -> Callable[[Tensor, int], Tensor]:
-        pass
+    ) -> Callable[[Tensor, int], Tensor]: ...
 
     @staticmethod
     def matrix_multiply(a: Tensor, b: Tensor) -> Tensor:
+        """Matrix multiply"""
         raise NotImplementedError("Not implemented in this assignment")
 
     cuda = False
@@ -52,11 +47,11 @@
 
 class TensorBackend:
     def __init__(self, ops: Type[TensorOps]):
-        """
-        Dynamically construct a tensor backend based on a `tensor_ops` object
+        """Dynamically construct a tensor backend based on a `tensor_ops` object
         that implements map, zip, and reduce higher-order functions.
 
         Args:
+        ----
            ops : tensor operations object see `tensor_ops.py`
 
@@ -64,7 +59,6 @@ def __init__(self, ops: Type[TensorOps]):
             A collection of tensor functions
 
         """
-
         # Maps
         self.neg_map = ops.map(operators.neg)
         self.sigmoid_map = ops.map(operators.sigmoid)
@@ -72,7 +66,6 @@
         self.log_map = ops.map(operators.log)
         self.exp_map = ops.map(operators.exp)
         self.id_map = ops.map(operators.id)
-        self.id_cmap = ops.cmap(operators.id)
         self.inv_map = ops.map(operators.inv)
 
         # Zips
@@ -95,8 +88,7 @@ class SimpleOps(TensorOps):
     @staticmethod
     def map(fn: Callable[[float], float]) -> MapProto:
-        """
-        Higher-order tensor map function ::
+        """Higher-order tensor map function ::
 
           fn_map = map(fn)
          fn_map(a, out)
 
@@ -115,15 +107,17 @@ def map(fn: Callable[[float], float]) -> MapProto:
                    out[i, j] = fn(a[i, 0])
 
         Args:
+        ----
            fn: function from float-to-float to apply.
            a (:class:`TensorData`): tensor to map over
            out (:class:`TensorData`): optional, tensor data to fill in,
                   should broadcast with `a`
 
        Returns:
+        -------
            new tensor data
-        """
 
+        """
        f = tensor_map(fn)
 
        def ret(a: Tensor, out: Optional[Tensor] = None) -> Tensor:
@@ -136,10 +130,9 @@ def ret(a: Tensor, out: Optional[Tensor] = None) -> Tensor:
 
     @staticmethod
     def zip(
-        fn: Callable[[float, float], float]
+        fn: Callable[[float, float], float],
     ) -> Callable[["Tensor", "Tensor"], "Tensor"]:
-        """
-        Higher-order tensor zip function ::
+        """Higher-order tensor zip function ::
 
           fn_zip = zip(fn)
          out = fn_zip(a, b)
@@ -158,14 +151,16 @@ def zip(
 
         Args:
+        ----
            fn: function from two floats-to-float to apply
            a (:class:`TensorData`): tensor to zip over
            b (:class:`TensorData`): tensor to zip over
 
        Returns:
+        -------
            :class:`TensorData` : new tensor data
-        """
 
+        """
        f = tensor_zip(fn)
 
        def ret(a: "Tensor", b: "Tensor") -> "Tensor":
@@ -183,8 +178,7 @@ def ret(a: "Tensor", b: "Tensor") -> "Tensor":
     def reduce(
         fn: Callable[[float, float], float], start: float = 0.0
     ) -> Callable[["Tensor", int], "Tensor"]:
-        """
-        Higher-order tensor reduce function. ::
+        """Higher-order tensor reduce function. ::
 
           fn_reduce = reduce(fn)
          out = fn_reduce(a, dim)
@@ -198,12 +192,15 @@ def reduce(
 
         Args:
+        ----
            fn: function from two floats-to-float to apply
            a (:class:`TensorData`): tensor to reduce over
            dim (int): int of dim to reduce
 
        Returns:
+        -------
            :class:`TensorData` : new tensor
+
        """
        f = tensor_reduce(fn)
 
@@ -230,9 +227,10 @@ def matrix_multiply(a: "Tensor", b: "Tensor") -> "Tensor":
 # Implementations.
 
 
-def tensor_map(fn: Callable[[float], float]) -> Any:
-    """
-    Low-level implementation of tensor map between
+def tensor_map(
+    fn: Callable[[float], float],
+) -> Callable[[Storage, Shape, Strides, Storage, Shape, Strides], None]:
+    """Low-level implementation of tensor map between
     tensors with *possibly different strides*.
 
     Simple version:
 
     * Fill in the `out` array by applying `fn` to each
      value of `in_storage` assuming `out_shape` and `in_shape`
      are the same size.
 
    Broadcasted version:
 
    * Fill in the `out` array by applying `fn` to each
      value of `in_storage` assuming `out_shape` and `in_shape`
      broadcast. (`in_shape` must be smaller than `out_shape`).
 
    Args:
+    ----
        fn: function from float-to-float to apply
-        out (array): storage for out tensor
-        out_shape (array): shape for out tensor
-        out_strides (array): strides for out tensor
-        in_storage (array): storage for in tensor
-        in_shape (array): shape for in tensor
-        in_strides (array): strides for in tensor
 
    Returns:
-        None : Fills in `out`
+    -------
+        Tensor map function.
+
    """
 
    def _map(
        out: Storage,
        out_shape: Shape,
        out_strides: Strides,
        in_storage: Storage,
        in_shape: Shape,
        in_strides: Strides,
    ) -> None:
@@ -274,9 +269,12 @@ def _map(
     return _map
 
 
-def tensor_zip(fn: Callable[[float, float], float]) -> Any:
-    """
-    Low-level implementation of tensor zip between
+def tensor_zip(
+    fn: Callable[[float, float], float],
+) -> Callable[
+    [Storage, Shape, Strides, Storage, Shape, Strides, Storage, Shape, Strides], None
+]:
+    """Low-level implementation of tensor zip between
     tensors with *possibly different strides*.
 
     Simple version:
 
     * Fill in the `out` array by applying `fn` to each
      value of `a_storage` and `b_storage` assuming `out_shape`
      and `a_shape` are the same size.
 
    Broadcasted version:
 
    * Fill in the `out` array by applying `fn` to each
      value of `a_storage` and `b_storage` assuming `a_shape`
      and `b_shape` broadcast to `out_shape`.
 
    Args:
+    ----
        fn: function mapping two floats to float to apply
-        out (array): storage for `out` tensor
-        out_shape (array): shape for `out` tensor
-        out_strides (array): strides for `out` tensor
-        a_storage (array): storage for `a` tensor
-        a_shape (array): shape for `a` tensor
-        a_strides (array): strides for `a` tensor
-        b_storage (array): storage for `b` tensor
-        b_shape (array): shape for `b` tensor
-        b_strides (array): strides for `b` tensor
 
    Returns:
-        None : Fills in `out`
+    -------
+        Tensor zip function.
+
    """
 
    def _zip(
        out: Storage,
        out_shape: Shape,
        out_strides: Strides,
        a_storage: Storage,
        a_shape: Shape,
        a_strides: Strides,
        b_storage: Storage,
        b_shape: Shape,
        b_strides: Strides,
    ) -> None:
@@ -324,25 +316,22 @@ def _zip(
     return _zip
 
 
-def tensor_reduce(fn: Callable[[float, float], float]) -> Any:
-    """
-    Low-level implementation of tensor reduce.
+def tensor_reduce(
+    fn: Callable[[float, float], float],
+) -> Callable[[Storage, Shape, Strides, Storage, Shape, Strides, int], None]:
+    """Low-level implementation of tensor reduce.
 
     * `out_shape` will be the same as `a_shape`
        except with `reduce_dim` turned to size `1`
 
     Args:
+    ----
        fn: reduction function mapping two floats to float
-        out (array): storage for `out` tensor
-        out_shape (array): shape for `out` tensor
-        out_strides (array): strides for `out` tensor
-        a_storage (array): storage for `a` tensor
-        a_shape (array): shape for `a` tensor
-        a_strides (array): strides for `a` tensor
-        reduce_dim (int): dimension to reduce out
 
    Returns:
-        None : Fills in `out`
+    -------
+        Tensor reduce function.
+
    """
 
    def _reduce(
diff --git a/project/app.py b/project/app.py
index 5ed4ec27..7251c682 100644
--- a/project/app.py
+++ b/project/app.py
@@ -19,9 +19,7 @@
 st.sidebar.markdown(
     """