Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
// Update 'VARIANT' to pick a Python version: 3, 3.9, 3.8, 3.7, 3.6.
// Append -bullseye or -buster to pin to an OS version.
// Use -bullseye variants on local on arm64/Apple Silicon.
"VARIANT": "3.7",
"VARIANT": "3.9",
// Options
"NODE_VERSION": "lts/*"
}
Expand All @@ -29,12 +29,13 @@
"python.linting.mypyPath": "/usr/local/py-utils/bin/mypy",
"python.linting.pycodestylePath": "/usr/local/py-utils/bin/pycodestyle",
"python.linting.pydocstylePath": "/usr/local/py-utils/bin/pydocstyle",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint",
"python.linting.pylintPath": "/usr/local/py-utils/bin/pylint"
},
// Add the IDs of extensions you want installed when the container is created.
"extensions": [
"ms-python.python",
"ms-python.vscode-pylance"
"ms-python.vscode-pylance",
"elagil.pre-commit-helper"
],
// Use 'forwardPorts' to make a list of ports inside the container available locally.
"forwardPorts": [
Expand Down
6 changes: 3 additions & 3 deletions .github/workflows/pre-commit.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,11 +7,11 @@ env:
LC_ALL: "C.UTF-8"
jobs:
pre-commit:
runs-on: ubuntu-20.04
runs-on: ubuntu-22.04
strategy:
matrix:
os: [ubuntu-20.04, macos-latest]
python-version: [3.7, 3.8]
os: [ubuntu-22.04, macos-latest]
python-version: [3.8, 3.9]

steps:
- uses: actions/checkout@v2
Expand Down
6 changes: 3 additions & 3 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,12 +1,12 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v4.4.0
hooks:
- id: check-yaml
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/pre-commit/mirrors-yapf
rev: "v0.31.0"
rev: "v0.32.0"
hooks:
- id: yapf
args: ["--style=.style.yapf", "--parallel", "--in-place"]
Expand All @@ -15,7 +15,7 @@ repos:
hooks:
- id: seed-isort-config
- repo: https://github.com/pycqa/isort
rev: 5.9.3
rev: 5.12.0
hooks:
- id: isort
name: isort (python)
Expand Down
42 changes: 39 additions & 3 deletions picatrix/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,12 @@
# limitations under the License.
"""Sets up Picatrix environment."""

from typing import Optional, Text, Tuple
from typing import Dict, Optional, Text, Tuple

import pandas as pd

from .lib.namespace import (
AccessorNamespaceTemplate,
FeatureContext,
FeatureNamespace,
Function,
Expand Down Expand Up @@ -112,6 +115,39 @@ def new_cell_magic(func: Function, name: Optional[Text] = None):
px.add_cell_magic(func, name if name else func.__name__)


_accessor_namespaces: Dict[Text, AccessorNamespaceTemplate] = {}


def new_accessor_namespace(
    name: Text, docstring: Optional[Text] = None) -> AccessorNamespaceTemplate:
  """Registers a new pandas DataFrame accessor namespace.

  The returned AccessorNamespaceTemplate exposes `.add_accessor`, which is
  used to attach functions (accessors) to the namespace. The namespace is
  registered with pandas under `name`, so it is meant to be reached as
  <dataframe_obj>.<name> and its accessors as
  <dataframe_obj>.<name>.<accessor_name>.

  Args:
    name: name of the namespace
    docstring: a string describing the functionalities of the namespace; a
      generic description built from `name` is used when it is omitted or
      empty

  Returns:
    AccessorNamespaceTemplate: a template to be used for spawning accessor
      namespaces
  """
  description = docstring or f"Group of namespaces related to \"{name}\""
  namespace_template = AccessorNamespaceTemplate(description)

  # The template's `create` method is what gets registered with pandas as the
  # accessor for `name`.
  register = pd.api.extensions.register_dataframe_accessor  # type: ignore
  register(name)(namespace_template.create)

  # Remember the template only after pandas accepted the registration.
  _accessor_namespaces[name] = namespace_template
  return namespace_template


# shouldn't be exported
del Optional, Text, Tuple # type: ignore
del FeatureContext, FeatureNamespace, Function, RootContext, RootNamespace,
del Dict, Optional, Text, Tuple
del AccessorNamespaceTemplate, FeatureContext, FeatureNamespace,
del Function, RootContext, RootNamespace
del pd
115 changes: 100 additions & 15 deletions picatrix/lib/namespace.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
"""Types and functions defining Picatrix namespacing."""

from difflib import get_close_matches
from functools import wraps
from inspect import cleandoc
from types import SimpleNamespace
from typing import (
Expand Down Expand Up @@ -96,16 +97,14 @@ def _as_df_record(name: Text, item: Any, with_doc: bool,
return record


class Namespace(SimpleNamespace, Generic[A]):
"""Key-value type of structure with items accessible as attributes."""
class BareNamespace(SimpleNamespace, Generic[A]):
"""Minimal key-value type of structure with items accessible as attributes."""

name: Text
__dict__: Dict[Text, A]

def __init__(self, name: Text, docstring: Text, **kwargs: A):
def __init__(self, docstring: Text, **kwargs: Any):
super().__init__(**kwargs)
self.__doc__ = cleandoc(docstring)
self.name = name

def __setattr__(self, key: Text, value: A):
if not key.isidentifier():
Expand All @@ -123,7 +122,7 @@ def __setattr__(self, key: Text, value: A):

def __getattr__(self, key: Text) -> A:
if key in self:
return super().__getattr__(key)
return super().__getattribute__(key)
else:
raise NamespaceKeyMissingError(key, self.keys())

Expand All @@ -139,6 +138,26 @@ def __iter__(self) -> Iterator[Text]:
def __contains__(self, key: Text):
return self.__dict__.__contains__(key)

def _add(self, key: Text, value: A):
"""Adds a new value under the key.

Raises:
NamespaceKeyExistsError: when required key already exists
NamespaceKeyError: when key is invalid, e.g. isn't Python identifier
"""
setattr(self, key, value)


class Namespace(BareNamespace[A]):
"""Key-value type of structure with items accessible as attributes."""

name: Text
__dict__: Dict[Text, A]

def __init__(self, name: Text, docstring: Text, **kwargs: A):
super().__init__(docstring, **kwargs)
self.name = name

def keys(self) -> Iterator[Text]:
"""Iterator over all of the keys in the namespace."""
return iter(self.__dict__.keys())
Expand Down Expand Up @@ -183,15 +202,6 @@ def search(self, keyword: Text) -> pandas.DataFrame:
return df[df.Name.str.contains(keyword) | # type: ignore
df.Docstring.str.contains(keyword)] # type: ignore

def _add(self, key: Text, value: A):
"""Adds a new value under the key.

Raises:
NamespaceKeyExistsError: when required key already exists
NamespaceKeyError: when key is invalid, e.g. isn't Python identifier
"""
setattr(self, key, value)

def get(self, key: Text, default: A) -> A:
"""Return the value for key if key is in the namespace, else default."""
if key in self:
Expand Down Expand Up @@ -383,3 +393,78 @@ def add_namespace(
ctx = FeatureContext(name=key, docstring=docstring)
self._add(name, ctx)
return ctx


# Type aliases for pandas DataFrame accessors. Each alias is followed by a
# bare string literal describing it.
PandasAccessor = Callable[..., Any]
"""Type representation of a pandas DataFrame accessor.

The definition of this type should be `Callable[[pd.DataFrame, ...], Any]`
(meaning first argument is a DataFrame and rest is up to the implementer)
but it isn't allowed by Python typing system."""

PandasAccessorValidator = Callable[[pandas.DataFrame], bool]
"""Function validating if an accessor is applicable to a specific DataFrame."""

AccessorDef = Tuple[Text, PandasAccessorValidator, PandasAccessor]
"""Definition of the accessor, i.e. name, validator and the accessor itself."""


def _accessor_wrapper(f: PandasAccessor, df: pandas.DataFrame):
  """Binds `df` as the first positional argument of accessor `f`.

  Args:
    f: accessor expecting a DataFrame as its first argument
    df: DataFrame passed to `f` on every call

  Returns:
    A function forwarding its own arguments to `f` after `df`; it carries the
    metadata of `f` (name, docstring, ...) as copied by `functools.wraps`.
  """

  def _bound(*args: Any, **kwargs: Any):
    return f(df, *args, **kwargs)

  return wraps(f)(_bound)


class AccessorNamespace(BareNamespace[PandasAccessor]):
  """Namespace holding pandas DataFrame accessors.

  Each accessor kept in the namespace is already bound to the DataFrame the
  namespace was created for, so it is called without passing the DataFrame.
  """

  def __init__(
      self, docstring: Text, df: pandas.DataFrame, fs: List[AccessorDef]):
    """Attaches every accessor from `fs` whose validator accepts `df`.

    Args:
      docstring: description of the namespace
      df: DataFrame the accessors get bound to
      fs: accessor definitions as (name, validator, accessor) tuples

    Raises:
      AttributeError: when no validator accepts `df` (this includes an empty
        `fs`)
    """
    super().__init__(docstring)

    valid = False
    for name, validate, accessor in fs:
      if validate(df):
        self._add(name, _accessor_wrapper(accessor, df))
        valid = True

    if not valid:
      # NOTE(review): AttributeError presumably follows the pandas convention
      # for accessors that don't apply to the given data - confirm.
      raise AttributeError(
          "DataFrame doesn't match requirements of any of the accessors.")


class AccessorNamespaceTemplate:
  """Holds parameters to be used to create AccessorNamespace.

  Accessors are collected with `add_accessor`; `create` then builds an
  AccessorNamespace for a concrete DataFrame out of everything collected so
  far.
  """
  # Description passed to every AccessorNamespace built by `create`.
  docstring: Text
  # Registered accessors: name -> (validator, accessor).
  functions: Dict[Text, Tuple[PandasAccessorValidator, PandasAccessor]]

  def __init__(self, docstring: Text):
    """Creates an empty template.

    Args:
      docstring: description of the namespaces created from this template
    """
    self.docstring = docstring
    self.functions = {}

  def add_accessor(
      self,
      name: Optional[Text] = None,
      validator: PandasAccessorValidator = lambda _: True,
  ) -> Callable[[PandasAccessor], PandasAccessor]:
    """A decorator for adding accessor to the namespace.

    Args:
      name: key to register the accessor under; the accessor's `__name__` is
        used when it is omitted or empty
      validator: function deciding whether the accessor applies to a given
        DataFrame; by default the accessor applies to every DataFrame

    Returns:
      A decorator that registers the function it receives and returns that
      function unchanged.

    Raises:
      KeyError: raised by the returned decorator when an accessor is already
        registered under the same key
    """

    def _inner(accessor: PandasAccessor) -> PandasAccessor:
      key = name if name else accessor.__name__

      if key in self.functions:
        raise KeyError(f"Accessor \"{key}\" already exists.")
      self.functions[key] = (validator, accessor)

      # Return the function so that `@template.add_accessor()` used as a
      # decorator doesn't rebind the decorated name to None.
      return accessor

    return _inner

  def create(self, df: pandas.DataFrame) -> AccessorNamespace:
    """Creates AccessorNamespace.

    Args:
      df: DataFrame the namespace and its accessors are bound to

    Returns:
      AccessorNamespace built from all accessors registered so far.
    """
    fs = [(n, v, a) for n, (v, a) in self.functions.items()]

    return AccessorNamespace(self.docstring, df, fs)
93 changes: 92 additions & 1 deletion picatrix/lib/namespace_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import pandas as pd
import pytest

from .namespace import Namespace, NamespaceKeyError
from .namespace import AccessorNamespaceTemplate, Namespace, NamespaceKeyError


def test_invalid_key():
Expand Down Expand Up @@ -107,3 +107,94 @@ def test_to_frame():
])
got = n.to_frame(with_doc=True)
assert want.equals(got)


def test_accessor_template_add_accessor():
  """Check which accessors a template attaches for a given DataFrame."""

  plain_records = [
      {
          "name": "something",
          "value": 11
      },
      {
          "name": "anything",
          "value": 15
      },
  ]
  magic_records = [
      {
          "name": "something",
          "value": 11,
          "abracadabra": "alakazam"
      },
      {
          "name": "anything",
          "value": 15,
          "abracadabra": "hocus pocus"
      },
  ]

  template = AccessorNamespaceTemplate("Some docstring")
  template.add_accessor(
      "i_never_attach", validator=lambda _: False)(lambda x: x)

  # Only a never-matching accessor is registered, so creation has to fail.
  with pytest.raises(AttributeError):
    _ = template.create(pd.DataFrame.from_records(plain_records))

  template.add_accessor(
      "i_always_attach", validator=lambda _: True)(lambda x: x)
  template.add_accessor(
      "i_conditionally_attach",
      validator=lambda df: "abracadabra" in df.columns)(lambda x: x)

  # No "abracadabra" column: only the unconditional accessor is attached.
  without_column = template.create(pd.DataFrame.from_records(plain_records))

  assert "i_always_attach" in without_column
  assert "i_never_attach" not in without_column
  assert "i_conditionally_attach" not in without_column

  # With the "abracadabra" column the conditional accessor is attached too.
  with_column = template.create(pd.DataFrame.from_records(magic_records))

  assert "i_always_attach" in with_column
  assert "i_never_attach" not in with_column
  assert "i_conditionally_attach" in with_column


def test_accessor_template_call_accessor():
  """Check that a templated accessor gets the DataFrame and the arguments."""

  records = [
      {
          "name": "something",
          "value": 11,
          "abracadabra": "alakazam"
      },
      {
          "name": "anything",
          "value": 15,
          "abracadabra": "hocus pocus"
      },
  ]
  frame = pd.DataFrame.from_records(records)
  first, second, third = 1, 2, 3

  template = AccessorNamespaceTemplate("Some docstring")
  template.add_accessor(
      "echo", validator=lambda _: True)(lambda df, a, b, c: (df, a, b, c))

  namespace = template.create(frame)

  # The accessor echoes back the bound DataFrame followed by its arguments.
  got_frame, got_first, got_second, got_third = namespace.echo(
      first, second, third)

  assert (
      frame.equals(got_frame) and first == got_first and
      second == got_second and third == got_third)
Loading