Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions extras/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,9 @@ dependencies:
- ipytest=0.12.*
- pandas=1.*.*
- plotly=5.*.*
- pip
- pip:
- pygount~=1.4.0
- statsmodels=0.*.*
# https://plotly.com/python/static-image-export/
- python-kaleido
Expand Down
20 changes: 20 additions & 0 deletions extras/scripts/fixtures.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
import os
import pytest

from .nb_helper import notebook_to_script, read_notebook


@pytest.fixture()
def nb_full_path():
    """Return the path of ``hw_6.ipynb`` inside the current working directory.

    The value of the ``PYTEST_CURRENT_TEST`` environment variable is printed
    before the path is built.
    """
    current_test = os.getenv("PYTEST_CURRENT_TEST")
    print(current_test)

    working_dir = os.getcwd()
    return os.path.join(working_dir, "hw_6.ipynb")


@pytest.fixture()
def notebook(nb_full_path):
    """Return whatever ``read_notebook`` produces for the path from ``nb_full_path``."""
    loaded = read_notebook(nb_full_path)
    return loaded


@pytest.fixture()
def script(notebook):
    """Return the result of ``notebook_to_script`` for the ``notebook`` fixture."""
    converted = notebook_to_script(notebook)
    return converted
103 changes: 6 additions & 97 deletions extras/scripts/hw_6_check.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,9 @@
# Corresponds to the Requirements for homework 6. Requires cloc and nbconvert. Usage:
#
# python3 ./extras/scripts/hw_6_check.py <assignment>.ipynb
# Helper file for homework 6. The checks correspond to the Requirements for homework 6.

import ast
import json
import os
import pandas as pd
from pygount import SourceAnalysis
import re
import shlex
import subprocess
import sys

sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from nb_helper import read_notebook, notebook_to_script

MIN_LINES = 40
VIZ_PACKAGES = set(
Expand Down Expand Up @@ -59,59 +50,17 @@ def visit_Call(self, node):
self.is_present = True


def handle_process_err(cmd, err):
    """Report a failed command on stderr and exit with its return code.

    Args:
        cmd: The command that was run: either a single string or a
            list/tuple of arguments (joined with ``shlex.join`` for display).
        err: The error for the failed command. Its ``stderr`` (bytes or
            ``None``) and ``returncode`` attributes are read.

    This function does not return; it always calls ``sys.exit``.
    """
    # isinstance instead of an exact type comparison, so tuples and list
    # subclasses are rendered as a command line too.
    if isinstance(cmd, (list, tuple)):
        cmd = shlex.join(cmd)

    # stderr is None when it was not captured, and may hold bytes that are not
    # valid UTF-8; neither case should hide the original failure.
    output = (err.stderr or b"").decode("utf-8", errors="replace")
    print(
        f"{bcolors.FAIL}ERROR{bcolors.ENDC} while running\n\n\t{cmd}\n\n{output}",
        file=sys.stderr,
    )
    sys.exit(err.returncode)


def get_cmd_output(cmd, input=None, shell=False):
    """Run a command and return what it wrote to standard output.

    Args:
        cmd: The command, in any form ``subprocess.run`` accepts.
        input: Optional text sent to the command's standard input, encoded
            as UTF-8. When ``None``, nothing is sent.
        shell: Passed through to ``subprocess.run``.

    Returns:
        The captured standard output as bytes.

    A non-zero exit status is handed to ``handle_process_err``.
    """
    # bytes(None, "utf-8") raises TypeError, so only encode real input.
    stdin_data = None if input is None else bytes(input, "utf-8")

    try:
        process = subprocess.run(
            cmd,
            capture_output=True,
            check=True,
            input=stdin_data,
            shell=shell,
        )
    except subprocess.CalledProcessError as err:
        handle_process_err(cmd, err)

    return process.stdout


def lines_of_code(code):
    """Return the ``SUM``/``code`` figure that ``cloc`` reports for ``code``.

    ``code`` is sent to ``cloc`` on standard input and the JSON it prints is
    parsed.
    """
    cloc_cmd = "cloc --stdin-name=script.py --json -"
    raw_report = get_cmd_output(cloc_cmd, input=code, shell=True)
    report = json.loads(raw_report)
    totals = report["SUM"]
    return totals["code"]
def lines_of_code(file_path):
    """Return the ``code_count`` of pygount's analysis of ``file_path``."""
    # TODO needs the code as a script, not as the ipynb
    analysis = SourceAnalysis.from_file(file_path, "pygount")
    code_count = analysis.code_count
    return code_count


def code_contains(pattern, code):
    """Report whether ``pattern`` is found anywhere in ``code``.

    ``pattern`` is compiled with ``re.VERBOSE``, so unescaped whitespace and
    ``#`` comments inside it are ignored.
    """
    verbose_regex = re.compile(pattern, re.VERBOSE)
    return verbose_regex.search(code) is not None


def has_link(cell):
    """Report whether a notebook cell contains an ``http(s)://`` link.

    Args:
        cell: An object with ``cell_type`` and ``source`` attributes.

    Returns:
        ``True`` when a link is found. In a cell whose ``cell_type`` is
        ``"code"`` the link only counts if it sits on a line that begins
        (after optional whitespace) with ``#``; for any other cell type the
        link may appear anywhere in the source.
    """
    pattern = r"https?://"
    if cell.cell_type == "code":
        # check for URL in comment
        pattern = r"^\s*\#.*" + pattern

    # re.MULTILINE lets ``^`` anchor at the start of every line, so a comment
    # is found on any line of the cell rather than only the first one.
    # re.VERBOSE is kept so the pattern (with its escaped ``\#``) is read the
    # same way code_contains reads it.
    found = re.search(pattern, cell.source, re.VERBOSE | re.MULTILINE)
    return found is not None


def includes_link(cells):
    """Report whether ``has_link`` is true for at least one of ``cells``."""
    for cell in cells:
        if has_link(cell):
            return True
    return False


def uses_transform(script):
return code_contains(
r"""\b(
Expand Down Expand Up @@ -144,43 +93,3 @@ def has_plotting(script):
return (
has_overlap(VIZ_PACKAGES, imports_checker.packages) or method_checker.is_present
)


# https://stackoverflow.com/a/287944/358804
class bcolors:
    """ANSI escape codes used to color terminal output."""

    ENDC = "\033[0m"
    FAIL = "\033[91m"
    OKGREEN = "\033[92m"


def pass_fail(result):
    """Wrap ``result`` in ANSI color escape codes.

    A truthy ``result`` is prefixed with ``bcolors.OKGREEN``, anything else
    with ``bcolors.FAIL``; ``bcolors.ENDC`` is appended in both cases.
    """
    if result:
        color = bcolors.OKGREEN
    else:
        color = bcolors.FAIL
    return f"{color}{result}{bcolors.ENDC}"


def exit(results):
    """Exit the process with status 0 when ``results.all()`` is truthy, else 1."""
    if results.all():
        sys.exit(0)
    sys.exit(1)


if __name__ == "__main__":
    # The notebook to check is given as the first command-line argument.
    notebook = read_notebook(sys.argv[1])
    script = notebook_to_script(notebook)
    num_lines = lines_of_code(script)

    # One entry per requirement: label -> whether it is met.
    checks = {
        f"Enough lines of code ({num_lines})": num_lines >= MIN_LINES,
        "Includes link": includes_link(notebook.cells),
        "Uses transform": uses_transform(script),
        "Has plotting": has_plotting(script),
    }

    # use pandas for outputting a table
    results = pd.Series(checks)
    print(results.apply(pass_fail).to_string())

    exit(results)
33 changes: 1 addition & 32 deletions extras/scripts/test_hw_6_check.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import nbformat
from .hw_6_check import has_plotting, includes_link
from .hw_6_check import has_plotting


def test_nothing():
Expand All @@ -20,33 +19,3 @@ def test_plot_method():

def test_plot_submodule():
    """A method called through the ``plot`` accessor counts as plotting."""
    snippet = "df.plot.scatter()"
    assert has_plotting(snippet)


def test_includes_link_base():
    """An empty list of cells contains no link."""
    assert not includes_link([])


def test_includes_link_missing():
    """A markdown cell with empty source contains no link."""
    empty_markdown = nbformat.from_dict({"cell_type": "markdown", "source": ""})
    assert not includes_link([empty_markdown])


def test_includes_link_markdown():
    """A URL in a markdown cell is detected."""
    markdown_cell = nbformat.from_dict(
        {"cell_type": "markdown", "source": "https://google.com"}
    )
    assert includes_link([markdown_cell])


def test_includes_link_code_only():
    """A URL in a code cell that is not in a comment does not count."""
    code_cell = nbformat.from_dict(
        {"cell_type": "code", "source": "https://google.com"}
    )
    assert not includes_link([code_cell])


def test_includes_link_code_comment():
    """A URL inside a comment in a code cell is detected."""
    commented_cell = nbformat.from_dict(
        {"cell_type": "code", "source": "# https://google.com"}
    )
    assert includes_link([commented_cell])
124 changes: 116 additions & 8 deletions hw_6.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -95,14 +95,11 @@
"source": [
"## Once you start\n",
"\n",
"- Create a new notebook to do the actual analysis; that is what you'll turn in. To create, click:\n",
" 1. `File`\n",
" 1. `New notebook`\n",
" 1. `Python [conda env:python-public-policy]`\n",
"- Go back and find any information that's available _around_ the data, to get a better understanding of what it contains and means.\n",
" - Might include a data dictionary\n",
" - Might involve poking around a government agency's web site to understand their processes\n",
" - Understand what all the different columns and values represent"
"Go back and find any information that's available _around_ the data, to get a better understanding of what it contains and means.\n",
"\n",
"- Might include a data dictionary\n",
"- Might involve poking around a government agency's web site to understand their processes\n",
"- Understand what all the different columns and values represent"
]
},
{
Expand Down Expand Up @@ -136,6 +133,117 @@
"\n",
"If you answer the first question easily, that's fine; dig into / build off of it. Go deep, not broad."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"Everything above and the tests below are only present for the assignment; you are encouraged to save a copy of the notebook and delete them before sharing it with potential employers, etc."
]
},
{
"cell_type": "markdown",
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"source": [
"YOUR CODE AND ANALYSIS HERE"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"slideshow": {
"slide_type": "skip"
}
},
"outputs": [],
"source": [
"import ipytest\n",
"ipytest.autoconfig()"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"slideshow": {
"slide_type": "slide"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\u001b[31mF\u001b[0m\u001b[31mF\u001b[0m\u001b[31mF\u001b[0m\u001b[31m [100%]\u001b[0m\n",
"============================================= FAILURES =============================================\n",
"\u001b[31m\u001b[1m__________________________________________ test_min_lines __________________________________________\u001b[0m\n",
"\n",
"script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n",
"\n",
" \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_min_lines\u001b[39;49;00m(script):\n",
" num_lines = lines_of_code(script)\n",
"> \u001b[94massert\u001b[39;49;00m num_lines >= MIN_LINES, \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must have more than \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mMIN_LINES\u001b[33m}\u001b[39;49;00m\u001b[33m lines of code\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n",
"\u001b[1m\u001b[31mE AssertionError: notebook must have more than 40 lines of code\u001b[0m\n",
"\u001b[1m\u001b[31mE assert 7 >= 40\u001b[0m\n",
"\n",
"\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:7: AssertionError\n",
"\u001b[31m\u001b[1m________________________________________ test_has_plotting _________________________________________\u001b[0m\n",
"\n",
"script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n",
"\n",
" \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_has_plotting\u001b[39;49;00m(script):\n",
"> \u001b[94massert\u001b[39;49;00m has_plotting(script), \u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must contain a plot of some kind\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n",
"\u001b[1m\u001b[31mE AssertionError: notebook must contain a plot of some kind\u001b[0m\n",
"\u001b[1m\u001b[31mE assert False\u001b[0m\n",
"\u001b[1m\u001b[31mE + where False = has_plotting('#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n')\u001b[0m\n",
"\n",
"\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:11: AssertionError\n",
"\u001b[31m\u001b[1m_______________________________________ test_uses_transform ________________________________________\u001b[0m\n",
"\n",
"script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n",
"\n",
" \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_uses_transform\u001b[39;49;00m(script):\n",
"> \u001b[94massert\u001b[39;49;00m uses_transform(script), \u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must contain a transform of some kind: grouping, reshaping, etc.\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n",
"\u001b[1m\u001b[31mE AssertionError: notebook must contain a transform of some kind: grouping, reshaping, etc.\u001b[0m\n",
"\u001b[1m\u001b[31mE assert False\u001b[0m\n",
"\u001b[1m\u001b[31mE + where False = uses_transform('#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n')\u001b[0m\n",
"\n",
"\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:15: AssertionError\n",
"===================================== short test summary info ======================================\n",
"FAILED tmp1ioay2j3.py::test_min_lines - AssertionError: notebook must have more than 40 lines of ...\n",
"FAILED tmp1ioay2j3.py::test_has_plotting - AssertionError: notebook must contain a plot of some kind\n",
"FAILED tmp1ioay2j3.py::test_uses_transform - AssertionError: notebook must contain a transform of...\n"
]
}
],
"source": [
"%%ipytest -qq\n",
"\n",
"from extras.scripts.fixtures import *\n",
"from extras.scripts.hw_6_check import lines_of_code, MIN_LINES, has_plotting, uses_transform\n",
"\n",
"\n",
"def test_min_lines(script):\n",
" num_lines = lines_of_code(script)\n",
" assert num_lines >= MIN_LINES, f\"notebook must have more than {MIN_LINES} lines of code\"\n",
" \n",
"\n",
"def test_has_plotting(script):\n",
" assert has_plotting(script), \"notebook must contain a plot of some kind\"\n",
"\n",
" \n",
"def test_uses_transform(script):\n",
" assert uses_transform(script), \"notebook must contain a transform of some kind: grouping, reshaping, etc.\""
]
}
],
"metadata": {
Expand Down