diff --git a/extras/environment.yml b/extras/environment.yml index ca216870..3081dcc7 100644 --- a/extras/environment.yml +++ b/extras/environment.yml @@ -17,6 +17,9 @@ dependencies: - ipytest=0.12.* - pandas=1.*.* - plotly=5.*.* + - pip + - pip: + - pygount~=1.4.0 - statsmodels=0.*.* # https://plotly.com/python/static-image-export/ - python-kaleido diff --git a/extras/scripts/fixtures.py b/extras/scripts/fixtures.py new file mode 100644 index 00000000..43e1aa37 --- /dev/null +++ b/extras/scripts/fixtures.py @@ -0,0 +1,20 @@ +import os +import pytest + +from .nb_helper import notebook_to_script, read_notebook + + +@pytest.fixture() +def nb_full_path(): + print(os.getenv('PYTEST_CURRENT_TEST')) + return os.path.join(os.getcwd(), "hw_6.ipynb") + + +@pytest.fixture() +def notebook(nb_full_path): + return read_notebook(nb_full_path) + + +@pytest.fixture() +def script(notebook): + return notebook_to_script(notebook) diff --git a/extras/scripts/hw_6_check.py b/extras/scripts/hw_6_check.py index f489a121..3e1e25ba 100644 --- a/extras/scripts/hw_6_check.py +++ b/extras/scripts/hw_6_check.py @@ -1,18 +1,9 @@ -# Corresponds to the Requirements for homework 6. Requires cloc and nbconvert. Usage: -# -# python3 ./extras/scripts/hw_6_check.py .ipynb +# Helper file for homework 6. The checks correspond to the Requirements for homework 6. 
import ast -import json -import os -import pandas as pd +from pygount import SourceAnalysis import re -import shlex -import subprocess -import sys -sys.path.append(os.path.dirname(os.path.abspath(__file__))) -from nb_helper import read_notebook, notebook_to_script MIN_LINES = 40 VIZ_PACKAGES = set( @@ -59,39 +50,10 @@ def visit_Call(self, node): self.is_present = True -def handle_process_err(cmd, err): - if type(cmd) == list: - cmd = shlex.join(cmd) - - output = err.stderr.decode("utf-8") - print( - f"{bcolors.FAIL}ERROR{bcolors.ENDC} while running\n\n\t{cmd}\n\n{output}", - file=sys.stderr, - ) - sys.exit(err.returncode) - - -def get_cmd_output(cmd, input=None, shell=False): - try: - process = subprocess.run( - cmd, - capture_output=True, - check=True, - input=bytes(input, "utf-8"), - shell=shell, - ) - except subprocess.CalledProcessError as err: - handle_process_err(cmd, err) - - return process.stdout - - -def lines_of_code(code): - output = get_cmd_output( - "cloc --stdin-name=script.py --json -", input=code, shell=True - ) - data = json.loads(output) - return data["SUM"]["code"] +def lines_of_code(file_path): + # TODO needs the code as a script, not as the ipynb + results = SourceAnalysis.from_file(file_path, "pygount") + return results.code_count def code_contains(pattern, code): @@ -99,19 +61,6 @@ def code_contains(pattern, code): return bool(matches) -def has_link(cell): - pattern = r"https?://" - if cell.cell_type == "code": - # check for URL in comment - pattern = r"^\s*\#.*" + pattern - - return code_contains(pattern, cell.source) - - -def includes_link(cells): - return any(has_link(cell) for cell in cells) - - def uses_transform(script): return code_contains( r"""\b( @@ -144,43 +93,3 @@ def has_plotting(script): return ( has_overlap(VIZ_PACKAGES, imports_checker.packages) or method_checker.is_present ) - - -# https://stackoverflow.com/a/287944/358804 -class bcolors: - OKGREEN = "\033[92m" - FAIL = "\033[91m" - ENDC = "\033[0m" - - -def 
pass_fail(result): - """Apply ANSI color escape codes""" - color = bcolors.OKGREEN if result else bcolors.FAIL - return f"{color}{result}{bcolors.ENDC}" - - -def exit(results): - exit_code = 0 if results.all() else 1 - sys.exit(exit_code) - - -if __name__ == "__main__": - notebook_path = sys.argv[1] - - notebook = read_notebook(notebook_path) - script = notebook_to_script(notebook) - num_lines = lines_of_code(script) - - # use pandas for outputting a table - results = pd.Series( - { - f"Enough lines of code ({num_lines})": num_lines >= MIN_LINES, - "Includes link": includes_link(notebook.cells), - "Uses transform": uses_transform(script), - "Has plotting": has_plotting(script), - } - ) - - outputs = results.apply(lambda val: pass_fail(val)) - print(outputs.to_string()) - exit(results) diff --git a/extras/scripts/test_hw_6_check.py b/extras/scripts/test_hw_6_check.py index 81b0f26d..7e302e49 100644 --- a/extras/scripts/test_hw_6_check.py +++ b/extras/scripts/test_hw_6_check.py @@ -1,5 +1,4 @@ -import nbformat -from .hw_6_check import has_plotting, includes_link +from .hw_6_check import has_plotting def test_nothing(): @@ -20,33 +19,3 @@ def test_plot_method(): def test_plot_submodule(): assert has_plotting("df.plot.scatter()") - - -def test_includes_link_base(): - cells = [] - assert not includes_link(cells) - - -def test_includes_link_missing(): - cells = [nbformat.from_dict({"cell_type": "markdown", "source": ""})] - assert not includes_link(cells) - - -def test_includes_link_markdown(): - cells = [ - nbformat.from_dict({"cell_type": "markdown", "source": "https://google.com"}) - ] - - assert includes_link(cells) - - -def test_includes_link_code_only(): - cells = [nbformat.from_dict({"cell_type": "code", "source": "https://google.com"})] - assert not includes_link(cells) - - -def test_includes_link_code_comment(): - cells = [ - nbformat.from_dict({"cell_type": "code", "source": "# https://google.com"}) - ] - assert includes_link(cells) diff --git a/hw_6.ipynb 
b/hw_6.ipynb index 2a80e1a4..2a78e5b2 100644 --- a/hw_6.ipynb +++ b/hw_6.ipynb @@ -95,14 +95,11 @@ "source": [ "## Once you start\n", "\n", - "- Create a new notebook to do the actual analysis; that is what you'll turn in. To create, click:\n", - " 1. `File`\n", - " 1. `New notebook`\n", - " 1. `Python [conda env:python-public-policy]`\n", - "- Go back and find any information that's available _around_ the data, to get a better understanding of what it contains and means.\n", - " - Might include a data dictionary\n", - " - Might involve poking around a government agency's web site to understand their processes\n", - " - Understand what all the different columns and values represent" + "Go back and find any information that's available _around_ the data, to get a better understanding of what it contains and means.\n", + "\n", + "- Might include a data dictionary\n", + "- Might involve poking around a government agency's web site to understand their processes\n", + "- Understand what all the different columns and values represent" ] }, { @@ -136,6 +133,117 @@ "\n", "If you answer the first question easily, that's fine; dig into / build off of it. Go deep, not broad." ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "Everything above and the tests below are present only for the assignment; you are encouraged to save a copy of the notebook and delete them before sharing it with potential employers, etc." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "source": [ + "YOUR CODE AND ANALYSIS HERE" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": { + "slideshow": { + "slide_type": "skip" + } + }, + "outputs": [], + "source": [ + "import ipytest\n", + "ipytest.autoconfig()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": { + "slideshow": { + "slide_type": "slide" + } + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[31mF\u001b[0m\u001b[31mF\u001b[0m\u001b[31mF\u001b[0m\u001b[31m [100%]\u001b[0m\n", + "============================================= FAILURES =============================================\n", + "\u001b[31m\u001b[1m__________________________________________ test_min_lines __________________________________________\u001b[0m\n", + "\n", + "script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n", + "\n", + " \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_min_lines\u001b[39;49;00m(script):\n", + " num_lines = lines_of_code(script)\n", + "> \u001b[94massert\u001b[39;49;00m num_lines >= MIN_LINES, \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must have more than \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mMIN_LINES\u001b[33m}\u001b[39;49;00m\u001b[33m lines of code\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n", + "\u001b[1m\u001b[31mE AssertionError: notebook must have more than 40 lines of code\u001b[0m\n", + "\u001b[1m\u001b[31mE assert 7 >= 40\u001b[0m\n", + "\n", + "\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:7: AssertionError\n", + "\u001b[31m\u001b[1m________________________________________ test_has_plotting 
_________________________________________\u001b[0m\n", + "\n", + "script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n", + "\n", + " \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_has_plotting\u001b[39;49;00m(script):\n", + "> \u001b[94massert\u001b[39;49;00m has_plotting(script), \u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must contain a plot of some kind\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n", + "\u001b[1m\u001b[31mE AssertionError: notebook must contain a plot of some kind\u001b[0m\n", + "\u001b[1m\u001b[31mE assert False\u001b[0m\n", + "\u001b[1m\u001b[31mE + where False = has_plotting('#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n')\u001b[0m\n", + "\n", + "\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:11: AssertionError\n", + "\u001b[31m\u001b[1m_______________________________________ test_uses_transform ________________________________________\u001b[0m\n", + "\n", + "script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n", + "\n", + " \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_uses_transform\u001b[39;49;00m(script):\n", + "> \u001b[94massert\u001b[39;49;00m uses_transform(script), \u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must contain a transform of some kind: grouping, reshaping, etc.\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n", + "\u001b[1m\u001b[31mE AssertionError: notebook must 
contain a transform of some kind: grouping, reshaping, etc.\u001b[0m\n", + "\u001b[1m\u001b[31mE assert False\u001b[0m\n", + "\u001b[1m\u001b[31mE + where False = uses_transform('#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n')\u001b[0m\n", + "\n", + "\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:15: AssertionError\n", + "===================================== short test summary info ======================================\n", + "FAILED tmp1ioay2j3.py::test_min_lines - AssertionError: notebook must have more than 40 lines of ...\n", + "FAILED tmp1ioay2j3.py::test_has_plotting - AssertionError: notebook must contain a plot of some kind\n", + "FAILED tmp1ioay2j3.py::test_uses_transform - AssertionError: notebook must contain a transform of...\n" + ] + } + ], + "source": [ + "%%ipytest -qq\n", + "\n", + "from extras.scripts.fixtures import *\n", + "from extras.scripts.hw_6_check import lines_of_code, MIN_LINES, has_plotting, uses_transform\n", + "\n", + "\n", + "def test_min_lines(script):\n", + " num_lines = lines_of_code(script)\n", + " assert num_lines >= MIN_LINES, f\"notebook must have more than {MIN_LINES} lines of code\"\n", + " \n", + "\n", + "def test_has_plotting(script):\n", + " assert has_plotting(script), \"notebook must contain a plot of some kind\"\n", + "\n", + " \n", + "def test_uses_transform(script):\n", + " assert uses_transform(script), \"notebook must contain a transform of some kind: grouping, reshaping, etc.\"" + ] } ], "metadata": {