afeld · afeld · Apr 26, 2022 · Apr 26, 2022
diff --git a/extras/environment.yml b/extras/environment.yml
@@ -17,6 +17,9 @@ dependencies:
   - ipytest=0.12.*
   - pandas=1.*.*
   - plotly=5.*.*
+  - pip
+  - pip:
+      - pygount~=1.4.0
   - statsmodels=0.*.*
   # https://plotly.com/python/static-image-export/
   - python-kaleido

diff --git a/extras/scripts/fixtures.py b/extras/scripts/fixtures.py
@@ -0,0 +1,20 @@
+import os
+import pytest
+
+from .nb_helper import notebook_to_script, read_notebook
+
+
+@pytest.fixture()
+def nb_full_path():  # Fixture: absolute path to "hw_6.ipynb" in the current working directory.
+    print(os.getenv('PYTEST_CURRENT_TEST'))  # Prints the PYTEST_CURRENT_TEST env var (None if unset). NOTE(review): looks like a debugging aid - confirm it is still wanted.
+    return os.path.join(os.getcwd(), "hw_6.ipynb")  # File name is hard-coded; the directory comes from os.getcwd().
+
+
+@pytest.fixture()
+def notebook(nb_full_path):  # Fixture: whatever read_notebook() returns for the path supplied by the nb_full_path fixture.
+    return read_notebook(nb_full_path)
+
+
+@pytest.fixture()
+def script(notebook):  # Fixture: result of notebook_to_script() for the notebook fixture (presumably the notebook's code as Python source text - verify in nb_helper).
+    return notebook_to_script(notebook)
diff --git a/extras/scripts/hw_6_check.py b/extras/scripts/hw_6_check.py
@@ -1,18 +1,9 @@
-# Corresponds to the Requirements for homework 6. Requires cloc and nbconvert. Usage:
-#
-#   python3 ./extras/scripts/hw_6_check.py <assignment>.ipynb
+# Helper file for homework 6. The checks correspond to the Requirements for homework 6.
 
 import ast
-import json
-import os
-import pandas as pd
+from pygount import SourceAnalysis
 import re
-import shlex
-import subprocess
-import sys
 
-sys.path.append(os.path.dirname(os.path.abspath(__file__)))
-from nb_helper import read_notebook, notebook_to_script
 
 MIN_LINES = 40
 VIZ_PACKAGES = set(
@@ -59,59 +50,17 @@ def visit_Call(self, node):
             self.is_present = True
 
 
-def handle_process_err(cmd, err):
-    if type(cmd) == list:
-        cmd = shlex.join(cmd)
-
-    output = err.stderr.decode("utf-8")
-    print(
-        f"{bcolors.FAIL}ERROR{bcolors.ENDC} while running\n\n\t{cmd}\n\n{output}",
-        file=sys.stderr,
-    )
-    sys.exit(err.returncode)
-
-
-def get_cmd_output(cmd, input=None, shell=False):
-    try:
-        process = subprocess.run(
-            cmd,
-            capture_output=True,
-            check=True,
-            input=bytes(input, "utf-8"),
-            shell=shell,
-        )
-    except subprocess.CalledProcessError as err:
-        handle_process_err(cmd, err)
-
-    return process.stdout
-
-
-def lines_of_code(code):
-    output = get_cmd_output(
-        "cloc --stdin-name=script.py --json -", input=code, shell=True
-    )
-    data = json.loads(output)
-    return data["SUM"]["code"]
+def lines_of_code(file_path):
+    # TODO: pygount needs the code as a script file, not the .ipynb. NOTE(review): the hw_6.ipynb test passes the `script` fixture value here rather than a path - confirm intended input.
+    results = SourceAnalysis.from_file(file_path, "pygount")  # second argument is the group label pygount attaches to the analysis
+    return results.code_count  # pygount's count of lines it classified as code
 
 
 def code_contains(pattern, code):
     matches = re.search(re.compile(pattern, re.VERBOSE), code)
     return bool(matches)
 
 
-def has_link(cell):
-    pattern = r"https?://"
-    if cell.cell_type == "code":
-        # check for URL in comment
-        pattern = r"^\s*\#.*" + pattern
-
-    return code_contains(pattern, cell.source)
-
-
-def includes_link(cells):
-    return any(has_link(cell) for cell in cells)
-
-
 def uses_transform(script):
     return code_contains(
         r"""\b(
@@ -144,43 +93,3 @@ def has_plotting(script):
     return (
         has_overlap(VIZ_PACKAGES, imports_checker.packages) or method_checker.is_present
     )
-
-
-# https://stackoverflow.com/a/287944/358804
-class bcolors:
-    OKGREEN = "\033[92m"
-    FAIL = "\033[91m"
-    ENDC = "\033[0m"
-
-
-def pass_fail(result):
-    """Apply ANSI color escape codes"""
-    color = bcolors.OKGREEN if result else bcolors.FAIL
-    return f"{color}{result}{bcolors.ENDC}"
-
-
-def exit(results):
-    exit_code = 0 if results.all() else 1
-    sys.exit(exit_code)
-
-
-if __name__ == "__main__":
-    notebook_path = sys.argv[1]
-
-    notebook = read_notebook(notebook_path)
-    script = notebook_to_script(notebook)
-    num_lines = lines_of_code(script)
-
-    # use pandas for outputting a table
-    results = pd.Series(
-        {
-            f"Enough lines of code ({num_lines})": num_lines >= MIN_LINES,
-            "Includes link": includes_link(notebook.cells),
-            "Uses transform": uses_transform(script),
-            "Has plotting": has_plotting(script),
-        }
-    )
-
-    outputs = results.apply(lambda val: pass_fail(val))
-    print(outputs.to_string())
-    exit(results)
diff --git a/extras/scripts/test_hw_6_check.py b/extras/scripts/test_hw_6_check.py
@@ -1,5 +1,4 @@
-import nbformat
-from .hw_6_check import has_plotting, includes_link
+from .hw_6_check import has_plotting
 
 
 def test_nothing():
@@ -20,33 +19,3 @@ def test_plot_method():
 
 def test_plot_submodule():
     assert has_plotting("df.plot.scatter()")
-
-
-def test_includes_link_base():
-    cells = []
-    assert not includes_link(cells)
-
-
-def test_includes_link_missing():
-    cells = [nbformat.from_dict({"cell_type": "markdown", "source": ""})]
-    assert not includes_link(cells)
-
-
-def test_includes_link_markdown():
-    cells = [
-        nbformat.from_dict({"cell_type": "markdown", "source": "https://google.com"})
-    ]
-
-    assert includes_link(cells)
-
-
-def test_includes_link_code_only():
-    cells = [nbformat.from_dict({"cell_type": "code", "source": "https://google.com"})]
-    assert not includes_link(cells)
-
-
-def test_includes_link_code_comment():
-    cells = [
-        nbformat.from_dict({"cell_type": "code", "source": "# https://google.com"})
-    ]
-    assert includes_link(cells)
diff --git a/hw_6.ipynb b/hw_6.ipynb
@@ -95,14 +95,11 @@
    "source": [
     "## Once you start\n",
     "\n",
-    "- Create a new notebook to do the actual analysis; that is what you'll turn in. To create, click:\n",
-    "  1. `File`\n",
-    "  1. `New notebook`\n",
-    "  1. `Python [conda env:python-public-policy]`\n",
-    "- Go back and find any information that's available _around_ the data, to get a better understanding of what it contains and means.\n",
-    "  - Might include a data dictionary\n",
-    "  - Might involve poking around a government agency's web site to understand their processes\n",
-    "  - Understand what all the different columns and values represent"
+    "Go back and find any information that's available _around_ the data, to get a better understanding of what it contains and means.\n",
+    "\n",
+    "- Might include a data dictionary\n",
+    "- Might involve poking around a government agency's web site to understand their processes\n",
+    "- Understand what all the different columns and values represent"
    ]
   },
   {
@@ -136,6 +133,117 @@
     "\n",
     "If you answer the first question easily, that's fine; dig into / build off of it. Go deep, not broad."
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "Everything above and the tests below are obviously just present for the assignment; you are encouraged to save a copy of the notebook and delete them for sharing with potential employers, etc."
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "source": [
+    "YOUR CODE AND ANALYSIS HERE"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "skip"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "import ipytest\n",
+    "ipytest.autoconfig()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {
+    "slideshow": {
+     "slide_type": "slide"
+    }
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[31mF\u001b[0m\u001b[31mF\u001b[0m\u001b[31mF\u001b[0m\u001b[31m                                                                                          [100%]\u001b[0m\n",
+      "============================================= FAILURES =============================================\n",
+      "\u001b[31m\u001b[1m__________________________________________ test_min_lines __________________________________________\u001b[0m\n",
+      "\n",
+      "script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n",
+      "\n",
+      "    \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_min_lines\u001b[39;49;00m(script):\n",
+      "        num_lines = lines_of_code(script)\n",
+      ">       \u001b[94massert\u001b[39;49;00m num_lines >= MIN_LINES, \u001b[33mf\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must have more than \u001b[39;49;00m\u001b[33m{\u001b[39;49;00mMIN_LINES\u001b[33m}\u001b[39;49;00m\u001b[33m lines of code\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n",
+      "\u001b[1m\u001b[31mE       AssertionError: notebook must have more than 40 lines of code\u001b[0m\n",
+      "\u001b[1m\u001b[31mE       assert 7 >= 40\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:7: AssertionError\n",
+      "\u001b[31m\u001b[1m________________________________________ test_has_plotting _________________________________________\u001b[0m\n",
+      "\n",
+      "script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n",
+      "\n",
+      "    \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_has_plotting\u001b[39;49;00m(script):\n",
+      ">       \u001b[94massert\u001b[39;49;00m has_plotting(script), \u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must contain a plot of some kind\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n",
+      "\u001b[1m\u001b[31mE       AssertionError: notebook must contain a plot of some kind\u001b[0m\n",
+      "\u001b[1m\u001b[31mE       assert False\u001b[0m\n",
+      "\u001b[1m\u001b[31mE        +  where False = has_plotting('#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n')\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:11: AssertionError\n",
+      "\u001b[31m\u001b[1m_______________________________________ test_uses_transform ________________________________________\u001b[0m\n",
+      "\n",
+      "script = '#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n'\n",
+      "\n",
+      "    \u001b[94mdef\u001b[39;49;00m \u001b[92mtest_uses_transform\u001b[39;49;00m(script):\n",
+      ">       \u001b[94massert\u001b[39;49;00m uses_transform(script), \u001b[33m\"\u001b[39;49;00m\u001b[33mnotebook must contain a transform of some kind: grouping, reshaping, etc.\u001b[39;49;00m\u001b[33m\"\u001b[39;49;00m\n",
+      "\u001b[1m\u001b[31mE       AssertionError: notebook must contain a transform of some kind: grouping, reshaping, etc.\u001b[0m\n",
+      "\u001b[1m\u001b[31mE       assert False\u001b[0m\n",
+      "\u001b[1m\u001b[31mE        +  where False = uses_transform('#!/usr/bin/env python\\n# coding: utf-8\\n\\n# # Homework 0\\n# \\n# [Kaggle](https://www.kaggle.com/) is a data science p...n` under [`HW0 questions` in Discussions](https://brightspace.nyu.edu/d2l/le/156784/discussions/topics/281271/View).\\n')\u001b[0m\n",
+      "\n",
+      "\u001b[1m\u001b[31m/var/folders/kg/1ys0dccx4237f5wsd_w10dt80000gn/T/ipykernel_52701/1909726489.py\u001b[0m:15: AssertionError\n",
+      "===================================== short test summary info ======================================\n",
+      "FAILED tmp1ioay2j3.py::test_min_lines - AssertionError: notebook must have more than 40 lines of ...\n",
+      "FAILED tmp1ioay2j3.py::test_has_plotting - AssertionError: notebook must contain a plot of some kind\n",
+      "FAILED tmp1ioay2j3.py::test_uses_transform - AssertionError: notebook must contain a transform of...\n"
+     ]
+    }
+   ],
+   "source": [
+    "%%ipytest -qq\n",
+    "\n",
+    "from extras.scripts.fixtures import *\n",
+    "from extras.scripts.hw_6_check import lines_of_code, MIN_LINES, has_plotting, uses_transform\n",
+    "\n",
+    "\n",
+    "def test_min_lines(script):\n",
+    "    num_lines = lines_of_code(script)\n",
+    "    assert num_lines >= MIN_LINES, f\"notebook must have at least {MIN_LINES} lines of code\"\n",
+    "    \n",
+    "\n",
+    "def test_has_plotting(script):\n",
+    "    assert has_plotting(script), \"notebook must contain a plot of some kind\"\n",
+    "\n",
+    "    \n",
+    "def test_uses_transform(script):\n",
+    "    assert uses_transform(script), \"notebook must contain a transform of some kind: grouping, reshaping, etc.\""
+   ]
   }
  ],
  "metadata": {