From 4bf9f3db2e372342c73b518d03f7a142298b59b5 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Thu, 5 Nov 2020 02:56:07 -0500 Subject: [PATCH 1/2] ensure calls to .sample() are deterministic in CI --- .github/workflows/main.yml | 2 +- scripts/diffable.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index 0bdff89d..5b4c58c5 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -31,7 +31,7 @@ jobs: # - hw_4.ipynb # has incomplete code - hw_5.ipynb - lecture_0.ipynb - # - lecture_1.ipynb # uses sample() + - lecture_1.ipynb - lecture_2.ipynb - lecture_3.ipynb - lecture_4.ipynb diff --git a/scripts/diffable.py b/scripts/diffable.py index 93b4395f..e2fd3e04 100644 --- a/scripts/diffable.py +++ b/scripts/diffable.py @@ -16,6 +16,13 @@ def is_vid(cell): return text == "" +def fix_sample(line): + """Ensure calls to .sample() are deterministic by passing in a seed value + + https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sample.html""" + return re.sub(r"\.sample\((\d+)\)", r".sample(\1, random_state=1)", line) + + def has_html_output(output): return "data" in output and "text/html" in output["data"] @@ -38,6 +45,8 @@ def has_html_output(output): if cell["source"][0].startswith("!"): cell["outputs"] = [] + cell["source"] = [fix_sample(line) for line in cell["source"]] + # filter out pip upgrade warnings cell["outputs"] = [line for line in cell["outputs"] if not is_pip_upgrade_msg(line)] From 593fa88b0076fdb87d2553702922026768bc75f4 Mon Sep 17 00:00:00 2001 From: Aidan Feldman Date: Sun, 10 Jan 2021 22:37:43 -0500 Subject: [PATCH 2/2] exclude sample output --- scripts/diffable.py | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/scripts/diffable.py b/scripts/diffable.py index e2fd3e04..7d13148f 100644 --- a/scripts/diffable.py +++ b/scripts/diffable.py @@ -16,13 +16,6 @@ def is_vid(cell): return text == "" -def fix_sample(line): - """Ensure calls to .sample() are deterministic by passing in a seed value - - https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.sample.html""" - return re.sub(r"\.sample\((\d+)\)", r".sample(\1, random_state=1)", line) - - def has_html_output(output): return "data" in output and "text/html" in output["data"] @@ -45,7 +38,9 @@ def has_html_output(output): if cell["source"][0].startswith("!"): cell["outputs"] = [] - cell["source"] = [fix_sample(line) for line in cell["source"]] + # ignore sample() output + if any(".sample(" in line for line in cell["source"]): + cell["outputs"] = [] # filter out pip upgrade warnings cell["outputs"] = [line for line in cell["outputs"] if not is_pip_upgrade_msg(line)]