From 3d9456fd94fd3e3feee064603f5d47302474fe9f Mon Sep 17 00:00:00 2001 From: Farhad Allian Date: Fri, 30 Jan 2026 12:21:51 +0000 Subject: [PATCH 1/5] fix: skip tests correctly --- causal_testing/main.py | 6 ++++-- tests/main_tests/test_main.py | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index 8d2eabfa..18fa7d8c 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ -20,9 +20,9 @@ from causal_testing.specification.variable import Input, Output from causal_testing.testing.base_test_case import BaseTestCase from causal_testing.testing.causal_effect import Negative, NoEffect, Positive, SomeEffect +from causal_testing.testing.causal_test_adequacy import DataAdequacy from causal_testing.testing.causal_test_case import CausalTestCase from causal_testing.testing.causal_test_result import CausalTestResult -from causal_testing.testing.causal_test_adequacy import DataAdequacy logger = logging.getLogger(__name__) @@ -446,9 +446,11 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) with open(self.paths.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) + active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] + # Combine test configs with their results json_results = [] - for test_config, test_case, result in zip(test_configs["tests"], self.test_cases, results): + for test_config, test_case, result in zip(active_tests, self.test_cases, results): # Determine if test failed based on expected vs actual effect test_passed = ( test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py index b6ceddf3..470a1217 100644 --- a/tests/main_tests/test_main.py +++ b/tests/main_tests/test_main.py @@ -155,9 +155,11 @@ def test_ctf(self): with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) + active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] + tests_passed = [ test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(test_configs["tests"], framework.test_cases, results) + for test_config, test_case, result in zip(active_tests, framework.test_cases, results) ] self.assertEqual(tests_passed, [True]) @@ -230,9 +232,10 @@ def test_ctf_exception_silent(self): with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) + active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] tests_passed = [ test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(test_configs["tests"], framework.test_cases, results) + for test_config, test_case, result in zip(active_tests, framework.test_cases, results) ] self.assertEqual(tests_passed, [False]) From 178a465079e3835acbf73de8107285cdb516ede0 Mon Sep 17 00:00:00 2001 From: Farhad Allian Date: Mon, 2 Feb 2026 12:34:33 +0000 Subject: [PATCH 2/5] fix: include skipped tests in results file with appropriate info --- causal_testing/main.py | 84 +++++++++++++++++++----------- tests/main_tests/test_main.py | 98 +++++++++++++++++------------------ 2 files changed, 101 insertions(+), 81 deletions(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index ee618c2c..edb155b5 100644 --- a/causal_testing/main.py +++ 
b/causal_testing/main.py @@ -425,7 +425,7 @@ def run_tests( return results - def save_results(self, results: List[CausalTestResult], output_path: str = None) -> None: + def save_results(self, results: List[CausalTestResult], output_path: str = None) -> list: """Save test results to JSON file in the expected format.""" if output_path is None: output_path = self.paths.output_path @@ -438,38 +438,60 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) with open(self.paths.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) - active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] - - # Combine test configs with their results json_results = [] - for test_config, test_case, result in zip(active_tests, self.test_cases, results): - # Determine if test failed based on expected vs actual effect - test_passed = ( - test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - ) + result_index = 0 + + for test_config in test_configs["tests"]: + if test_config.get("skip", False): + # Include those skipped test entry without execution results + output = { + "name": test_config["name"], + "estimate_type": test_config["estimate_type"], + "effect": test_config.get("effect", "direct"), + "treatment_variable": test_config["treatment_variable"], + "expected_effect": test_config["expected_effect"], + "formula": None, + "alpha": test_config.get("alpha", 0.05), + "skip": True, + "passed": None, # Don't need this for skipped tests + "result": { + "status": "skipped", + "reason": "Test marked as skip:true in the causal test config file.", + }, + } + else: + # Add executed test with actual results + test_case = self.test_cases[result_index] + result = results[result_index] + result_index += 1 + + test_passed = ( + test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False + ) + + output = { + "name": test_config["name"], + "estimate_type": test_config["estimate_type"], + "effect": test_config.get("effect", "direct"), + "treatment_variable": test_config["treatment_variable"], + "expected_effect": test_config["expected_effect"], + "formula": result.estimator.formula if hasattr(result.estimator, "formula") else None, + "alpha": test_config.get("alpha", 0.05), + "skip": False, + "passed": test_passed, + "result": ( + { + "treatment": result.estimator.base_test_case.treatment_variable.name, + "outcome": result.estimator.base_test_case.outcome_variable.name, + "adjustment_set": list(result.adjustment_set) if result.adjustment_set else [], + } + | result.effect_estimate.to_dict() + | (result.adequacy.to_dict() if result.adequacy else {}) + if result.effect_estimate + else {"error": result.error_message} + ), + } - output = { - "name": test_config["name"], - "estimate_type": test_config["estimate_type"], - "effect": test_config.get("effect", "direct"), - "treatment_variable": test_config["treatment_variable"], - "expected_effect": test_config["expected_effect"], - "formula": result.estimator.formula if hasattr(result.estimator, "formula") else None, - "alpha": test_config.get("alpha", 0.05), - "skip": test_config.get("skip", False), - "passed": test_passed, - "result": ( - { - "treatment": result.estimator.base_test_case.treatment_variable.name, - "outcome": result.estimator.base_test_case.outcome_variable.name, - "adjustment_set": list(result.adjustment_set) if result.adjustment_set else [], - } - | result.effect_estimate.to_dict() - | (result.adequacy.to_dict() if 
result.adequacy else {}) - if result.effect_estimate - else {"error": result.error_message} - ), - } json_results.append(output) # Save to file diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py index 8439dc15..232cf0e7 100644 --- a/tests/main_tests/test_main.py +++ b/tests/main_tests/test_main.py @@ -3,8 +3,6 @@ import tempfile import os from unittest.mock import patch - - import shutil import json import pandas as pd @@ -137,7 +135,6 @@ def test_unloaded_tests(self): def test_unloaded_tests_batches(self): framework = CausalTestingFramework(self.paths) with self.assertRaises(ValueError) as e: - # Need the next because of the yield statement in run_tests_in_batches next(framework.run_tests_in_batches()) self.assertEqual("No tests loaded. Call load_tests() first.", str(e.exception)) @@ -145,30 +142,38 @@ def test_ctf(self): framework = CausalTestingFramework(self.paths) framework.setup() - # Load and run tests framework.load_tests() results = framework.run_tests() - - # Save results - framework.save_results(results) + json_results = framework.save_results(results) with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) - active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] + self.assertEqual(len(json_results), len(test_configs["tests"])) - tests_passed = [ - test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(active_tests, framework.test_cases, results) - ] + result_index = 0 + for i, test_config in enumerate(test_configs["tests"]): + result = json_results[i] - self.assertEqual(tests_passed, [True]) + if test_config.get("skip", False): + self.assertEqual(result["skip"], True) + self.assertEqual(result["passed"], None) + self.assertEqual(result["result"]["status"], "skipped") + else: + test_case = framework.test_cases[result_index] + framework_result = results[result_index] + result_index += 1 + + test_passed = ( + test_case.expected_causal_effect.apply(framework_result) + if framework_result.effect_estimate is not None else False + ) + self.assertEqual(result["passed"], test_passed) def test_ctf_batches(self): framework = CausalTestingFramework(self.paths) framework.setup() - # Load and run tests framework.load_tests() output_files = [] @@ -179,19 +184,18 @@ def test_ctf_batches(self): output_files.append(temp_file_path) del results - # Now stitch the results together from the temporary files all_results = [] for file_path in output_files: with open(file_path, "r", encoding="utf-8") as f: all_results.extend(json.load(f)) - self.assertEqual([result["passed"] for result in all_results], [True]) + executed_results = [result for result in all_results if not result.get("skip", False)] + self.assertEqual([result["passed"] for result in executed_results], [True]) def test_ctf_exception(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() with self.assertRaises(ValueError): framework.run_tests() @@ -200,7 +204,6 @@ def test_ctf_batches_exception_silent(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() output_files = [] @@ -211,56 +214,48 @@ def test_ctf_batches_exception_silent(self): output_files.append(temp_file_path) del results - # Now stitch the results together from the temporary files all_results = [] for file_path in output_files: with 
open(file_path, "r", encoding="utf-8") as f: all_results.extend(json.load(f)) - self.assertEqual([result["passed"] for result in all_results], [False]) - self.assertIsNotNone([result.get("error") for result in all_results]) + executed_results = [result for result in all_results if not result.get("skip", False)] + self.assertEqual([result["passed"] for result in executed_results], [False]) + self.assertIsNotNone([result.get("error") for result in executed_results]) def test_ctf_exception_silent(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() - results = framework.run_tests(silent=True) + json_results = framework.save_results(results) with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) - active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] - tests_passed = [ - test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(active_tests, framework.test_cases, results) - ] + non_skipped_configs = [t for t in test_configs["tests"] if not t.get("skip", False)] + non_skipped_results = [r for r in json_results if not r.get("skip", False)] - self.assertEqual(tests_passed, [False]) - self.assertEqual( - [result.error_message for result in results], - ["zero-size array to reduction operation maximum which has no identity"], - ) + self.assertEqual(len(non_skipped_results), len(non_skipped_configs)) + + for result in non_skipped_results: + self.assertEqual(result["passed"], False) def test_ctf_batches_exception(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() with self.assertRaises(ValueError): next(framework.run_tests_in_batches()) def test_ctf_batches_matches_run_tests(self): - # Run the tests normally framework = CausalTestingFramework(self.paths) framework.setup() framework.load_tests() - normale_results = framework.run_tests() + normal_results = framework.run_tests() - # Run the tests in batches output_files = [] with tempfile.TemporaryDirectory() as tmpdir: for i, results in enumerate(framework.run_tests_in_batches()): @@ -269,24 +264,24 @@ def test_ctf_batches_matches_run_tests(self): output_files.append(temp_file_path) del results - # Now stitch the results together from the temporary files all_results = [] for file_path in output_files: with open(file_path, "r", encoding="utf-8") as f: all_results.extend(json.load(f)) with tempfile.TemporaryDirectory() as tmpdir: - normal_output = os.path.join(tmpdir, f"normal.json") - framework.save_results(normale_results, normal_output) + normal_output = os.path.join(tmpdir, "normal.json") + framework.save_results(normal_results, normal_output) with open(normal_output) as f: - normal_results = json.load(f) + normal_json = json.load(f) - batch_output = os.path.join(tmpdir, f"batch.json") + batch_output = os.path.join(tmpdir, "batch.json") with open(batch_output, "w") as f: json.dump(all_results, f) with open(batch_output) as f: - batch_results = json.load(f) - self.assertEqual(normal_results, batch_results) + batch_json = json.load(f) + + self.assertEqual(normal_json, batch_json) def test_global_query(self): framework = CausalTestingFramework(self.paths) @@ -311,7 +306,6 @@ def test_global_query(self): self.assertTrue((causal_test.estimator.df["test_input"] > 0).all()) query_framework.create_variables() - 
self.assertIsNotNone(query_framework.scenario) def test_test_specific_query(self): @@ -386,7 +380,8 @@ def test_parse_args_adequacy(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 100 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 100) == 100 for test in executed_tests) def test_parse_args_adequacy_batches(self): with patch( @@ -410,7 +405,8 @@ def test_parse_args_adequacy_batches(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 100 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 100) == 100 for test in executed_tests) def test_parse_args_bootstrap_size(self): with patch( @@ -433,7 +429,8 @@ def test_parse_args_bootstrap_size(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 50 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 50) == 50 for test in executed_tests) def test_parse_args_bootstrap_size_explicit_adequacy(self): with patch( @@ -457,7 +454,8 @@ def test_parse_args_bootstrap_size_explicit_adequacy(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 50 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 50) == 50 for test in executed_tests) def test_parse_args_batches(self): with patch( @@ -520,4 +518,4 @@ def test_parse_args_generation_non_default(self): def tearDown(self): if self.output_path.parent.exists(): - shutil.rmtree(self.output_path.parent) + shutil.rmtree(self.output_path.parent) \ No newline at end of file From cf420f8d3c22d1b591fcbc23982e98786c48e3e0 Mon Sep 17 00:00:00 2001 From: Farhad Allian <39086289+f-allian@users.noreply.github.com> Date: Tue, 3 Feb 2026 15:47:02 +0000 Subject: [PATCH 3/5] Update causal_testing/main.py Co-authored-by: Michael Foster <13611658+jmafoster1@users.noreply.github.com> --- causal_testing/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index edb155b5..ec488eed 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ -450,7 +450,7 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) "effect": test_config.get("effect", "direct"), "treatment_variable": test_config["treatment_variable"], "expected_effect": test_config["expected_effect"], - "formula": None, + "formula": test_config.get("formula"), "alpha": test_config.get("alpha", 0.05), "skip": True, "passed": None, # Don't need this for skipped tests From 0dd5e519306dd8b8a03eece2d4f0e52183d2897d Mon Sep 17 00:00:00 2001 From: Farhad Allian <39086289+f-allian@users.noreply.github.com> Date: Tue, 3 Feb 2026 15:47:28 +0000 Subject: [PATCH 4/5] Update causal_testing/main.py Co-authored-by: Michael Foster <13611658+jmafoster1@users.noreply.github.com> --- causal_testing/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index ec488eed..384645a0 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ 
-488,7 +488,7 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) | result.effect_estimate.to_dict() | (result.adequacy.to_dict() if result.adequacy else {}) if result.effect_estimate - else {"error": result.error_message} + else {"status": "error", "reason": result.error_message} ), } From e262f67829323248d4c015b7bfa5fa5a2a321880 Mon Sep 17 00:00:00 2001 From: Farhad Allian Date: Tue, 3 Feb 2026 16:08:06 +0000 Subject: [PATCH 5/5] fix: commonalities in save outputs --- causal_testing/main.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index 384645a0..1c27114d 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ -442,18 +442,23 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) result_index = 0 for test_config in test_configs["tests"]: + + # Create a base output first of common entries + base_output = { + "name": test_config["name"], + "estimate_type": test_config["estimate_type"], + "effect": test_config.get("effect", "direct"), + "treatment_variable": test_config["treatment_variable"], + "expected_effect": test_config["expected_effect"], + "alpha": test_config.get("alpha", 0.05), + } if test_config.get("skip", False): # Include those skipped test entry without execution results output = { - "name": test_config["name"], - "estimate_type": test_config["estimate_type"], - "effect": test_config.get("effect", "direct"), - "treatment_variable": test_config["treatment_variable"], - "expected_effect": test_config["expected_effect"], + **base_output, "formula": test_config.get("formula"), - "alpha": test_config.get("alpha", 0.05), "skip": True, - "passed": None, # Don't need this for skipped tests + "passed": None, "result": { "status": "skipped", "reason": "Test marked as skip:true in the causal test config file.", @@ -470,13 +475,8 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) ) output = { - "name": test_config["name"], - "estimate_type": test_config["estimate_type"], - "effect": test_config.get("effect", "direct"), - "treatment_variable": test_config["treatment_variable"], - "expected_effect": test_config["expected_effect"], + **base_output, "formula": result.estimator.formula if hasattr(result.estimator, "formula") else None, - "alpha": test_config.get("alpha", 0.05), "skip": False, "passed": test_passed, "result": (
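
Note (not part of the patches above): with this series applied, save_results() writes one entry per configured test, including skipped ones, and returns the same list. Below is a minimal sketch of how a consumer might read that file and separate executed, skipped, and errored entries. The field names ("skip", "passed", "result", "status", "reason", "name") come from the patches; the path "results.json" is a placeholder for whatever output path save_results() was given.

    import json

    # Placeholder path: the real location is whatever was passed to
    # save_results() or the framework's output argument.
    with open("results.json", "r", encoding="utf-8") as f:
        results = json.load(f)

    skipped = [r for r in results if r.get("skip", False)]
    executed = [r for r in results if not r.get("skip", False)]
    failed = [r for r in executed if not r["passed"]]

    print(f"{len(executed)} executed, {len(skipped)} skipped, {len(failed)} failed")

    for r in skipped:
        # Skipped entries carry a status/reason block instead of an estimate.
        print(f"SKIPPED {r['name']}: {r['result']['reason']}")

    for r in failed:
        # Executed entries that raised an error now use the same
        # status/reason shape (patch 4/5) instead of a bare "error" key.
        if r["result"].get("status") == "error":
            print(f"ERROR   {r['name']}: {r['result']['reason']}")
        else:
            print(f"FAILED  {r['name']}")

Because "passed" is null (None) for skipped entries rather than false, consumers should check the "skip" flag before treating "passed" as a boolean, as the sketch above does.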