From 3d9456fd94fd3e3feee064603f5d47302474fe9f Mon Sep 17 00:00:00 2001 From: Farhad Allian Date: Fri, 30 Jan 2026 12:21:51 +0000 Subject: [PATCH 1/5] fix: skip tests correctly --- causal_testing/main.py | 6 ++++-- tests/main_tests/test_main.py | 7 +++++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index 8d2eabfa..18fa7d8c 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ -20,9 +20,9 @@ from causal_testing.specification.variable import Input, Output from causal_testing.testing.base_test_case import BaseTestCase from causal_testing.testing.causal_effect import Negative, NoEffect, Positive, SomeEffect +from causal_testing.testing.causal_test_adequacy import DataAdequacy from causal_testing.testing.causal_test_case import CausalTestCase from causal_testing.testing.causal_test_result import CausalTestResult -from causal_testing.testing.causal_test_adequacy import DataAdequacy logger = logging.getLogger(__name__) @@ -446,9 +446,11 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) with open(self.paths.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) + active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] + # Combine test configs with their results json_results = [] - for test_config, test_case, result in zip(test_configs["tests"], self.test_cases, results): + for test_config, test_case, result in zip(active_tests, self.test_cases, results): # Determine if test failed based on expected vs actual effect test_passed = ( test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py index b6ceddf3..470a1217 100644 --- a/tests/main_tests/test_main.py +++ b/tests/main_tests/test_main.py @@ -155,9 +155,11 @@ def test_ctf(self): with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) + active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] + tests_passed = [ test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(test_configs["tests"], framework.test_cases, results) + for test_config, test_case, result in zip(active_tests, framework.test_cases, results) ] self.assertEqual(tests_passed, [True]) @@ -230,9 +232,10 @@ def test_ctf_exception_silent(self): with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) + active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] tests_passed = [ test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(test_configs["tests"], framework.test_cases, results) + for test_config, test_case, result in zip(active_tests, framework.test_cases, results) ] self.assertEqual(tests_passed, [False]) From 178a465079e3835acbf73de8107285cdb516ede0 Mon Sep 17 00:00:00 2001 From: Farhad Allian Date: Mon, 2 Feb 2026 12:34:33 +0000 Subject: [PATCH 2/5] fix: include skipped tests in results file with appropriate info --- causal_testing/main.py | 84 +++++++++++++++++++----------- tests/main_tests/test_main.py | 98 +++++++++++++++++------------------ 2 files changed, 101 insertions(+), 81 deletions(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index ee618c2c..edb155b5 100644 --- a/causal_testing/main.py +++ 
b/causal_testing/main.py @@ -425,7 +425,7 @@ def run_tests( return results - def save_results(self, results: List[CausalTestResult], output_path: str = None) -> None: + def save_results(self, results: List[CausalTestResult], output_path: str = None) -> list: """Save test results to JSON file in the expected format.""" if output_path is None: output_path = self.paths.output_path @@ -438,38 +438,60 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) with open(self.paths.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) - active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] - - # Combine test configs with their results json_results = [] - for test_config, test_case, result in zip(active_tests, self.test_cases, results): - # Determine if test failed based on expected vs actual effect - test_passed = ( - test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - ) + result_index = 0 + + for test_config in test_configs["tests"]: + if test_config.get("skip", False): + # Include those skipped test entry without execution results + output = { + "name": test_config["name"], + "estimate_type": test_config["estimate_type"], + "effect": test_config.get("effect", "direct"), + "treatment_variable": test_config["treatment_variable"], + "expected_effect": test_config["expected_effect"], + "formula": None, + "alpha": test_config.get("alpha", 0.05), + "skip": True, + "passed": None, # Don't need this for skipped tests + "result": { + "status": "skipped", + "reason": "Test marked as skip:true in the causal test config file.", + }, + } + else: + # Add executed test with actual results + test_case = self.test_cases[result_index] + result = results[result_index] + result_index += 1 + + test_passed = ( + test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False + ) + + output = { + "name": test_config["name"], + "estimate_type": test_config["estimate_type"], + "effect": test_config.get("effect", "direct"), + "treatment_variable": test_config["treatment_variable"], + "expected_effect": test_config["expected_effect"], + "formula": result.estimator.formula if hasattr(result.estimator, "formula") else None, + "alpha": test_config.get("alpha", 0.05), + "skip": False, + "passed": test_passed, + "result": ( + { + "treatment": result.estimator.base_test_case.treatment_variable.name, + "outcome": result.estimator.base_test_case.outcome_variable.name, + "adjustment_set": list(result.adjustment_set) if result.adjustment_set else [], + } + | result.effect_estimate.to_dict() + | (result.adequacy.to_dict() if result.adequacy else {}) + if result.effect_estimate + else {"error": result.error_message} + ), + } - output = { - "name": test_config["name"], - "estimate_type": test_config["estimate_type"], - "effect": test_config.get("effect", "direct"), - "treatment_variable": test_config["treatment_variable"], - "expected_effect": test_config["expected_effect"], - "formula": result.estimator.formula if hasattr(result.estimator, "formula") else None, - "alpha": test_config.get("alpha", 0.05), - "skip": test_config.get("skip", False), - "passed": test_passed, - "result": ( - { - "treatment": result.estimator.base_test_case.treatment_variable.name, - "outcome": result.estimator.base_test_case.outcome_variable.name, - "adjustment_set": list(result.adjustment_set) if result.adjustment_set else [], - } - | result.effect_estimate.to_dict() - | (result.adequacy.to_dict() if 
result.adequacy else {}) - if result.effect_estimate - else {"error": result.error_message} - ), - } json_results.append(output) # Save to file diff --git a/tests/main_tests/test_main.py b/tests/main_tests/test_main.py index 8439dc15..232cf0e7 100644 --- a/tests/main_tests/test_main.py +++ b/tests/main_tests/test_main.py @@ -3,8 +3,6 @@ import tempfile import os from unittest.mock import patch - - import shutil import json import pandas as pd @@ -137,7 +135,6 @@ def test_unloaded_tests(self): def test_unloaded_tests_batches(self): framework = CausalTestingFramework(self.paths) with self.assertRaises(ValueError) as e: - # Need the next because of the yield statement in run_tests_in_batches next(framework.run_tests_in_batches()) self.assertEqual("No tests loaded. Call load_tests() first.", str(e.exception)) @@ -145,30 +142,38 @@ def test_ctf(self): framework = CausalTestingFramework(self.paths) framework.setup() - # Load and run tests framework.load_tests() results = framework.run_tests() - - # Save results - framework.save_results(results) + json_results = framework.save_results(results) with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) - active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] + self.assertEqual(len(json_results), len(test_configs["tests"])) - tests_passed = [ - test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(active_tests, framework.test_cases, results) - ] + result_index = 0 + for i, test_config in enumerate(test_configs["tests"]): + result = json_results[i] - self.assertEqual(tests_passed, [True]) + if test_config.get("skip", False): + self.assertEqual(result["skip"], True) + self.assertEqual(result["passed"], None) + self.assertEqual(result["result"]["status"], "skipped") + else: + test_case = framework.test_cases[result_index] + framework_result = results[result_index] + result_index += 1 + + test_passed = ( + test_case.expected_causal_effect.apply(framework_result) + if framework_result.effect_estimate is not None else False + ) + self.assertEqual(result["passed"], test_passed) def test_ctf_batches(self): framework = CausalTestingFramework(self.paths) framework.setup() - # Load and run tests framework.load_tests() output_files = [] @@ -179,19 +184,18 @@ def test_ctf_batches(self): output_files.append(temp_file_path) del results - # Now stitch the results together from the temporary files all_results = [] for file_path in output_files: with open(file_path, "r", encoding="utf-8") as f: all_results.extend(json.load(f)) - self.assertEqual([result["passed"] for result in all_results], [True]) + executed_results = [result for result in all_results if not result.get("skip", False)] + self.assertEqual([result["passed"] for result in executed_results], [True]) def test_ctf_exception(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() with self.assertRaises(ValueError): framework.run_tests() @@ -200,7 +204,6 @@ def test_ctf_batches_exception_silent(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() output_files = [] @@ -211,56 +214,48 @@ def test_ctf_batches_exception_silent(self): output_files.append(temp_file_path) del results - # Now stitch the results together from the temporary files all_results = [] for file_path in output_files: with 
open(file_path, "r", encoding="utf-8") as f: all_results.extend(json.load(f)) - self.assertEqual([result["passed"] for result in all_results], [False]) - self.assertIsNotNone([result.get("error") for result in all_results]) + executed_results = [result for result in all_results if not result.get("skip", False)] + self.assertEqual([result["passed"] for result in executed_results], [False]) + self.assertIsNotNone([result.get("error") for result in executed_results]) def test_ctf_exception_silent(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() - results = framework.run_tests(silent=True) + json_results = framework.save_results(results) with open(self.test_config_path, "r", encoding="utf-8") as f: test_configs = json.load(f) - active_tests = [test for test in test_configs["tests"] if not test.get("skip", False)] - tests_passed = [ - test_case.expected_causal_effect.apply(result) if result.effect_estimate is not None else False - for test_config, test_case, result in zip(active_tests, framework.test_cases, results) - ] + non_skipped_configs = [t for t in test_configs["tests"] if not t.get("skip", False)] + non_skipped_results = [r for r in json_results if not r.get("skip", False)] - self.assertEqual(tests_passed, [False]) - self.assertEqual( - [result.error_message for result in results], - ["zero-size array to reduction operation maximum which has no identity"], - ) + self.assertEqual(len(non_skipped_results), len(non_skipped_configs)) + + for result in non_skipped_results: + self.assertEqual(result["passed"], False) def test_ctf_batches_exception(self): framework = CausalTestingFramework(self.paths, query="test_input < 0") framework.setup() - # Load and run tests framework.load_tests() with self.assertRaises(ValueError): next(framework.run_tests_in_batches()) def test_ctf_batches_matches_run_tests(self): - # Run the tests normally framework = CausalTestingFramework(self.paths) framework.setup() framework.load_tests() - normale_results = framework.run_tests() + normal_results = framework.run_tests() - # Run the tests in batches output_files = [] with tempfile.TemporaryDirectory() as tmpdir: for i, results in enumerate(framework.run_tests_in_batches()): @@ -269,24 +264,24 @@ def test_ctf_batches_matches_run_tests(self): output_files.append(temp_file_path) del results - # Now stitch the results together from the temporary files all_results = [] for file_path in output_files: with open(file_path, "r", encoding="utf-8") as f: all_results.extend(json.load(f)) with tempfile.TemporaryDirectory() as tmpdir: - normal_output = os.path.join(tmpdir, f"normal.json") - framework.save_results(normale_results, normal_output) + normal_output = os.path.join(tmpdir, "normal.json") + framework.save_results(normal_results, normal_output) with open(normal_output) as f: - normal_results = json.load(f) + normal_json = json.load(f) - batch_output = os.path.join(tmpdir, f"batch.json") + batch_output = os.path.join(tmpdir, "batch.json") with open(batch_output, "w") as f: json.dump(all_results, f) with open(batch_output) as f: - batch_results = json.load(f) - self.assertEqual(normal_results, batch_results) + batch_json = json.load(f) + + self.assertEqual(normal_json, batch_json) def test_global_query(self): framework = CausalTestingFramework(self.paths) @@ -311,7 +306,6 @@ def test_global_query(self): self.assertTrue((causal_test.estimator.df["test_input"] > 0).all()) query_framework.create_variables() - 
self.assertIsNotNone(query_framework.scenario) def test_test_specific_query(self): @@ -386,7 +380,8 @@ def test_parse_args_adequacy(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 100 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 100) == 100 for test in executed_tests) def test_parse_args_adequacy_batches(self): with patch( @@ -410,7 +405,8 @@ def test_parse_args_adequacy_batches(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 100 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 100) == 100 for test in executed_tests) def test_parse_args_bootstrap_size(self): with patch( @@ -433,7 +429,8 @@ def test_parse_args_bootstrap_size(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 50 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 50) == 50 for test in executed_tests) def test_parse_args_bootstrap_size_explicit_adequacy(self): with patch( @@ -457,7 +454,8 @@ def test_parse_args_bootstrap_size_explicit_adequacy(self): main() with open(self.output_path.parent / "main.json") as f: log = json.load(f) - assert all(test["result"]["bootstrap_size"] == 50 for test in log) + executed_tests = [test for test in log if not test.get("skip", False)] + assert all(test["result"].get("bootstrap_size", 50) == 50 for test in executed_tests) def test_parse_args_batches(self): with patch( @@ -520,4 +518,4 @@ def test_parse_args_generation_non_default(self): def tearDown(self): if self.output_path.parent.exists(): - shutil.rmtree(self.output_path.parent) + shutil.rmtree(self.output_path.parent) \ No newline at end of file From cf420f8d3c22d1b591fcbc23982e98786c48e3e0 Mon Sep 17 00:00:00 2001 From: Farhad Allian <39086289+f-allian@users.noreply.github.com> Date: Tue, 3 Feb 2026 15:47:02 +0000 Subject: [PATCH 3/5] Update causal_testing/main.py Co-authored-by: Michael Foster <13611658+jmafoster1@users.noreply.github.com> --- causal_testing/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index edb155b5..ec488eed 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ -450,7 +450,7 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) "effect": test_config.get("effect", "direct"), "treatment_variable": test_config["treatment_variable"], "expected_effect": test_config["expected_effect"], - "formula": None, + "formula": test_config.get("formula"), "alpha": test_config.get("alpha", 0.05), "skip": True, "passed": None, # Don't need this for skipped tests From 0dd5e519306dd8b8a03eece2d4f0e52183d2897d Mon Sep 17 00:00:00 2001 From: Farhad Allian <39086289+f-allian@users.noreply.github.com> Date: Tue, 3 Feb 2026 15:47:28 +0000 Subject: [PATCH 4/5] Update causal_testing/main.py Co-authored-by: Michael Foster <13611658+jmafoster1@users.noreply.github.com> --- causal_testing/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index ec488eed..384645a0 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ 
-488,7 +488,7 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) | result.effect_estimate.to_dict() | (result.adequacy.to_dict() if result.adequacy else {}) if result.effect_estimate - else {"error": result.error_message} + else {"status": "error", "reason": result.error_message} ), } From e262f67829323248d4c015b7bfa5fa5a2a321880 Mon Sep 17 00:00:00 2001 From: Farhad Allian Date: Tue, 3 Feb 2026 16:08:06 +0000 Subject: [PATCH 5/5] fix: commonalities in save outputs --- causal_testing/main.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/causal_testing/main.py b/causal_testing/main.py index 384645a0..1c27114d 100644 --- a/causal_testing/main.py +++ b/causal_testing/main.py @@ -442,18 +442,23 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) result_index = 0 for test_config in test_configs["tests"]: + + # Create a base output first of common entries + base_output = { + "name": test_config["name"], + "estimate_type": test_config["estimate_type"], + "effect": test_config.get("effect", "direct"), + "treatment_variable": test_config["treatment_variable"], + "expected_effect": test_config["expected_effect"], + "alpha": test_config.get("alpha", 0.05), + } if test_config.get("skip", False): # Include those skipped test entry without execution results output = { - "name": test_config["name"], - "estimate_type": test_config["estimate_type"], - "effect": test_config.get("effect", "direct"), - "treatment_variable": test_config["treatment_variable"], - "expected_effect": test_config["expected_effect"], + **base_output, "formula": test_config.get("formula"), - "alpha": test_config.get("alpha", 0.05), "skip": True, - "passed": None, # Don't need this for skipped tests + "passed": None, "result": { "status": "skipped", "reason": "Test marked as skip:true in the causal test config file.", @@ -470,13 +475,8 @@ def save_results(self, results: List[CausalTestResult], output_path: str = None) ) output = { - "name": test_config["name"], - "estimate_type": test_config["estimate_type"], - "effect": test_config.get("effect", "direct"), - "treatment_variable": test_config["treatment_variable"], - "expected_effect": test_config["expected_effect"], + **base_output, "formula": result.estimator.formula if hasattr(result.estimator, "formula") else None, - "alpha": test_config.get("alpha", 0.05), "skip": False, "passed": test_passed, "result": (
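
Note (not part of the patches above): with this series applied, save_results() writes one entry per configured test, including skipped ones, and returns the same list. Below is a minimal sketch of how a consumer might read that file and separate executed, skipped, and errored entries. The field names ("skip", "passed", "result", "status", "reason", "name") come from the patches; the path "results.json" is a placeholder for whatever output path save_results() was given.

    import json

    # Placeholder path: the real location is whatever was passed to
    # save_results() or the framework's output argument.
    with open("results.json", "r", encoding="utf-8") as f:
        results = json.load(f)

    skipped = [r for r in results if r.get("skip", False)]
    executed = [r for r in results if not r.get("skip", False)]
    failed = [r for r in executed if not r["passed"]]

    print(f"{len(executed)} executed, {len(skipped)} skipped, {len(failed)} failed")

    for r in skipped:
        # Skipped entries carry a status/reason block instead of an estimate.
        print(f"SKIPPED {r['name']}: {r['result']['reason']}")

    for r in failed:
        # Executed entries that raised an error now use the same
        # status/reason shape (patch 4/5) instead of a bare "error" key.
        if r["result"].get("status") == "error":
            print(f"ERROR   {r['name']}: {r['result']['reason']}")
        else:
            print(f"FAILED  {r['name']}")

Because "passed" is null (None) for skipped entries rather than false, consumers should check the "skip" flag before treating "passed" as a boolean, as the sketch above does.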