From 1a5f7c5b1d856728c4b6a9261d60278eae01537e Mon Sep 17 00:00:00 2001 From: diehlbw Date: Mon, 2 Dec 2024 14:57:32 -0600 Subject: [PATCH 1/7] change version loc --- setup.cfg | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 4a6d4071..417b8903 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,12 +1,12 @@ [metadata] name = seismometer -version = 0.2.2 +version = 0.3.0 description = seismometer: Data Science visualization and investigation tools for AI Trust & Assurance author = Epic author_email = OpenSourceContributions-Python@epic.com license_files = LICENSE.txt license = BSD 3-Clause -classifiers = +classifiers = Programming Language :: Python :: 3 License :: OSI Approved :: BSD License Operating System :: OS Independent From 13f7ea066cc7e9666c4028154373aa08fe50f3fb Mon Sep 17 00:00:00 2001 From: diehlbw Date: Wed, 4 Dec 2024 13:53:09 +0000 Subject: [PATCH 2/7] add tech plans fix links fix links --- docs/roadmap/index.rst | 45 ++++++++++++++++++++++++++---------------- 1 file changed, 28 insertions(+), 17 deletions(-) diff --git a/docs/roadmap/index.rst b/docs/roadmap/index.rst index 171ecb2f..172c4ea6 100644 --- a/docs/roadmap/index.rst +++ b/docs/roadmap/index.rst @@ -7,11 +7,11 @@ Roadmap Contributions ============= -As an open-source project, we welcome community contributions to ``seismometer``. +As an open-source project, we welcome community contributions to ``seismometer``. Ultimately, we want this project to be a community-led effort to codify guidelines -for ensuring the equitable and informed use of machine learning and AI tools in the -healthcare space. Contributions to this project can be as simple as fixing typos or -small bugs, or more complex contributions that, with the support and scrutiny of our +for ensuring the equitable and informed use of machine learning and AI tools in the +healthcare space. 
Contributions to this project can be as simple as fixing typos or +small bugs, or more complex contributions that, with the support and scrutiny of our development team, guide the overall direction of the project. .. seealso:: @@ -23,20 +23,20 @@ Use Cases Templates --------- -As of ``v0.1.0``, ``seismometer`` supports evaluating model performance using standardized evaluation -criteria binary classifier models. We plan to add support in the near future for other types of -machine learning models, such as multiple classifier models. Similarly, we plan to add +As of ``v0.1.0``, ``seismometer`` supports evaluating model performance using standardized evaluation +criteria binary classifier models. We plan to add support in the near future for other types of +machine learning models, such as multiple classifier models. Similarly, we plan to add support for validating generative AI models. These enhancements will include changes to the underlying ``seismometer`` tooling, as well as adding new templates for validating -generative models. +generative models. Workflows and Pre-Live Evaluation --------------------------------- As of ``v0.1.0``, ``seismometer`` has limited support for evaluating model performance pre-live. -We are planning to add support for workflow simulation (e.g., estimating the number of -alerts that would be shown to end-users for a clinical model that predicts an adverse -event, or the amount of time saved per clinician for a generative model that drafts +We are planning to add support for workflow simulation (e.g., estimating the number of +alerts that would be shown to end-users for a clinical model that predicts an adverse +event, or the amount of time saved per clinician for a generative model that drafts messages to patients) based on particular thresholds. We will also add tools to identify thresholds for models based on pre-live data and operational goals. 
These tools are intended to help identify when a machine learning or artificial intelligence solutions will improve @@ -45,8 +45,8 @@ current workflows and also improve efficiency when integrating models into a wor Comparing to Baselines ---------------------- -We plan to add support for comparing model performance to baseline statistics (e.g., statistics -from a model train or from model performance at a separate site). These are intended to verify +We plan to add support for comparing model performance to baseline statistics (e.g., statistics +from a model train or from model performance at a separate site). These are intended to verify that the model feature or target drift are not adversely affecting the model's performance after it goes live. @@ -56,14 +56,14 @@ Functional changes Visualizations -------------- -As ``seismometer`` grows, we will add support for new types of visualizations. Our initial focus -is to improve visualizations for interventions and outcomes stratified by sensitive groups, but +As ``seismometer`` grows, we will add support for new types of visualizations. Our initial focus +is to improve visualizations for interventions and outcomes stratified by sensitive groups, but we plan to extend our model performance visualizations as well. Data Layer ---------- -As of ``v0.1.0``, ``seismometer`` supports reading data from `parquet` files, which contain data +As of ``v0.1.0``, ``seismometer`` supports reading data from `parquet` files, which contain data type information and performance improvements that standard CSV data does not have. We plan to add support for more file formats (alongside metadata files that will describe the data types) as well as support for reading data directly from a database (e.g., through an ODBC connection). @@ -77,4 +77,15 @@ which the goal will be to minimize those breaking changes and only release break a major version bump. .. seealso:: - :ref:`release` for our Release Notes and any breaking changes. 
\ No newline at end of file + :ref:`release` for our Release Notes and any breaking changes. + + +Technical Plans +=============== + +Some changes are known to have potential impact on using the package, such as changing supported Python +versions. As these arise, issues will be created using the `label 'compatibility'`_ for ease of discovery. This +is distinct from large functional enhancements, which go through a `request for change process`_. + +.. _label 'compatibility': https://github.com/epic-open-source/seismometer/labels/compatibility +.. _request for change process: https://github.com/epic-open-source/seismometer-rfcs From 625cc9000255f4d6faf22aa8680141be3aa2259b Mon Sep 17 00:00:00 2001 From: diehlbw Date: Wed, 4 Dec 2024 21:31:25 +0000 Subject: [PATCH 3/7] build changelog --- changelog/100.bugfix.rst | 1 - changelog/101.bugfix.rst | 1 - changelog/102.feature.rst | 1 - changelog/108.feature.rst | 2 -- changelog/109.bugfix.rst | 6 ------ changelog/113.feature.rst | 1 - changelog/114.bugfix.rst | 1 - changelog/77.feature.rst | 1 - changelog/86.feature.rst | 1 - docs/release_notes/index.rst | 28 ++++++++++++++++++++++++++++ 10 files changed, 28 insertions(+), 15 deletions(-) delete mode 100644 changelog/100.bugfix.rst delete mode 100644 changelog/101.bugfix.rst delete mode 100644 changelog/102.feature.rst delete mode 100644 changelog/108.feature.rst delete mode 100644 changelog/109.bugfix.rst delete mode 100644 changelog/113.feature.rst delete mode 100644 changelog/114.bugfix.rst delete mode 100644 changelog/77.feature.rst delete mode 100644 changelog/86.feature.rst diff --git a/changelog/100.bugfix.rst b/changelog/100.bugfix.rst deleted file mode 100644 index 5d7e3220..00000000 --- a/changelog/100.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Remove remaining references to -1 'invalidation'; validate directly on time comparison when needed diff --git a/changelog/101.bugfix.rst b/changelog/101.bugfix.rst deleted file mode 100644 index 7d425405..00000000 --- 
a/changelog/101.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Fixes scaling issue for binary classfier scores that use the range 0-100 rather than 0-1. \ No newline at end of file diff --git a/changelog/102.feature.rst b/changelog/102.feature.rst deleted file mode 100644 index cad96987..00000000 --- a/changelog/102.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Reorganize methods making initial import and public api more standard diff --git a/changelog/108.feature.rst b/changelog/108.feature.rst deleted file mode 100644 index 4d08a052..00000000 --- a/changelog/108.feature.rst +++ /dev/null @@ -1,2 +0,0 @@ -Includes confusion matrix rates into Binary Fairness metrics -Renames `Flagged` to `Flag Rate` for clarity \ No newline at end of file diff --git a/changelog/109.bugfix.rst b/changelog/109.bugfix.rst deleted file mode 100644 index fe1de9f9..00000000 --- a/changelog/109.bugfix.rst +++ /dev/null @@ -1,6 +0,0 @@ -Fixes a few minor ux issues. - -- Disable dropdowns with only one valid option. -- Fix the Sensitivity/Specificity/PPV plot to move the label to the lower right. -- Fix the Legend in the new Fairness Audit table to improve readability. -- Add right border to the count column. \ No newline at end of file diff --git a/changelog/113.feature.rst b/changelog/113.feature.rst deleted file mode 100644 index 1700c09b..00000000 --- a/changelog/113.feature.rst +++ /dev/null @@ -1 +0,0 @@ -add function to load example datasets diff --git a/changelog/114.bugfix.rst b/changelog/114.bugfix.rst deleted file mode 100644 index 31479b55..00000000 --- a/changelog/114.bugfix.rst +++ /dev/null @@ -1 +0,0 @@ -Remove NotebookHost class that was no longer in use. \ No newline at end of file diff --git a/changelog/77.feature.rst b/changelog/77.feature.rst deleted file mode 100644 index 770a8a52..00000000 --- a/changelog/77.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Addresses #77 by removing Aequitas and replacing with a great_tables based fairness audit. 
\ No newline at end of file diff --git a/changelog/86.feature.rst b/changelog/86.feature.rst deleted file mode 100644 index 860b12e1..00000000 --- a/changelog/86.feature.rst +++ /dev/null @@ -1 +0,0 @@ -Added ExploreBinaryModelMetrics to see plots of individual metrics, including number needed to treat. \ No newline at end of file diff --git a/docs/release_notes/index.rst b/docs/release_notes/index.rst index c75df457..dff6d72c 100644 --- a/docs/release_notes/index.rst +++ b/docs/release_notes/index.rst @@ -9,6 +9,34 @@ Breaking changes may occur between minor versions prior to the v1 release; after .. towncrier release notes start +0.3.0 +------ + +Features +~~~~~~~~ + +- Addresses #77 by removing Aequitas and replacing with a great_tables based fairness audit. (`#77 `__) +- Added ExploreBinaryModelMetrics to see plots of individual metrics, including number needed to treat. (`#86 `__) +- Reorganize methods making initial import and public api more standard (`#102 `__) +- Includes confusion matrix rates into Binary Fairness metrics (`#108 `__) +- Renames `Flagged` to `Flag Rate` for clarity (`#108 `__) +- Add function to load example datasets (`#113 `__) + + +Bugfixes +~~~~~~~~ + +- Remove remaining references to -1 'invalidation'; validate directly on time comparison when needed (`#100 `__) +- Fixes scaling issue for binary classifier scores that use the range 0-100 rather than 0-1. (`#101 `__) +- Fixes a few minor ux issues. (`#109 `__) + + - Disable dropdowns with only one valid option. + - Fix the Sensitivity/Specificity/PPV plot to move the label to the lower right. + - Fix the Legend in the new Fairness Audit table to improve readability. + - Add right border to the count column. +- Remove NotebookHost class that was no longer in use. 
(`#114 `__) + + 0.2.2 ----- From ad874cb91a5578c41ab7c9945dec5fcd0b20e971 Mon Sep 17 00:00:00 2001 From: diehlbw Date: Wed, 4 Dec 2024 22:07:08 +0000 Subject: [PATCH 4/7] add tests for 3.12 --- .github/workflows/ci.yml | 18 +++++++++--------- setup.cfg | 2 +- 2 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index fea3d2e4..2809e738 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1,6 +1,6 @@ name: CI -on: +on: push: {} pull_request: {} workflow_dispatch: {} @@ -21,7 +21,7 @@ jobs: python-version: "3.10" - name: Install pre-commit run: | - pip install --upgrade pip + pip install --upgrade pip pip install pre-commit - name: Run pre-commit steps run: | @@ -39,7 +39,7 @@ jobs: python-version: "3.10" - name: Install dependencies run: | - pip install --upgrade pip + pip install --upgrade pip pip install -e .[dev] - name: Run tests run: | @@ -62,8 +62,8 @@ jobs: test-packaging: name: Test packaging runs-on: ubuntu-latest - - steps: + + steps: - name: Checkout uses: actions/checkout@v4 - name: Setup Python @@ -72,7 +72,7 @@ jobs: python-version: "3.10" - name: Install dependencies run: | - pip install --upgrade pip + pip install --upgrade pip pip install build twine - name: Build package run: | @@ -84,14 +84,14 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11"] + python-version: ["3.10", "3.11", "3.12"] os: - ubuntu-latest - windows-latest - macos-latest include: - experimental: false - + name: Test python-${{ matrix.python-version }} on ${{ matrix.os }} runs-on: ${{ matrix.os }} continue-on-error: ${{ matrix.experimental }} @@ -104,7 +104,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install --upgrade pip + pip install --upgrade pip pip install -e .[dev] - name: Run tests run: | diff --git a/setup.cfg b/setup.cfg index 417b8903..2f2b18b2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -37,7 +37,7 @@ 
install_requires = pydantic>=2.6.3,<3 ydata-profiling>=4.8.3,<5 great-tables>=0.11.0,<1 - numba<0.59 + numba<0.60.0 [options.packages.find] where = src From 457f625c212df10bcbb3239a907b51e0d3b6532e Mon Sep 17 00:00:00 2001 From: diehlbw Date: Wed, 4 Dec 2024 13:53:45 +0000 Subject: [PATCH 5/7] add 12&13 to workflow --- .github/workflows/ci.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 2809e738..3e1f0702 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -84,7 +84,7 @@ jobs: strategy: fail-fast: false matrix: - python-version: ["3.10", "3.11", "3.12"] + python-version: ["3.10", "3.11", "3.12", "3.13"] os: - ubuntu-latest - windows-latest From 297082e7fc4f71454e610b6f1abc5e171cd2e4c9 Mon Sep 17 00:00:00 2001 From: diehlbw Date: Thu, 5 Dec 2024 12:59:21 +0000 Subject: [PATCH 6/7] weaken pinnings --- setup.cfg | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/setup.cfg b/setup.cfg index 2f2b18b2..687262d0 100644 --- a/setup.cfg +++ b/setup.cfg @@ -25,7 +25,7 @@ packages = find_namespace: include_package_data = True install_requires = - numpy>=1.26,<2 # >1.26 for python 3.12+ + numpy>=1.26,<3 ipython>=8.14 ipywidgets>=8.0 jupyterlab>=4.2.5,<5 @@ -37,7 +37,6 @@ install_requires = pydantic>=2.6.3,<3 ydata-profiling>=4.8.3,<5 great-tables>=0.11.0,<1 - numba<0.60.0 [options.packages.find] where = src From 214cf0c09444299ba3544fb8cad5234da56c9dc8 Mon Sep 17 00:00:00 2001 From: diehlbw Date: Thu, 5 Dec 2024 13:03:35 +0000 Subject: [PATCH 7/7] nan is lowercase --- src/seismometer/data/filter.py | 4 ++-- src/seismometer/data/performance.py | 2 +- tests/data/test_performance.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/seismometer/data/filter.py b/src/seismometer/data/filter.py index fce2d07c..bb0b5405 100644 --- a/src/seismometer/data/filter.py +++ b/src/seismometer/data/filter.py @@ -272,14 +272,14 @@ def notin(cls, column, 
values) -> "FilterRule": @classmethod def isna(cls, column) -> "FilterRule": """ - FilterRule where the column contains a na value (np.NaN or None). + FilterRule where the column contains a na value (np.nan or None). """ return cls(column, "isna") @classmethod def notna(cls, column) -> "FilterRule": """ - FilterRule where the column does not contain a na value (np.NaN or None). + FilterRule where the column does not contain a na value (np.nan or None). """ return cls(column, "notna") diff --git a/src/seismometer/data/performance.py b/src/seismometer/data/performance.py index 02dd3b25..546b9405 100644 --- a/src/seismometer/data/performance.py +++ b/src/seismometer/data/performance.py @@ -77,7 +77,7 @@ def __call__(self, dataframe: pd.DataFrame, metric_names: list[str] = None, **kw raise ValueError(f"Invalid metric names: {set(metric_names) - set(self.metric_names)}") if len(dataframe) == 0: # Universal defaults, if no data frame, return NaN - return {name: np.NaN for name in metric_names} + return {name: np.nan for name in metric_names} full_metrics = self.delegate_call(dataframe, metric_names, **kwargs) filtered_metrics = {k: v for k, v in full_metrics.items() if k in metric_names} return filtered_metrics diff --git a/tests/data/test_performance.py b/tests/data/test_performance.py index 5311d6c4..c8a7c99c 100644 --- a/tests/data/test_performance.py +++ b/tests/data/test_performance.py @@ -211,7 +211,7 @@ def test_generate_metrics_init_correctly(self): def test_generate_metrics_empty_dataframe(self): metric = undertest.MetricGenerator(["test_metric"], lambda data, names: {"test_metric": 1}) assert metric.metric_names == ["test_metric"] - assert metric(pd.DataFrame()) == {"test_metric": np.NaN} + assert metric(pd.DataFrame()) == {"test_metric": np.nan} def test_generate_named_metrics(self): metric = undertest.MetricGenerator(["metric1", "metric2"], lambda data, names: {name: 1 for name in names})