From f0df88eb3e424be1ba55791bfb8f05534bfd011d Mon Sep 17 00:00:00 2001
From: "google-labs-jules[bot]"
 <161369871+google-labs-jules[bot]@users.noreply.github.com>
Date: Wed, 24 Dec 2025 10:20:04 +0000
Subject: [PATCH] feat: Improve documentation and add getting started tutorial

This commit significantly improves the project's documentation by:

-   **Improving Docstrings:** Added detailed, NumPy-style docstrings to all public functions, which will automatically populate the API reference.
-   **Creating a "Getting Started" Tutorial:** Added a new tutorial to `docs/tutorials/getting_started.qmd` to guide new users through a basic workflow.
-   **Updating the Documentation Website:** Configured the `quartodoc` website with a new landing page, a sidebar for navigation, and a dedicated "Tutorials" section.
-   **Enhancing the README:** Updated the `README.md` with a project description, key features, and a link to the full documentation website.

All tests pass, and the documentation website renders correctly.
---
 README.md                                     |  24 +++-
 docs/_quarto.yml                              |  28 ++---
 docs/index.qmd                                |  13 ++
 docs/tutorials/getting_started.qmd            |  62 +++++++++
 src/rtichoke/discrimination/gains.py          |  91 ++++++++------
 src/rtichoke/discrimination/lift.py           |  91 ++++++++------
 .../discrimination/precision_recall.py        |  92 ++++++++------
 src/rtichoke/discrimination/roc.py            |  99 +++++++++------
 .../performance_data/performance_data.py      |  75 ++++++-----
 .../performance_data_times.py                 |  88 +++++++------
 src/rtichoke/utility/decision.py              | 118 +++++++++++-------
 11 files changed, 509 insertions(+), 272 deletions(-)
 create mode 100644 docs/index.qmd
 create mode 100644 docs/tutorials/getting_started.qmd

diff --git a/README.md b/README.md
index 43d65fd..32d2fd1 100644
--- a/README.md
+++ b/README.md
@@ -1 +1,23 @@
-# rtichoke_python
+# rtichoke
+
+`rtichoke` is a Python library for visualizing the performance of predictive models. It provides a flexible and intuitive way to create a variety of common evaluation plots, including:
+
+*   **ROC Curves**
+*   **Precision-Recall Curves**
+*   **Gains and Lift Charts**
+*   **Decision Curves**
+
+The library is designed to be easy to use, while still offering a high degree of control over the final plots.
+
+## Key Features
+
+*   **Simple API**: Create complex visualizations with just a few lines of code.
+*   **Time-to-Event Analysis**: Native support for models with time-dependent outcomes, including censoring and competing risks.
+*   **Interactive Plots**: Built on Plotly for interactive, publication-quality figures.
+*   **Flexible Data Handling**: Works seamlessly with NumPy and Polars.
+
+## Documentation
+
+For a complete guide to the library, including a "Getting Started" tutorial and a full API reference, please see the **[official documentation](https://your-documentation-url.com)**.
+
+*(Note: The documentation URL will need to be updated once the website is deployed.)*
diff --git a/docs/_quarto.yml b/docs/_quarto.yml
index bd9da8e..82c745e 100644
--- a/docs/_quarto.yml
+++ b/docs/_quarto.yml
@@ -1,30 +1,30 @@
 project:
   type: website
 
-metadata-files:
-  - _sidebar.yml
-
 website:
   title: "rtichoke"
-  navbar:
-    left:
-      - href: reference/
-        text: Reference
+  sidebar:
+    - id: user-guide
+      title: "User Guide"
+      style: "docked"
+      contents:
+        - text: "Getting Started"
+          href: tutorials/getting_started.qmd
+    - id: api-reference
+      title: "API Reference"
+      style: "docked"
+      contents:
+        - href: reference/index.qmd
+          text: "Reference"
 
 quartodoc:
-  # the name used to import the package you want to create reference docs for
   package: rtichoke
-  sidebar: "_sidebar.yml"
   sections:
     - title: Performance Data
       desc: Functions for creating performance data.
       contents:
         - prepare_performance_data
         - prepare_performance_data_times
-    # - title: Calibration
-      # desc: Functions for Calibration.
-      # contents:
-        # - create_calibration_curve
     - title: Discrimination
       desc: Functions for Discrimination.
       contents:
@@ -40,4 +40,4 @@ quartodoc:
       desc: Functions for Utility.
       contents:
         - create_decision_curve
-        - plot_decision_curve
\ No newline at end of file
+        - plot_decision_curve
diff --git a/docs/index.qmd b/docs/index.qmd
new file mode 100644
index 0000000..ad09712
--- /dev/null
+++ b/docs/index.qmd
@@ -0,0 +1,13 @@
+---
+title: "rtichoke Documentation"
+---
+
+Welcome to the official documentation for `rtichoke`, a Python library for visualizing the performance of predictive models.
+
+## Getting Started
+
+If you're new to `rtichoke`, the best place to start is the **[Getting Started Tutorial](./tutorials/getting_started.qmd)**. It will walk you through the basics of importing the library, preparing your data, and creating your first plot.
+
+## API Reference
+
+For detailed information on the functions and classes provided by `rtichoke`, please refer to the **[API Reference](./reference/index.qmd)**.
diff --git a/docs/tutorials/getting_started.qmd b/docs/tutorials/getting_started.qmd
new file mode 100644
index 0000000..86e9d51
--- /dev/null
+++ b/docs/tutorials/getting_started.qmd
@@ -0,0 +1,62 @@
+---
+title: "Getting Started with rtichoke"
+---
+
+This tutorial provides a basic introduction to the `rtichoke` library. We'll walk through the process of preparing data, creating a decision curve, and visualizing the results.
+
+## 1. Import Libraries
+
+First, let's import the necessary libraries. We'll need `numpy` for data manipulation and `rtichoke` for the core functionality.
+
+```python
+import numpy as np
+import rtichoke as rk
+```
+
+## 2. Prepare Your Data
+
+`rtichoke` expects data in a specific format. You'll need two main components:
+
+*   **Probabilities (`probs`)**: A dictionary where keys are model names and values are NumPy arrays of predicted probabilities.
+*   **Real Outcomes (`reals`)**: A NumPy array containing the true binary outcomes (0 or 1).
+
+Let's create some sample data for two models (to keep the example short, both are given the same predictions here):
+
+```python
+# Sample data from the dcurves_example.py script
+probs_dict = {
+    "Marker": np.array([
+        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1, 0.2, 0.3, 0.4, 0.5,
+        0.6, 0.7, 0.8, 0.9, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
+    ]),
+    "Marker2": np.array([
+        0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 0.1, 0.2, 0.3, 0.4, 0.5,
+        0.6, 0.7, 0.8, 0.9, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9
+    ])
+}
+reals = np.array([
+    1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1
+])
+```
+
+## 3. Create a Decision Curve
+
+Now that we have our data, we can create a decision curve. With `rtichoke`, this takes a single function call:
+
+```python
+fig = rk.create_decision_curve(
+    probs=probs_dict,
+    reals=reals,
+)
+```
+
+## 4. Show the Plot
+
+Finally, let's display the plot. Since `rtichoke` uses Plotly under the hood, you can show the figure just like any other Plotly object.
+
+```python
+# To display the plot in an interactive environment (like a Jupyter notebook)
+fig.show()
+```
+
+And that's it! You've created your first decision curve with `rtichoke`. From here, you can explore the other curve types and options that the library has to offer.
diff --git a/src/rtichoke/discrimination/gains.py b/src/rtichoke/discrimination/gains.py
index 8cf7c8d..06fea38 100644
--- a/src/rtichoke/discrimination/gains.py
+++ b/src/rtichoke/discrimination/gains.py
@@ -42,39 +42,33 @@ def create_gains_curve(
         "#585123",
     ],
 ) -> Figure:
-    """Create Gains Curve.
+    """Creates a Gains curve.
+
+    A Gains curve is a marketing and business analytics tool that evaluates
+    the performance of a predictive model. It shows the percentage of
+    positive outcomes (the "gain") that can be captured by targeting a
+    certain percentage of the population, sorted by predicted probability.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Dictionary mapping a label or group name to an array of predicted
-        probabilities for the positive class.
+        A dictionary mapping model or dataset names to 1-D numpy arrays of
+        predicted probabilities.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        Ground-truth binary labels (0/1) as a single array, or a dictionary
-        mapping the same label/group keys used in ``probs`` to arrays of
-        ground-truth labels.
+        The true binary labels (0 or 1), as one array or a dict keyed like ``probs``.
     by : float, optional
-        Resolution for probability thresholds when computing the curve
-        (step size). Default is 0.01.
+        The step size for the probability thresholds. Defaults to 0.01.
     stratified_by : Sequence[str], optional
-        Sequence of column names to stratify the performance data by.
-        Default is ["probability_threshold"].
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
     color_values : List[str], optional
-        List of color hex strings to use for the plotted lines. If not
-        provided, a default palette is used.
+        A list of hex color strings for the plot lines.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Gains curve(s).
-
-    Notes
-    -----
-    The function delegates computation and plotting to
-    ``_create_rtichoke_plotly_curve_binary`` and returns the resulting
-    Plotly figure.
+        A Plotly ``Figure`` object representing the Gains curve.
     """
     fig = _create_rtichoke_plotly_curve_binary(
         probs,
@@ -93,30 +87,27 @@ def plot_gains_curve(
     stratified_by: Sequence[str] = ["probability_threshold"],
     size: int = 600,
 ) -> Figure:
-    """Plot Gains curve from performance data.
+    """Plots a Gains curve from pre-computed performance data.
+
+    This function is useful for plotting a Gains curve directly from a
+    DataFrame that already contains the necessary performance metrics.
 
     Parameters
     ----------
     performance_data : pl.DataFrame
-        A Polars DataFrame containing performance metrics for the Gains curve.
-        Expected columns include (but may not be limited to)
-        ``probability_threshold`` and gains-related metrics, plus any
-        stratification columns.
+        A Polars DataFrame with performance metrics. It must include columns
+        for the percentage of the population targeted and the corresponding
+        gain, along with any stratification variables.
     stratified_by : Sequence[str], optional
-        Sequence of column names used for stratification in the
-        ``performance_data``. Default is ["probability_threshold"].
+        The columns in `performance_data` used for stratification. Defaults to
+        ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Gains plot.
-
-    Notes
-    -----
-    This function wraps ``_plot_rtichoke_curve_binary`` to produce a
-    ready-to-render Plotly figure from precomputed performance data.
+        A Plotly ``Figure`` object representing the Gains curve.
     """
     fig = _plot_rtichoke_curve_binary(
         performance_data,
@@ -163,7 +154,37 @@ def create_gains_curve_times(
         "#585123",
     ],
 ) -> Figure:
-    """Create time-dependent Lift Curve."""
+    """Creates a time-dependent Gains curve.
+
+    Generates a Gains curve for time-to-event models, which is evaluated at
+    specified time horizons and handles censored data and competing risks.
+
+    Parameters
+    ----------
+    probs : Dict[str, np.ndarray]
+        A dictionary of predicted probabilities.
+    reals : Union[np.ndarray, Dict[str, np.ndarray]]
+        The true event statuses.
+    times : Union[np.ndarray, Dict[str, np.ndarray]]
+        The event or censoring times.
+    fixed_time_horizons : list[float]
+        A list of time points for performance evaluation.
+    heuristics_sets : list[Dict], optional
+        Specifies how to handle censored data and competing events.
+    by : float, optional
+        The step size for probability thresholds. Defaults to 0.01.
+    stratified_by : Sequence[str], optional
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
+    size : int, optional
+        The width and height of the plot in pixels. Defaults to 600.
+    color_values : List[str], optional
+        A list of hex color strings for the plot lines.
+
+    Returns
+    -------
+    Figure
+        A Plotly ``Figure`` object for the time-dependent Gains curve.
+    """
 
     fig = _create_rtichoke_plotly_curve_times(
         probs,
diff --git a/src/rtichoke/discrimination/lift.py b/src/rtichoke/discrimination/lift.py
index e3e394c..a8460b1 100644
--- a/src/rtichoke/discrimination/lift.py
+++ b/src/rtichoke/discrimination/lift.py
@@ -42,39 +42,33 @@ def create_lift_curve(
         "#585123",
     ],
 ) -> Figure:
-    """Create Lift Curve.
+    """Creates a Lift curve.
+
+    A Lift curve is a visual tool used to evaluate the performance of a
+    classification model. It shows how much better the model is at identifying
+    positive outcomes compared to a random guess. The "lift" is the ratio of
+    the results obtained with the model to the results from a random selection.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Dictionary mapping a label or group name to an array of predicted
-        probabilities for the positive class.
+        A dictionary mapping model or dataset names to 1-D numpy arrays of
+        predicted probabilities.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        Ground-truth binary labels (0/1) as a single array, or a dictionary
-        mapping the same label/group keys used in ``probs`` to arrays of
-        ground-truth labels.
+        The true binary labels (0 or 1), as one array or a dict keyed like ``probs``.
     by : float, optional
-        Resolution for probability thresholds when computing the curve
-        (step size). Default is 0.01.
+        The step size for the probability thresholds. Defaults to 0.01.
     stratified_by : Sequence[str], optional
-        Sequence of column names to stratify the performance data by.
-        Default is ["probability_threshold"].
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
     color_values : List[str], optional
-        List of color hex strings to use for the plotted lines. If not
-        provided, a default palette is used.
+        A list of hex color strings for the plot lines.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Lift curve(s).
-
-    Notes
-    -----
-    The function delegates computation and plotting to
-    ``_create_rtichoke_plotly_curve_binary`` and returns the resulting
-    Plotly figure.
+        A Plotly ``Figure`` object representing the Lift curve.
     """
     fig = _create_rtichoke_plotly_curve_binary(
         probs,
@@ -93,30 +87,27 @@ def plot_lift_curve(
     stratified_by: Sequence[str] = ["probability_threshold"],
     size: int = 600,
 ) -> Figure:
-    """Plot Lift curve from performance data.
+    """Plots a Lift curve from pre-computed performance data.
+
+    This function is useful for plotting a Lift curve directly from a
+    DataFrame that already contains the necessary performance metrics.
 
     Parameters
     ----------
     performance_data : pl.DataFrame
-        A Polars DataFrame containing performance metrics for the Lift curve.
-        Expected columns include (but may not be limited to)
-        ``probability_threshold`` and lift-related metrics, plus any
-        stratification columns.
+        A Polars DataFrame with performance metrics. It must include columns
+        for the lift values and the percentage of the population targeted,
+        along with any stratification variables.
     stratified_by : Sequence[str], optional
-        Sequence of column names used for stratification in the
-        ``performance_data``. Default is ["probability_threshold"].
+        The columns in `performance_data` used for stratification. Defaults to
+        ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Lift plot.
-
-    Notes
-    -----
-    This function wraps ``_plot_rtichoke_curve_binary`` to produce a
-    ready-to-render Plotly figure from precomputed performance data.
+        A Plotly ``Figure`` object representing the Lift curve.
     """
     fig = _plot_rtichoke_curve_binary(
         performance_data,
@@ -163,7 +154,37 @@ def create_lift_curve_times(
         "#585123",
     ],
 ) -> Figure:
-    """Create time-dependent Lift Curve."""
+    """Creates a time-dependent Lift curve.
+
+    Generates a Lift curve for time-to-event models, which is evaluated at
+    specified time horizons and handles censored data and competing risks.
+
+    Parameters
+    ----------
+    probs : Dict[str, np.ndarray]
+        A dictionary of predicted probabilities.
+    reals : Union[np.ndarray, Dict[str, np.ndarray]]
+        The true event statuses.
+    times : Union[np.ndarray, Dict[str, np.ndarray]]
+        The event or censoring times.
+    fixed_time_horizons : list[float]
+        A list of time points for performance evaluation.
+    heuristics_sets : list[Dict], optional
+        Specifies how to handle censored data and competing events.
+    by : float, optional
+        The step size for probability thresholds. Defaults to 0.01.
+    stratified_by : Sequence[str], optional
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
+    size : int, optional
+        The width and height of the plot in pixels. Defaults to 600.
+    color_values : List[str], optional
+        A list of hex color strings for the plot lines.
+
+    Returns
+    -------
+    Figure
+        A Plotly ``Figure`` object for the time-dependent Lift curve.
+    """
 
     fig = _create_rtichoke_plotly_curve_times(
         probs,
diff --git a/src/rtichoke/discrimination/precision_recall.py b/src/rtichoke/discrimination/precision_recall.py
index 1a3d7a0..7770cbd 100644
--- a/src/rtichoke/discrimination/precision_recall.py
+++ b/src/rtichoke/discrimination/precision_recall.py
@@ -42,39 +42,34 @@ def create_precision_recall_curve(
         "#585123",
     ],
 ) -> Figure:
-    """Create Precision-Recall Curve.
+    """Creates a Precision-Recall curve.
+
+    This function generates a Precision-Recall curve, which is a common
+    alternative to the ROC curve, particularly for imbalanced datasets. It
+    plots precision (Positive Predictive Value) against recall (True Positive
+    Rate) for a binary classifier at different probability thresholds.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Dictionary mapping a label or group name to an array of predicted
-        probabilities for the positive class.
+        A dictionary mapping model or dataset names to 1-D numpy arrays of
+        predicted probabilities.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        Ground-truth binary labels (0/1) as a single array, or a dictionary
-        mapping the same label/group keys used in ``probs`` to arrays of
-        ground-truth labels.
+        The true binary labels (0 or 1). Can be a single array or a dictionary
+        mapping names to label arrays.
     by : float, optional
-        Resolution for probability thresholds when computing the curve
-        (step size). Default is 0.01.
+        The step size for the probability thresholds. Defaults to 0.01.
     stratified_by : Sequence[str], optional
-        Sequence of column names to stratify the performance data by.
-        Default is ["probability_threshold"].
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
     color_values : List[str], optional
-        List of color hex strings to use for the plotted lines. If not
-        provided, a default palette is used.
+        A list of hex color strings for the plot lines.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Precision-Recall curve(s).
-
-    Notes
-    -----
-    The function delegates computation and plotting to
-    ``_create_rtichoke_plotly_curve_binary`` and returns the resulting
-    Plotly figure.
+        A Plotly ``Figure`` object representing the Precision-Recall curve.
     """
     fig = _create_rtichoke_plotly_curve_binary(
         probs,
@@ -93,30 +88,26 @@ def plot_precision_recall_curve(
     stratified_by: Sequence[str] = ["probability_threshold"],
     size: int = 600,
 ) -> Figure:
-    """Plot Precision-Recall curve from performance data.
+    """Plots a Precision-Recall curve from pre-computed performance data.
+
+    This function is useful when you have already computed the performance
+    metrics and want to generate a Precision-Recall plot directly.
 
     Parameters
     ----------
     performance_data : pl.DataFrame
-        A Polars DataFrame containing performance metrics for the
-        Precision-Recall curve. Expected columns include (but may not be
-        limited to) ``probability_threshold``, precision and recall values,
-        plus any stratification columns.
+        A Polars DataFrame with the necessary performance metrics, including
+        precision (ppv) and recall (tpr), along with any stratification variables.
     stratified_by : Sequence[str], optional
-        Sequence of column names used for stratification in the
-        ``performance_data``. Default is ["probability_threshold"].
+        The columns in `performance_data` used for stratification. Defaults to
+        ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Precision-Recall plot.
-
-    Notes
-    -----
-    This function wraps ``_plot_rtichoke_curve_binary`` to produce a
-    ready-to-render Plotly figure from precomputed performance data.
+        A Plotly ``Figure`` object representing the Precision-Recall curve.
     """
     fig = _plot_rtichoke_curve_binary(
         performance_data,
@@ -163,7 +154,38 @@ def create_precision_recall_curve_times(
         "#585123",
     ],
 ) -> Figure:
-    """Create time-dependent Lift Curve."""
+    """Creates a time-dependent Precision-Recall curve.
+
+    Generates a Precision-Recall curve for time-to-event models, evaluating
+    performance at specified time horizons. It handles censored data and
+    competing risks based on the provided heuristics.
+
+    Parameters
+    ----------
+    probs : Dict[str, np.ndarray]
+        A dictionary of predicted probabilities.
+    reals : Union[np.ndarray, Dict[str, np.ndarray]]
+        The true event statuses.
+    times : Union[np.ndarray, Dict[str, np.ndarray]]
+        The event or censoring times.
+    fixed_time_horizons : list[float]
+        A list of time points for performance evaluation.
+    heuristics_sets : list[Dict], optional
+        Specifies how to handle censored data and competing events.
+    by : float, optional
+        The step size for probability thresholds. Defaults to 0.01.
+    stratified_by : Sequence[str], optional
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
+    size : int, optional
+        The width and height of the plot in pixels. Defaults to 600.
+    color_values : List[str], optional
+        A list of hex color strings for the plot lines.
+
+    Returns
+    -------
+    Figure
+        A Plotly ``Figure`` object for the time-dependent Precision-Recall curve.
+    """
 
     fig = _create_rtichoke_plotly_curve_times(
         probs,
diff --git a/src/rtichoke/discrimination/roc.py b/src/rtichoke/discrimination/roc.py
index 9bcc653..ba663a2 100644
--- a/src/rtichoke/discrimination/roc.py
+++ b/src/rtichoke/discrimination/roc.py
@@ -42,39 +42,39 @@ def create_roc_curve(
         "#585123",
     ],
 ) -> Figure:
-    """Create ROC Curve.
+    """Creates a Receiver Operating Characteristic (ROC) curve.
+
+    This function generates an ROC curve, which visualizes the diagnostic
+    ability of a binary classifier system as its discrimination threshold is
+    varied. The curve plots the True Positive Rate (TPR) against the
+    False Positive Rate (FPR) at various threshold settings.
+
+    It first calculates the performance data using the provided probabilities
+    and true labels, and then generates the plot.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Dictionary mapping a label or group name to an array of predicted
-        probabilities for the positive class.
+        A dictionary mapping model or dataset names to 1-D numpy arrays of
+        predicted probabilities.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        Ground-truth binary labels (0/1) as a single array, or a dictionary
-        mapping the same label/group keys used in ``probs`` to arrays of
-        ground-truth labels.
+        The true binary labels (0 or 1). Can be a single array for all
+        probabilities or a dictionary mapping names to label arrays.
     by : float, optional
-        Resolution for probability thresholds when computing the curve
-        (step size). Default is 0.01.
+        The step size for the probability thresholds, controlling the curve's
+        granularity. Defaults to 0.01.
     stratified_by : Sequence[str], optional
-        Sequence of column names to stratify the performance data by.
-        Default is ["probability_threshold"].
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
     color_values : List[str], optional
-        List of color hex strings to use for the plotted lines. If not
-        provided, a default palette is used.
+        A list of hex color strings for the plot lines. A default palette is
+        used if not provided.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the ROC curve(s).
-
-    Notes
-    -----
-    The function delegates computation and plotting to
-    ``_create_rtichoke_plotly_curve_binary`` and returns the resulting
-    Plotly figure.
+        A Plotly ``Figure`` object representing the ROC curve.
     """
     fig = _create_rtichoke_plotly_curve_binary(
         probs,
@@ -93,30 +93,28 @@ def plot_roc_curve(
     stratified_by: Sequence[str] = ["probability_threshold"],
     size: int = 600,
 ) -> Figure:
-    """Plot ROC curve from performance data.
+    """Plots an ROC curve from pre-computed performance data.
+
+    This function is useful when you have already computed the performance
+    metrics (TPR, FPR, etc.) and want to generate an ROC plot directly from
+    that data.
 
     Parameters
     ----------
     performance_data : pl.DataFrame
-        A Polars DataFrame containing performance metrics for the ROC curve.
-        Expected columns include (but may not be limited to) ``probability_threshold``,
-        true positive rate (TPR) and false positive rate (FPR), plus any
-        stratification columns.
+        A Polars DataFrame containing the necessary performance metrics. It must
+        include columns for the true positive rate (tpr) and false positive
+        rate (fpr), along with any stratification variables.
     stratified_by : Sequence[str], optional
-        Sequence of column names used for stratification in the
-        ``performance_data``. Default is ["probability_threshold"].
+        The columns in `performance_data` used for stratification. Defaults to
+        ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the ROC plot.
-
-    Notes
-    -----
-    This function wraps ``_plot_rtichoke_curve_binary`` to produce a
-    ready-to-render Plotly figure from precomputed performance data.
+        A Plotly ``Figure`` object representing the ROC curve.
     """
     fig = _plot_rtichoke_curve_binary(
         performance_data,
@@ -164,7 +162,38 @@ def create_roc_curve_times(
         "#585123",
     ],
 ) -> Figure:
-    """Create time-dependent Lift Curve."""
+    """Creates a time-dependent Receiver Operating Characteristic (ROC) curve.
+
+    This function generates an ROC curve for time-to-event models. It evaluates
+    the model's performance at specified time horizons, handling censored data
+    and competing risks according to the chosen heuristics.
+
+    Parameters
+    ----------
+    probs : Dict[str, np.ndarray]
+        A dictionary of predicted probabilities.
+    reals : Union[np.ndarray, Dict[str, np.ndarray]]
+        The true event statuses (e.g., 0=censored, 1=event, 2=competing).
+    times : Union[np.ndarray, Dict[str, np.ndarray]]
+        The event or censoring times.
+    fixed_time_horizons : list[float]
+        A list of time points for performance evaluation.
+    heuristics_sets : list[Dict], optional
+        Specifies how to handle censored data and competing events.
+    by : float, optional
+        The step size for probability thresholds. Defaults to 0.01.
+    stratified_by : Sequence[str], optional
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
+    size : int, optional
+        The width and height of the plot in pixels. Defaults to 600.
+    color_values : List[str], optional
+        A list of hex color strings for the plot lines.
+
+    Returns
+    -------
+    Figure
+        A Plotly ``Figure`` object representing the time-dependent ROC curve.
+    """
 
     fig = _create_rtichoke_plotly_curve_times(
         probs,
diff --git a/src/rtichoke/performance_data/performance_data.py b/src/rtichoke/performance_data/performance_data.py
index 8fa2d30..fff255b 100644
--- a/src/rtichoke/performance_data/performance_data.py
+++ b/src/rtichoke/performance_data/performance_data.py
@@ -28,31 +28,37 @@ def prepare_binned_classification_data(
     """
     Prepare probability-binned classification data for binary outcomes.
 
-    This constructs the underlying, binned data across probability thresholds
-    (and any additional stratification variables). It returns the adjusted data
-    before cumulative Aalen–Johansen and performance computations.
+    This function serves as the foundation for many of the performance analysis
+    visualizations. It takes predicted probabilities and true binary outcomes,
+    bins them by probability thresholds, and tabulates the observed outcomes
+    within each bin. Cumulative counts and performance metrics are computed
+    later, in `prepare_performance_data`; this binned data can also be used
+    directly for histograms and calibration diagnostics.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Mapping from dataset name to predicted probabilities (1-D numpy arrays).
+        A dictionary mapping model or dataset names (str) to their predicted
+        probabilities (1-D numpy arrays).
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        True event labels. Can be a single array aligned to pooled probabilities
-        or a dictionary mapping each dataset name to its true-label array. Labels
-        are expected to be binary integers (0/1).
+        The true event labels. This can be a single numpy array that is aligned
+        with all pooled probabilities or a dictionary mapping each dataset name
+        to its corresponding array of true labels. Labels must be binary (0 or 1).
     stratified_by : Sequence[str], optional
-        Stratification variables used to create combinations/breaks. Defaults to
-        ``("probability_threshold",)``.
+        A sequence of strings specifying the variables by which to stratify the
+        data. The default is ``("probability_threshold",)``, which bins the data
+        based on predicted probabilities.
     by : float, optional
-        Step width for probability-threshold breaks (used to create the grid of
-        cutoffs). Defaults to ``0.01``.
+        The step size to use when creating bins for the probability thresholds.
+        This determines the granularity of the analysis. Defaults to ``0.01``.
 
     Returns
     -------
     pl.DataFrame
-        A Polars DataFrame containing probability-binned classification data
-        (one row per combination of dataset / bin / strata). This is the basis
-        for histograms, calibration diagnostics, and performance curves.
+        A Polars DataFrame containing the binned classification data. Each row
+        represents a unique combination of model/dataset, probability bin, and
+        any other stratification variables. It forms the basis for subsequent
+        performance calculations.
     """
     breaks = create_breaks_values(None, "probability_threshold", by)
 
@@ -91,28 +97,40 @@ def prepare_performance_data(
     stratified_by: Sequence[str] = ("probability_threshold",),
     by: float = 0.01,
 ) -> pl.DataFrame:
-    """Prepare performance data for binary classification.
+    """Prepare performance data for binary classification models.
+
+    This function computes a comprehensive set of performance metrics for one
+    or more binary classification models across a range of probability
+    thresholds. It builds upon the binned data from
+    `prepare_binned_classification_data` by cumulatively summing the counts
+    and calculating metrics like sensitivity (TPR), specificity, precision
+    (PPV), and net benefit.
+
+    The resulting DataFrame is the primary input for plotting functions like
+    `plot_roc_curve`, `plot_precision_recall_curve`, etc.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Mapping from dataset name to predicted probabilities (1-D numpy arrays).
+        A dictionary mapping model or dataset names (str) to their predicted
+        probabilities (1-D numpy arrays).
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        True event labels. Can be a single array aligned to pooled probabilities
-        or a dictionary mapping each dataset name to its true-label array. Labels
-        are expected to be binary integers (0/1).
+        The true event labels. This can be a single numpy array that is aligned
+        with all pooled probabilities or a dictionary mapping each dataset name
+        to its corresponding array of true labels. Labels must be binary (0 or 1).
     stratified_by : Sequence[str], optional
-        Stratification variables used to create combinations/breaks. Defaults to
-        ``("probability_threshold",)``.
+        A sequence of strings specifying the variables by which to stratify the
+        data. The default is ``("probability_threshold",)``.
     by : float, optional
-        Step width for probability-threshold breaks (used to create the grid of
-        cutoffs). Defaults to ``0.01``.
+        The step size for probability thresholds, determining the number of
+        points at which performance is evaluated. Defaults to ``0.01``.
 
     Returns
     -------
     pl.DataFrame
-        A Polars DataFrame containing performance metrics computed across probability
-        thresholds. Columns include the probability cutoff and performance measures.
+        A Polars DataFrame where each row corresponds to a probability cutoff
+        for a given model/dataset. Columns include the cutoff value and a rich
+        set of performance metrics (e.g., `tpr`, `fpr`, `ppv`, `net_benefit`).
 
     Examples
     --------
@@ -123,10 +141,9 @@ def prepare_performance_data(
     ...     )
     ... }
     >>> reals_dict_test = [1, 1, 1, 1, 0, 0, 1, 0, 0, 1]
-
-    >>> prepare_performance_data(
-    ...     probs_dict_test,
-    ...     reals_dict_test,
+    >>> performance_df = prepare_performance_data(
+    ...     probs=probs_dict_test,
+    ...     reals=reals_dict_test,
     ...     by=0.1
     ... )
     """
diff --git a/src/rtichoke/performance_data/performance_data_times.py b/src/rtichoke/performance_data/performance_data_times.py
index d1629b4..e8a5dec 100644
--- a/src/rtichoke/performance_data/performance_data_times.py
+++ b/src/rtichoke/performance_data/performance_data_times.py
@@ -34,39 +34,49 @@ def prepare_performance_data_times(
     stratified_by: Sequence[str] = ("probability_threshold",),
     by: float = 0.01,
 ) -> pl.DataFrame:
-    """Prepare performance data with a time dimension.
+    """Prepare performance data for models with time-to-event outcomes.
+
+    This function calculates a comprehensive set of performance metrics for
+    models predicting time-to-event outcomes. It handles censored data and
+    competing events by applying specified heuristics at different time
+    horizons. The function first bins the data using
+    `prepare_binned_classification_data_times` and then computes cumulative,
+    Aalen-Johansen-based performance metrics.
+
+    The resulting dataframe is the primary input for time-dependent plotting
+    functions.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Mapping from dataset name to predicted probabilities (1-D numpy arrays).
+        A dictionary mapping model or dataset names (str) to their predicted
+        probabilities of an event occurring by a given time.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        True event labels. Can be a single array aligned to pooled probabilities
-        or a dictionary mapping each dataset name to its true-label array. Labels
-        are expected to be integers (e.g., 0/1 for binary, or competing event codes).
+        The true event statuses. Can be a single array or a dictionary.
+        Labels should be integers indicating the outcome (e.g., 0=censored,
+        1=event of interest, 2=competing event).
     times : Union[np.ndarray, Dict[str, np.ndarray]]
-        Event or censoring times corresponding to `reals`. Either a single array
-        or a dictionary keyed like `probs` when multiple datasets are provided.
+        The event or censoring times corresponding to the `reals`. Can be a
+        single array or a dictionary.
     fixed_time_horizons : list[float]
-        Fixed time horizons (same units as `times`) at which to evaluate performance.
+        A list of time points at which to evaluate the model's performance.
     heuristics_sets : list[Dict], optional
-        List of heuristic dictionaries controlling censoring/competing-event handling.
-        Default is a single heuristic set:
+        A list of dictionaries, each specifying how to handle censored data
+        and competing events. The default is
         ``[{"censoring_heuristic": "adjusted",
-            "competing_heuristic": "adjusted_as_negative"}]``
+        "competing_heuristic": "adjusted_as_negative"}]``.
     stratified_by : Sequence[str], optional
-        Stratification variables used to create combinations/breaks. Defaults to
+        Variables by which to stratify the analysis. Defaults to
         ``("probability_threshold",)``.
     by : float, optional
-        Step width for probability-threshold breaks (used to create the grid of
-        cutoffs). Defaults to ``0.01``.
+        The step size for probability thresholds. Defaults to ``0.01``.
 
     Returns
     -------
     pl.DataFrame
-        A Polars DataFrame containing performance metrics computed across probability
-        thresholds and fixed time horizons. Columns include the probability cutoff,
-        fixed time horizon, heuristic identifiers, and AJ-derived performance measures.
+        A Polars DataFrame with performance metrics computed across probability
+        thresholds and time horizons. It includes columns for cutoffs, time
+        points, heuristics, and performance measures.
     """
     # 1. Get the underlying binned time-dependent classification data
     final_adjusted_data = prepare_binned_classification_data_times(
@@ -105,44 +115,42 @@ def prepare_binned_classification_data_times(
     risk_set_scope: Sequence[str] = ["pooled_by_cutoff", "within_stratum"],
 ) -> pl.DataFrame:
     """
-    Prepare probability-binned, time-dependent classification data.
+    Prepare binned, time-dependent classification data.
 
-    This constructs the underlying, binned data across probability thresholds,
-    fixed time horizons, and heuristic sets. It returns the adjusted data
-    before the cumulative Aalen–Johansen and performance-transformation steps.
+    This function constructs the foundational binned data needed for
+    time-to-event performance analysis. It bins predictions by probability
+    thresholds, applies censoring and competing event heuristics, and stratifies
+    the data across specified time horizons. The output is a detailed breakdown
+    of outcomes within each bin, which can be used for calibration and is what
+    `prepare_performance_data_times` builds on to compute performance metrics.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Mapping from dataset name to predicted probabilities (1-D numpy arrays).
+        A dictionary mapping model or dataset names (str) to their predicted
+        probabilities.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        True event labels. Can be a single array aligned to pooled probabilities
-        or a dictionary mapping each dataset name to its true-label array. Labels
-        are expected to be integers (e.g., 0/1 for binary, or competing event codes).
+        The true event statuses (e.g., 0=censored, 1=event, 2=competing).
     times : Union[np.ndarray, Dict[str, np.ndarray]]
-        Event or censoring times corresponding to `reals`. Either a single array
-        or a dictionary keyed like `probs` when multiple datasets are provided.
+        The event or censoring times.
     fixed_time_horizons : list[float]
-        Fixed time horizons (same units as `times`) at which to evaluate performance.
+        A list of time points for performance evaluation.
     heuristics_sets : list[Dict], optional
-        List of heuristic dictionaries controlling censoring/competing-event handling.
-        Default is a single heuristic set:
-        ``[{"censoring_heuristic": "adjusted",
-            "competing_heuristic": "adjusted_as_negative"}]``
+        Specifies how to handle censored data and competing events.
     stratified_by : Sequence[str], optional
-        Stratification variables used to create combinations/breaks. Defaults to
-        ``("probability_threshold",)``.
+        Variables for stratification. Defaults to ``("probability_threshold",)``.
     by : float, optional
-        Step width for probability-threshold breaks (used to create the grid of
-        cutoffs). Defaults to ``0.01``.
+        The step size for probability thresholds. Defaults to ``0.01``.
+    risk_set_scope : Sequence[str], optional
+        Defines the scope for risk set calculations. Defaults to
+        ``["pooled_by_cutoff", "within_stratum"]``.
 
     Returns
     -------
     pl.DataFrame
-        A Polars DataFrame containing probability-binned, time-dependent
-        classification data (one row per combination of dataset / bin /
-        time horizon / heuristic / strata). This is the basis for histograms,
-        calibration diagnostics, and time-dependent performance curves.
+        A Polars DataFrame with binned, time-dependent data. Each row
+        represents a unique combination of dataset, bin, time horizon,
+        heuristic, and other strata.
     """
     breaks = create_breaks_values(None, "probability_threshold", by)
 
diff --git a/src/rtichoke/utility/decision.py b/src/rtichoke/utility/decision.py
index 57fdf53..12a0592 100644
--- a/src/rtichoke/utility/decision.py
+++ b/src/rtichoke/utility/decision.py
@@ -45,49 +45,42 @@ def create_decision_curve(
         "#585123",
     ],
 ) -> Figure:
-    """Create Decision Curve.
+    """Create a Decision Curve.
+
+    Decision Curve Analysis is a method for evaluating and comparing prediction
+    models that incorporates the clinical consequences of a decision. The curve
+    plots the net benefit of a model against the probability threshold used to
+    determine positive cases. This helps to assess the real-world utility of a
+    model.
 
     Parameters
     ----------
     probs : Dict[str, np.ndarray]
-        Dictionary mapping a label or group name to an array of predicted
-        probabilities for the positive class.
+        A dictionary mapping model or dataset names to 1-D numpy arrays of
+        predicted probabilities.
     reals : Union[np.ndarray, Dict[str, np.ndarray]]
-        Ground-truth binary labels (0/1) as a single array, or a dictionary
-        mapping the same label/group keys used in ``probs`` to arrays of
-        ground-truth labels.
+        The true binary labels (0 or 1).
     decision_type : str, optional
-        Either ``"conventional"`` (decision curve) or another value that
-        implies the "interventions avoided" variant. Default is
+        Type of decision curve. ``"conventional"`` for a standard decision curve
+        or another value for the "interventions avoided" variant. Defaults to
         ``"conventional"``.
     min_p_threshold : float, optional
-        Minimum probability threshold to include in the curve. Default is 0.
+        The minimum probability threshold to plot. Defaults to 0.
     max_p_threshold : float, optional
-        Maximum probability threshold to include in the curve. Default is 1.
+        The maximum probability threshold to plot. Defaults to 1.
     by : float, optional
-        Resolution for probability thresholds when computing the curve
-        (step size). Default is 0.01.
+        The step size for the probability thresholds. Defaults to 0.01.
     stratified_by : Sequence[str], optional
-        Sequence of column names to stratify the performance data by.
-        Default is ["probability_threshold"].
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
     color_values : List[str], optional
-        List of color hex strings to use for the plotted lines. If not
-        provided, a default palette is used.
+        A list of hex color strings for the plot lines.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Decision curve.
-
-    Notes
-    -----
-    The function selects the appropriate curve name based on
-    ``decision_type`` and delegates computation and plotting to
-    ``_create_rtichoke_plotly_curve_binary``. Additional keyword arguments
-    (like ``min_p_threshold`` and ``max_p_threshold``) are forwarded to
-    the helper.
+        A Plotly ``Figure`` object representing the Decision Curve.
     """
     if decision_type == "conventional":
         curve = "decision"
@@ -116,39 +109,32 @@ def plot_decision_curve(
     stratified_by: Sequence[str] = ["probability_threshold"],
     size: int = 600,
 ) -> Figure:
-    """Plot Decision Curve from performance data.
+    """Plot a Decision Curve from pre-computed performance data.
+
+    This function is useful for plotting a Decision Curve directly from a
+    DataFrame that already contains the necessary performance metrics.
 
     Parameters
     ----------
     performance_data : pl.DataFrame
-        A Polars DataFrame containing performance metrics for the Decision
-        curve. Expected columns include (but may not be limited to)
-        ``probability_threshold`` and decision-curve metrics, plus any
-        stratification columns.
-    decision_type : str
-        ``"conventional"`` for decision curves, otherwise the
-        "interventions avoided" variant will be used.
+        A Polars DataFrame with performance metrics, including net benefit and
+        probability thresholds.
+    decision_type : str, optional
+        ``"conventional"`` (default); any other value plots "interventions avoided".
     min_p_threshold : float, optional
-        Minimum probability threshold to include in the curve. Default is 0.
+        The minimum probability threshold to plot. Defaults to 0.
     max_p_threshold : float, optional
-        Maximum probability threshold to include in the curve. Default is 1.
+        The maximum probability threshold to plot. Defaults to 1.
     stratified_by : Sequence[str], optional
-        Sequence of column names used for stratification in the
-        ``performance_data``. Default is ["probability_threshold"].
+        The columns in `performance_data` used for stratification. Defaults to
+        ``["probability_threshold"]``.
     size : int, optional
-        Plot size in pixels (width and height). Default is 600.
+        The width and height of the plot in pixels. Defaults to 600.
 
     Returns
     -------
     Figure
-        A Plotly ``Figure`` containing the Decision plot.
-
-    Notes
-    -----
-    This function wraps ``_plot_rtichoke_curve_binary`` to produce a
-    ready-to-render Plotly figure from precomputed performance data.
-    Additional keyword arguments (``min_p_threshold``, ``max_p_threshold``)
-    are forwarded to the helper.
+        A Plotly ``Figure`` object representing the Decision Curve.
     """
     if decision_type == "conventional":
         curve = "decision"
@@ -205,7 +191,43 @@ def create_decision_curve_times(
         "#585123",
     ],
 ) -> Figure:
-    """Create time-dependent Decision Curve."""
+    """Create a time-dependent Decision Curve.
+
+    Generates a Decision Curve for time-to-event models, which is evaluated at
+    specified time horizons and handles censored data and competing risks.
+
+    Parameters
+    ----------
+    probs : Dict[str, np.ndarray]
+        A dictionary of predicted probabilities.
+    reals : Union[np.ndarray, Dict[str, np.ndarray]]
+        The true event statuses.
+    times : Union[np.ndarray, Dict[str, np.ndarray]]
+        The event or censoring times.
+    fixed_time_horizons : list[float]
+        A list of time points for performance evaluation.
+    decision_type : str, optional
+        ``"conventional"`` (default); any other value plots "interventions avoided".
+    heuristics_sets : list[Dict], optional
+        Specifies how to handle censored data and competing events.
+    min_p_threshold : float, optional
+        The minimum probability threshold to plot. Defaults to 0.
+    max_p_threshold : float, optional
+        The maximum probability threshold to plot. Defaults to 1.
+    by : float, optional
+        The step size for the probability thresholds. Defaults to 0.01.
+    stratified_by : Sequence[str], optional
+        Variables for stratification. Defaults to ``["probability_threshold"]``.
+    size : int, optional
+        The width and height of the plot in pixels. Defaults to 600.
+    color_values : List[str], optional
+        A list of hex color strings for the plot lines.
+
+    Returns
+    -------
+    Figure
+        A Plotly ``Figure`` object for the time-dependent Decision Curve.
+    """
 
     if decision_type == "conventional":
         curve = "decision"