diff --git a/upliftml/evaluation.py b/upliftml/evaluation.py index 28a09af..21cfaae 100644 --- a/upliftml/evaluation.py +++ b/upliftml/evaluation.py @@ -7,6 +7,7 @@ import pyspark.sql.functions as F import pyspark.sql.types as T import seaborn as sns # type: ignore +from scipy import stats from pyspark.ml.feature import QuantileDiscretizer from pyspark.sql import Column, DataFrame, Window from sklearn.metrics import auc # type: ignore @@ -192,10 +193,12 @@ def _compute_ci( bucket_colname: Optional[str] = None, relevant_cols: Optional[List[str]] = None, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> DataFrame: - """Computes confidence intervals from bootstrapped samples of the dataset. For more details on this procedure, - see https://ocw.mit.edu/courses/mathematics/18-05-introduction-to-probability-and-statistics-spring-2014/ - readings/MIT18_05S14_Reading24.pdf. + """Computes confidence intervals from bootstrapped samples of the dataset. + If `use_std_error` is True, the standard error is computed, and confidence intervals are + derived using standard-normal critical values. Otherwise, the basic bootstrap (Reverse + Percentile Interval) is used; see https://arxiv.org/abs/1411.5279. Args: df (pyspark.sql.DataFrame): a Spark dataframe @@ -205,6 +208,9 @@ def _compute_ci( to calculate confidence intervals for ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. 
Returns: (pyspark.sql.DataFrame): a Spark dataframe containing point estimates and confidence intervals @@ -233,27 +239,42 @@ def _compute_ci( for col in relevant_cols: deltas = deltas.withColumn(f"{col}_delta", F.col(col) - F.col(f"{col}_pe")) - # get quantiles for the diffs - agg_exprs = [ - F.expr(f"percentile_approx({col}_delta, {ci_quantiles[1]})").alias(f"{col}_delta_lower") - for col in relevant_cols - ] - agg_exprs += [ - F.expr(f"percentile_approx({col}_delta, {ci_quantiles[0]})").alias(f"{col}_delta_upper") - for col in relevant_cols - ] - agg_exprs += [F.first(F.col(f"{col}_pe")).alias(col) for col in relevant_cols] + # add point estimate and standard error columns + agg_exprs = [F.first(F.col(f"{col}_pe")).alias(col) for col in relevant_cols] agg_exprs += [F.sqrt(F.mean(F.pow(F.col(f"{col}_delta"), 2))).alias(f"{col}_std_error") for col in relevant_cols] - deltas_ci = deltas.groupby(group_cols).agg(*agg_exprs) + if use_std_error: + crit_lower, crit_upper = stats.norm.ppf(ci_quantiles) + agg_exprs += [ + F.expr(f"{col}_pe - {crit_lower}*{col}_std_error").alias(f"{col}_lower") + for col in relevant_cols + ] + agg_exprs += [ + F.expr(f"{col}_pe + {crit_upper}*{col}_std_error").alias(f"{col}_upper") + for col in relevant_cols + ] - # calculate upper and lower bounds of the estimates, based on the quantile values for the diffs - for col in relevant_cols: - deltas_ci = ( - deltas_ci.withColumn(f"{col}_lower", F.col(col) - F.col(f"{col}_delta_lower")) - .withColumn(f"{col}_upper", F.col(col) - F.col(f"{col}_delta_upper")) - .drop(f"{col}_delta_lower", f"{col}_delta_upper") - ) + deltas_ci = deltas.groupby(group_cols).agg(*agg_exprs) + else: + # get quantiles for the diffs + agg_exprs += [ + F.expr(f"percentile_approx({col}_delta, {ci_quantiles[1]})").alias(f"{col}_delta_lower") + for col in relevant_cols + ] + agg_exprs += [ + F.expr(f"percentile_approx({col}_delta, {ci_quantiles[0]})").alias(f"{col}_delta_upper") + for col in relevant_cols + ] + + deltas_ci = 
deltas.groupby(group_cols).agg(*agg_exprs) + + # calculate upper and lower bounds of the estimates, based on the quantile values for the diffs + for col in relevant_cols: + deltas_ci = ( + deltas_ci.withColumn(f"{col}_lower", F.col(col) - F.col(f"{col}_delta_lower")) + .withColumn(f"{col}_upper", F.col(col) - F.col(f"{col}_delta_upper")) + .drop(f"{col}_delta_lower", f"{col}_delta_upper") + ) # add other relevant columns from the original sample if bucket_colname is None: @@ -387,6 +408,7 @@ def estimate_ate( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> Dict: """Estimates the average treatment effect in a Spark DataFrame. @@ -401,6 +423,9 @@ def estimate_ate( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: Dict with estimates of the target rate in the control group, the target rate in the treatment group, and the ATE, @@ -424,7 +449,7 @@ def estimate_ate( relevant_cols = ["target_rate_control", "target_rate_treated", "ate"] if bootstrap: # calculate confidence intervals - df_counts = _compute_ci(df_counts, relevant_cols=relevant_cols, ci_quantiles=ci_quantiles) + df_counts = _compute_ci(df_counts, relevant_cols=relevant_cols, ci_quantiles=ci_quantiles, use_std_error=use_std_error) # ensure that the order of the returned values is right select_cols = [] @@ -444,6 +469,7 @@ def estimate_roi( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> Dict: """Estimates the return on investment in a Spark DataFrame. 
@@ -455,6 +481,9 @@ def estimate_roi( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: Dict with estimate of the ROI, with or without lower and upper bounds. @@ -474,7 +503,7 @@ def estimate_roi( relevant_cols = ["roi"] if bootstrap: # calculate confidence intervals - df_counts = _compute_ci(df_counts, relevant_cols=relevant_cols, ci_quantiles=ci_quantiles) + df_counts = _compute_ci(df_counts, relevant_cols=relevant_cols, ci_quantiles=ci_quantiles, use_std_error=use_std_error) # ensure that the order of the returned values is right select_cols = [] @@ -497,6 +526,7 @@ def estimate_iroi( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> Dict: """Estimates the incremental return on investment in a Spark DataFrame. @@ -513,6 +543,9 @@ def estimate_iroi( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: Dict of estimates of the iROI, incremental benefit, and incremental cost, all with or without lower and upper bounds depending on whether bootstrapping is performed. 
@@ -536,7 +569,7 @@ def estimate_iroi( relevant_cols = ["iroi", "incremental_benefit", "incremental_cost"] if bootstrap: # calculate confidence intervals - df_counts = _compute_ci(df_counts, relevant_cols=relevant_cols, ci_quantiles=ci_quantiles) + df_counts = _compute_ci(df_counts, relevant_cols=relevant_cols, ci_quantiles=ci_quantiles, use_std_error=use_std_error) # ensure that the order of the returned values is right select_cols = [] @@ -556,6 +589,7 @@ def estimate_target_rate_per_bucket( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Estimates conditional average treatment effects per bucket in a Spark DataFrame. @@ -568,6 +602,9 @@ def estimate_target_rate_per_bucket( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. 
Returns: (pandas.DataFrame): a dataframe containing CATE estimates (with or without confidence intervals), @@ -588,7 +625,7 @@ def estimate_target_rate_per_bucket( if bootstrap: df_counts = _compute_ci( - df_counts, bucket_colname=bucket_colname, relevant_cols=["target_rate"], ci_quantiles=ci_quantiles + df_counts, bucket_colname=bucket_colname, relevant_cols=["target_rate"], ci_quantiles=ci_quantiles, use_std_error=use_std_error ) select_cols = [F.col(bucket_colname), F.col("count"), F.col("fraction")] @@ -605,6 +642,7 @@ def estimate_target_rate_per_quantile( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Divides the data into buckets based on model score quantiles and estimates average treatment effects per bucket. @@ -621,6 +659,9 @@ def estimate_target_rate_per_quantile( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: (pandas.DataFrame): a dataframe containing CATE estimates (with or without confidence intervals), @@ -641,6 +682,7 @@ def estimate_target_rate_per_quantile( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) @@ -694,6 +736,7 @@ def estimate_and_plot_target_rate_per_quantile( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, sort_x: bool = True, ax: Any = None, ) -> Tuple: @@ -712,6 +755,9 @@ def estimate_and_plot_target_rate_per_quantile( n_bootstraps (int, optional): the number of bootstraps to perform. 
Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. sort_x (bool, optional): if True, x-axis will be sorted from highest metric value to lowest ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. @@ -733,6 +779,7 @@ def estimate_and_plot_target_rate_per_quantile( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_metric_per_bucket(quantile_df, y="target_rate", bootstrap=bootstrap, sort_x=sort_x, ax=ax) @@ -747,6 +794,7 @@ def estimate_and_plot_target_rate_per_bucket( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, sort_x: bool = True, ax: Any = None, ) -> Tuple: @@ -762,6 +810,9 @@ def estimate_and_plot_target_rate_per_bucket( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. sort_x (bool, optional): if True, x-axis will be sorted from highest metric value to lowest ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. 
@@ -781,6 +832,7 @@ def estimate_and_plot_target_rate_per_bucket( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_metric_per_bucket(quantile_df, y="target_rate", bootstrap=bootstrap, sort_x=sort_x, ax=ax) @@ -798,6 +850,7 @@ def estimate_cate_per_bucket( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Estimates the conditional average treatment effects per bucket in a Spark DataFrame. @@ -814,6 +867,9 @@ def estimate_cate_per_bucket( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: (pandas.DataFrame): a dataframe containing CATE estimates (with or without confidence intervals), @@ -837,7 +893,7 @@ def estimate_cate_per_bucket( if bootstrap: df_counts = _compute_ci( - df_counts, bucket_colname=bucket_colname, relevant_cols=["ate"], ci_quantiles=ci_quantiles + df_counts, bucket_colname=bucket_colname, relevant_cols=["ate"], ci_quantiles=ci_quantiles, use_std_error=use_std_error ) select_cols = [F.col(bucket_colname), F.col("count"), F.col("fraction")] @@ -857,6 +913,7 @@ def estimate_cate_per_quantile( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Divides the data into buckets based on model score quantiles and estimates average treatment effects per bucket. @@ -877,6 +934,9 @@ def estimate_cate_per_quantile( n_bootstraps (int, optional): the number of bootstraps to perform. 
Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: (pandas.DataFrame): a dataframe containing CATE estimates (with or without confidence intervals), @@ -900,6 +960,7 @@ def estimate_cate_per_quantile( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) @@ -915,6 +976,7 @@ def estimate_and_plot_cate_per_quantile( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, sort_x: bool = True, ax: Any = None, ) -> Tuple: @@ -937,6 +999,9 @@ def estimate_and_plot_cate_per_quantile( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. sort_x (bool, optional): if True, x-axis will be sorted from highest metric value to lowest ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. 
@@ -961,6 +1026,7 @@ def estimate_and_plot_cate_per_quantile( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_metric_per_bucket(quantile_df, bootstrap=bootstrap, sort_x=sort_x, ax=ax) @@ -978,6 +1044,7 @@ def estimate_and_plot_cate_per_bucket( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, sort_x: bool = True, ax: Any = None, ) -> Tuple: @@ -997,6 +1064,9 @@ def estimate_and_plot_cate_per_bucket( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. sort_x (bool, optional): if True, x-axis will be sorted from highest metric value to lowest ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. @@ -1019,6 +1089,7 @@ def estimate_and_plot_cate_per_bucket( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_metric_per_bucket(quantile_df, bootstrap=bootstrap, sort_x=sort_x, ax=ax) @@ -1037,6 +1108,7 @@ def estimate_iroi_per_bucket( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Estimates incremental ROI per bucket in a Spark DataFrame. @@ -1054,6 +1126,9 @@ def estimate_iroi_per_bucket( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. 
Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: (pandas.DataFrame): a dataframe containing iROI estimates (with or without confidence intervals), @@ -1078,7 +1153,7 @@ def estimate_iroi_per_bucket( if bootstrap: df_counts = _compute_ci( - df_counts, bucket_colname=bucket_colname, relevant_cols=["iroi"], ci_quantiles=ci_quantiles + df_counts, bucket_colname=bucket_colname, relevant_cols=["iroi"], ci_quantiles=ci_quantiles, use_std_error=use_std_error ) select_cols = [F.col(bucket_colname), F.col("count"), F.col("fraction")] @@ -1099,6 +1174,7 @@ def estimate_iroi_per_quantile( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Divides the data into buckets based on model score quantiles and estimates iROI per bucket. @@ -1119,6 +1195,9 @@ def estimate_iroi_per_quantile( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. 
Returns: (pandas.DataFrame): a dataframe containing iROI estimates (with or without confidence intervals), @@ -1143,6 +1222,7 @@ def estimate_iroi_per_quantile( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) @@ -1157,6 +1237,7 @@ def estimate_and_plot_iroi_per_bucket( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, sort_x: bool = True, ax: Any = None, ) -> Tuple: @@ -1176,6 +1257,9 @@ def estimate_and_plot_iroi_per_bucket( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. sort_x (bool, optional): if True, x-axis will be sorted from highest metric value to lowest ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. @@ -1199,6 +1283,7 @@ def estimate_and_plot_iroi_per_bucket( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_metric_per_bucket(quantile_df, x="bucket", y="iroi", bootstrap=bootstrap, sort_x=sort_x, ax=ax) @@ -1219,6 +1304,7 @@ def estimate_and_plot_iroi_per_quantile( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, sort_x: bool = True, ax: Any = None, ) -> Tuple: @@ -1242,6 +1328,9 @@ def estimate_and_plot_iroi_per_quantile( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. 
Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. sort_x (bool, optional): if True, x-axis will be sorted from highest metric value to lowest ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. @@ -1266,6 +1355,7 @@ def estimate_and_plot_iroi_per_quantile( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, add_labels=add_labels, ) @@ -1285,6 +1375,7 @@ def estimate_cate_lift( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Divides the data into buckets based on model score quantiles and cumulatively estimates CATE lift (with or without confidence intervals). @@ -1303,6 +1394,9 @@ def estimate_cate_lift( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. 
Returns: (pandas.DataFrame): a dataframe containing the CATE lift estimates (with or without confidence intervals), @@ -1333,7 +1427,7 @@ def estimate_cate_lift( if bootstrap: df_counts = _compute_ci( - df_counts, bucket_colname=bucket_colname, relevant_cols=["ate"], ci_quantiles=ci_quantiles + df_counts, bucket_colname=bucket_colname, relevant_cols=["ate"], ci_quantiles=ci_quantiles, use_std_error=use_std_error ) select_cols = [F.col(bucket_colname), F.col("count"), F.col("fraction")] @@ -1396,6 +1490,7 @@ def estimate_and_plot_cate_lift( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ax: Any = None, ) -> Tuple: """Divides the data into buckets based on model score quantiles, cumulatively estimates CATE lift @@ -1416,6 +1511,9 @@ def estimate_and_plot_cate_lift( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. Returns: @@ -1438,6 +1536,7 @@ def estimate_and_plot_cate_lift( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_cate_lift(df_lift, label=label, bootstrap=bootstrap, ax=ax) @@ -1456,6 +1555,7 @@ def estimate_qini( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Divides the data into buckets based on model score quantiles and estimates Qini values (with or without confidence intervals). 
@@ -1474,6 +1574,9 @@ def estimate_qini( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: (pandas.DataFrame): a dataframe containing Qini estimates (with or without confidence intervals), @@ -1506,7 +1609,7 @@ def estimate_qini( if bootstrap: df_counts = _compute_ci( - df_counts, bucket_colname=bucket_colname, relevant_cols=["ate"], ci_quantiles=ci_quantiles + df_counts, bucket_colname=bucket_colname, relevant_cols=["ate"], ci_quantiles=ci_quantiles, use_std_error=use_std_error ) select_cols = [bucket_colname, "count", "fraction", "threshold"] @@ -1585,6 +1688,7 @@ def estimate_and_plot_qini( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ax: Any = None, ) -> Tuple: """Divides the data into buckets based on model score quantiles, estimates the Qini values @@ -1606,6 +1710,9 @@ def estimate_and_plot_qini( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. 
Returns: @@ -1628,6 +1735,7 @@ def estimate_and_plot_qini( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) qini_score = compute_qini_coefficient(df_qini) @@ -1696,6 +1804,7 @@ def estimate_cum_iroi( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ) -> pd.DataFrame: """Divides the data into buckets based on model score quantiles and estimates cumulative iROI (with or without confidence intervals). @@ -1715,6 +1824,9 @@ def estimate_cum_iroi( n_bootstraps (int, optional): the number of bootstraps to perform. Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. Returns: (pandas.DataFrame): a dataframe containing cumulative iROI estimates (with or without confidence intervals), @@ -1746,7 +1858,7 @@ def estimate_cum_iroi( if bootstrap: df_counts = _compute_ci( - df_counts, bucket_colname=bucket_colname, relevant_cols=["iroi"], ci_quantiles=ci_quantiles + df_counts, bucket_colname=bucket_colname, relevant_cols=["iroi"], ci_quantiles=ci_quantiles, use_std_error=use_std_error ) select_cols = [bucket_colname, "count", "fraction", "threshold"] @@ -1820,6 +1932,7 @@ def estimate_and_plot_cum_iroi( bootstrap: bool = False, n_bootstraps: int = 100, ci_quantiles: Optional[List[float]] = None, + use_std_error: bool = True, ax: Any = None, ) -> Tuple: """Divides the data into buckets based on model score quantiles, estimates cumulative iROI @@ -1842,6 +1955,9 @@ def estimate_and_plot_cum_iroi( n_bootstraps (int, optional): the number of bootstraps to perform. 
Only has an effect if bootstrap=True ci_quantiles (list of float, optional): the lower and upper confidence bounds. Only has an effect if bootstrap=True + use_std_error (bool, optional): if True (default), bootstrapped samples are used to calculate + the standard deviation of metrics and Normal(0, 1) critical values to derive the + confidence interval. ax (matplotlib.axes._subplots.AxesSubplot, optional): if specified, the plot will be plotted on this ax. Useful when creating a figure with subplots. Returns: @@ -1865,6 +1981,7 @@ def estimate_and_plot_cum_iroi( bootstrap=bootstrap, n_bootstraps=n_bootstraps, ci_quantiles=ci_quantiles, + use_std_error=use_std_error, ) ax = plot_cum_iroi(df_iroi, label=label, plot_overall=plot_overall, bootstrap=bootstrap, ax=ax)