-
Notifications
You must be signed in to change notification settings - Fork 2
Mitigate judge's order bias with swap mode #4
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -29,9 +29,12 @@ class CliArgs: | |
|
|
||
| n_instructions: int | None = None | ||
| provide_explanation: bool = False | ||
| swap_mode: bool = False | ||
| ignore_cache: bool = False | ||
| use_tqdm: bool = False | ||
|
|
||
| result_folder: str = "results" | ||
|
|
||
| @classmethod | ||
| def parse_args(cls): | ||
| parser = argparse.ArgumentParser( | ||
|
|
@@ -69,6 +72,15 @@ def parse_args(cls): | |
| help="If specified, judge will provide explanation before making a judgement. Does not necessarily improve" | ||
| "the accuracy of the judge but enables some result interpretation.", | ||
| ) | ||
| parser.add_argument( | ||
| "--swap_mode", | ||
| type=str, | ||
| choices=["fixed", "both"], | ||
| default="fixed", | ||
| help="Model comparison order mode. 'fixed': always use model order A-B. 'both': correct for model order " | ||
| "bias by evaluating each instruction twice, once as A-B and once as B-A, and average. This helps account " | ||
| "for judge position bias. Default is 'fixed'.", | ||
| ) | ||
| parser.add_argument( | ||
| "--ignore_cache", | ||
| action="store_true", | ||
|
|
@@ -79,7 +91,14 @@ def parse_args(cls): | |
| action="store_true", | ||
| help="If specified, use tqdm, does not work with all model providers, vLLM in particular.", | ||
| ) | ||
|
|
||
| parser.add_argument( | ||
| "--result_folder", | ||
| type=str, | ||
| required=False, | ||
| default="results", | ||
| help="The folder to save the results. Defaults to `results`. Evaluation results will be saved in" | ||
| " `[result_folder]/[evaluation_name]`.", | ||
| ) | ||
| args = parser.parse_args() | ||
|
|
||
| return cls( | ||
|
|
@@ -89,6 +108,7 @@ def parse_args(cls): | |
| judge_model=args.judge_model, | ||
| n_instructions=args.n_instructions, | ||
| provide_explanation=args.provide_explanation, | ||
| swap_mode=args.swap_mode, | ||
| ignore_cache=args.ignore_cache, | ||
| use_tqdm=args.use_tqdm, | ||
| ) | ||
|
|
@@ -216,11 +236,27 @@ def main(args: CliArgs): | |
| use_tqdm=args.use_tqdm, | ||
| ) | ||
|
|
||
| name = f"{args.dataset}-{args.model_A}-{args.model_B}-{args.judge_model}".replace( | ||
| if args.swap_mode: | ||
| print("Correction for judge bias towards a certain model position is set.") | ||
| print(f"Evaluating completions with models reversed with judge {args.judge_model}.") | ||
| annotations_reversed = annotate_battles( | ||
| judge_chat_model=judge_chat_model, | ||
| instructions=instructions.head(n_instructions).tolist(), | ||
| completions_A=completions_B.head(n_instructions).tolist(), | ||
| completions_B=completions_A.head(n_instructions).tolist(), | ||
| provide_explanation=args.provide_explanation, | ||
| system_prompt=system_prompt, | ||
| max_len=200, | ||
| use_tqdm=args.use_tqdm, | ||
| ) | ||
|
|
||
| name = f"{args.dataset}-{args.model_A}-{args.model_B}-{args.judge_model}" | ||
| name += f"-{args.swap_mode}" | ||
| name = name.replace( | ||
| "/", "_" | ||
| ) | ||
|
|
||
| res_folder = Path("results") / name | ||
| res_folder = Path(args.result_folder) / name | ||
| res_folder.mkdir(parents=True, exist_ok=True) | ||
|
|
||
| # save argument for results analysis | ||
|
|
@@ -233,6 +269,15 @@ def main(args: CliArgs): | |
| df["model_A"] = args.model_A | ||
| df["model_B"] = args.model_B | ||
| df["judge"] = args.judge_model | ||
|
|
||
| if args.swap_mode: | ||
| df_reversed = pd.DataFrame(annotations_reversed) | ||
| df_reversed["instruction_index"] = instructions.head(n_instructions).index.tolist() | ||
| df_reversed["model_A"] = args.model_B | ||
| df_reversed["model_B"] = args.model_A | ||
| df_reversed["judge"] = args.judge_model | ||
| df = pd.concat([df, df_reversed]) | ||
|
|
||
| df.to_csv(res_folder / f"{name}-annotations.csv", index=False) | ||
|
|
||
| # compute preferences between A and B | ||
|
|
@@ -244,6 +289,15 @@ def main(args: CliArgs): | |
| ] | ||
| ) | ||
|
|
||
| if args.swap_mode: | ||
| prefs_reversed = pd.Series( | ||
| [ | ||
| score_parser.parse_model_raw(annotation.judge_completion) | ||
| for annotation in annotations_reversed | ||
| ] | ||
| ) | ||
| prefs = (prefs + (1 - prefs_reversed)) / 2.0 | ||
|
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Does it work in case of nans? Seems like return
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, it does: >>> pd.Series([1, 2, 3, None]) + pd.Series([1, 2, None, 4])
0 2.0
1 4.0
2 NaN
3 NaN
dtype: float64Nonetheless, I think your suggestion is, generally, cleaner. How about the following? prefs = pd.concat([prefs, (1 - prefs_reversed)]).reset_index(drop=True)
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. that sounds good to me |
||
|
|
||
| # compute and report statistics | ||
| num_wins = sum(prefs < 0.5) | ||
| num_losses = sum(prefs > 0.5) | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
At this point args.swap_mode is a string but we map it to a boolean, we should probably rather test
args.swap_mode == "fixed"rather?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yes, you are right. This got lost in the change from
correct_order_bias(bool) toswap_mode(str).