From 47bcfb2c5fe45d2936dcc7a730d45413e4428738 Mon Sep 17 00:00:00 2001
From: jon
Date: Thu, 13 Jul 2023 14:42:14 +0100
Subject: [PATCH 01/20] added cli arg

---
 elk/evaluation/evaluate.py |  2 +-
 elk/run.py                 | 11 +++++++++--
 elk/training/train.py      |  1 +
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py
index d6054e33..8462cc00 100644
--- a/elk/evaluation/evaluate.py
+++ b/elk/evaluation/evaluate.py
@@ -30,7 +30,7 @@ def execute(self, highlight_color: Color = "cyan"):
 
     @torch.inference_mode()
     def apply_to_layer(
-        self, layer: int, devices: list[str], world_size: int
+        self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool
     ) -> dict[str, pd.DataFrame]:
         """Evaluate a single reporter on a single layer."""
         device = self.get_device(devices, world_size)
diff --git a/elk/run.py b/elk/run.py
index fb8903cc..1659e392 100644
--- a/elk/run.py
+++ b/elk/run.py
@@ -46,6 +46,10 @@ class Run(ABC, Serializable):
     prompt_indices: tuple[int, ...] = ()
     """The indices of the prompt templates to use. If empty, all prompts are used."""
 
+    probe_per_prompt: bool = False
+    """If true, a probe is trained per prompt template. Otherwise, a single probe is
+    trained for all prompt templates."""
+
     concatenated_layer_offset: int = 0
     debug: bool = False
     min_gpu_mem: int | None = None  # in bytes
@@ -99,13 +103,16 @@ def execute(
         devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem)
         num_devices = len(devices)
         func: Callable[[int], dict[str, pd.DataFrame]] = partial(
-            self.apply_to_layer, devices=devices, world_size=num_devices
+            self.apply_to_layer,
+            devices=devices,
+            world_size=num_devices,
+            probe_per_prompt=self.probe_per_prompt,
         )
         self.apply_to_layers(func=func, num_devices=num_devices)
 
     @abstractmethod
     def apply_to_layer(
-        self, layer: int, devices: list[str], world_size: int
+        self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool
     ) -> dict[str, pd.DataFrame]:
         """Train or eval a reporter on a single layer."""
 
diff --git a/elk/training/train.py b/elk/training/train.py
index 8392f2d9..2292309b 100644
--- a/elk/training/train.py
+++ b/elk/training/train.py
@@ -53,6 +53,7 @@ def apply_to_layer(
         layer: int,
         devices: list[str],
         world_size: int,
+        probe_per_prompt: bool,
     ) -> dict[str, pd.DataFrame]:
         """Train a single reporter on a single layer."""
 

From a50fe5772adc9e59f66e066a7df22f42863335d2 Mon Sep 17 00:00:00 2001
From: jon
Date: Thu, 13 Jul 2023 15:23:58 +0100
Subject: [PATCH 02/20] refactor reporter training

---
 elk/training/train.py | 88 ++++++++++++++++++++++++++++++++-----------
 1 file changed, 65 insertions(+), 23 deletions(-)

diff --git a/elk/training/train.py b/elk/training/train.py
index 2292309b..3eb018bc 100644
--- a/elk/training/train.py
+++ b/elk/training/train.py
@@ -1,7 +1,7 @@
 """Main training loop."""
 
 from collections import defaultdict
-from dataclasses import dataclass
+from dataclasses import dataclass, replace
 from pathlib import Path
 from typing import Literal
 
@@ -11,15 +11,22 @@
 from simple_parsing import subgroups
 from simple_parsing.helpers.serialization import save
 
+from ..evaluation import Eval
 from ..metrics import evaluate_preds, to_one_hot
 from ..run import Run
 from ..training.supervised import train_supervised
 from ..utils.typing import assert_type
 from .ccs_reporter import CcsConfig, CcsReporter
-from .common import FitterConfig
+from .common import FitterConfig, Reporter
 from .eigen_reporter import EigenFitter, EigenFitterConfig
 
 
+@dataclass
+class ReporterTrainResult:
+    reporter: CcsReporter | Reporter
+    train_loss: float | None
+
+
 @dataclass
 class Elicit(Run):
     """Full specification of a reporter training run."""
@@ -48,22 +55,31 @@ def create_models_dir(self, out_dir: Path):
 
         return reporter_dir, lr_dir
 
-    def apply_to_layer(
-        self,
-        layer: int,
-        devices: list[str],
-        world_size: int,
-        probe_per_prompt: bool,
-    ) -> dict[str, pd.DataFrame]:
-        """Train a single reporter on a single layer."""
-
-        self.make_reproducible(seed=self.net.seed + layer)
-        device = self.get_device(devices, world_size)
-
+    def make_eval(self, model, eval_dataset):
+        assert self.out_dir is not None
+        return Eval(
+            data=replace(
+                self.data,
+                model=model,
+                datasets=(eval_dataset,),
+            ),
+            source=self.out_dir,
+            out_dir=self.out_dir / "transfer" / eval_dataset,
+            num_gpus=self.num_gpus,
+            min_gpu_mem=self.min_gpu_mem,
+            skip_supervised=self.supervised == "none",
+            prompt_indices=self.prompt_indices,
+            concatenated_layer_offset=self.concatenated_layer_offset,
+            # datasets isn't needed because it's immediately overwritten
+            debug=self.debug,
+            disable_cache=self.disable_cache,
+        )
+
+    # Create a separate function to handle the reporter training.
+    def train_reporter(self, device, layer, out_dir) -> ReporterTrainResult:
         train_dict = self.prepare_data(device, layer, "train")
-        val_dict = self.prepare_data(device, layer, "val")
 
-        (first_train_h, train_gt, _), *rest = train_dict.values()
+        (first_train_h, train_gt, _), *rest = train_dict.values()  # TODO can remove?
         (_, v, k, d) = first_train_h.shape
         if not all(other_h.shape[-1] == d for other_h, _, _ in rest):
             raise ValueError("All datasets must have the same hidden state size")
@@ -75,16 +91,12 @@
         if not all(other_h.shape[-2] == k for other_h, _, _ in rest):
             raise ValueError("All datasets must have the same number of classes")
 
-        reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir))
         train_loss = None
-
         if isinstance(self.net, CcsConfig):
             assert len(train_dict) == 1, "CCS only supports single-task training"
-
             reporter = CcsReporter(self.net, d, device=device, num_variants=v)
             train_loss = reporter.fit(first_train_h)
-
             (_, v, k, _) = first_train_h.shape
             reporter.platt_scale(
                 to_one_hot(repeat(train_gt, "n -> (n v)", v=v), k).flatten(),
                 rearrange(first_train_h, "n v k d -> (n v k) d"),
             )
@@ -116,20 +128,50 @@
             raise ValueError(f"Unknown reporter config type: {type(self.net)}")
 
         # Save reporter checkpoint to disk
-        torch.save(reporter, reporter_dir / f"layer_{layer}.pt")
+        torch.save(reporter, out_dir / f"layer_{layer}.pt")
 
-        # Fit supervised logistic regression model
+        return ReporterTrainResult(reporter, train_loss)
+
+    def train_lr_model(self, train_dict, device, layer, out_dir):
         if self.supervised != "none":
             lr_models = train_supervised(
                 train_dict,
                 device=device,
                 mode=self.supervised,
             )
-            with open(lr_dir / f"layer_{layer}.pt", "wb") as file:
+            with open(out_dir / f"layer_{layer}.pt", "wb") as file:
                 torch.save(lr_models, file)
         else:
             lr_models = []
 
+        return lr_models
+
+    def apply_to_layer(
+        self,
+        layer: int,
+        devices: list[str],
+        world_size: int,
+        probe_per_prompt: bool,
+    ) -> dict[str, pd.DataFrame]:
+        """Train a single reporter on a single layer."""
+
+        self.make_reproducible(seed=self.net.seed + layer)
+        device = self.get_device(devices, world_size)
+
+        train_dict = self.prepare_data(device, layer, "train")
+        val_dict = self.prepare_data(device, layer, "val")
+
+        (first_train_h, train_gt, _), *rest = train_dict.values()
+        (_, v, k, d) =
first_train_h.shape + + reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) + + reporter_train_result = self.train_reporter(device, layer, reporter_dir) + reporter = reporter_train_result.reporter + train_loss = reporter_train_result.train_loss + + lr_models = self.train_lr_model(train_dict, device, layer, lr_dir) + row_bufs = defaultdict(list) for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] From 52b1394232b2f23ab5867be1b6729bfd249ff306 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 14 Jul 2023 14:19:15 +0100 Subject: [PATCH 03/20] WIP add multiprobe training --- elk/training/train.py | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index 3eb018bc..5d2a6b50 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -76,9 +76,9 @@ def make_eval(self, model, eval_dataset): ) # Create a separate function to handle the reporter training. - def train_reporter(self, device, layer, out_dir) -> ReporterTrainResult: - train_dict = self.prepare_data(device, layer, "train") - + def train_and_save_reporter( + self, device, layer, out_dir, train_dict + ) -> ReporterTrainResult: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): @@ -128,6 +128,7 @@ def train_reporter(self, device, layer, out_dir) -> ReporterTrainResult: raise ValueError(f"Unknown reporter config type: {type(self.net)}") # Save reporter checkpoint to disk + # TODO have to change this torch.save(reporter, out_dir / f"layer_{layer}.pt") return ReporterTrainResult(reporter, train_loss) @@ -166,13 +167,36 @@ def apply_to_layer( reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) - reporter_train_result = self.train_reporter(device, layer, reporter_dir) + probe_per_prompt = True + if probe_per_prompt: + train_dicts = [ + { + ds_name: ( + train_h[:, i : i + 1, ...], + train_gt, + lm_preds[:, i : i + 1, ...], + ) + } + for ds_name, (train_h, _, lm_preds) in train_dict.items() + for i in range(v) # v is number of variants + ] + + [ + self.train_and_save_reporter(device, layer, reporter_dir, train_dict) + for train_dict in train_dicts + ] + else: + reporter_train_result = self.train_and_save_reporter( + device, layer, reporter_dir, train_dict + ) + reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model(train_dict, device, layer, lr_dir) row_bufs = defaultdict(list) + for ds_name in val_dict: val_h, val_gt, val_lm_preds = val_dict[ds_name] train_h, train_gt, train_lm_preds = train_dict[ds_name] From 2420ae0fba1a4de1321b7ebd9c7c52c91f499537 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 14 Jul 2023 16:38:15 +0100 Subject: [PATCH 04/20] multiprobe elicit works --- elk/run.py | 16 ++-- elk/training/train.py | 184 +++++++++++++++++++++++++++--------------- 2 files changed, 128 insertions(+), 72 deletions(-) diff --git a/elk/run.py b/elk/run.py index 1659e392..b444d80b 100644 --- a/elk/run.py +++ b/elk/run.py @@ -187,13 +187,19 @@ def apply_to_layers( df_buffers = defaultdict(list) try: - for df_dict in tqdm(mapper(func, layers), total=len(layers)): - for k, v in df_dict.items(): - df_buffers[k].append(v) + for df_dicts in tqdm(mapper(func, layers), total=len(layers)): + for df_dict in df_dicts: + for k, v in df_dict.items(): + df_buffers[k].append(v) finally: # Make sure the CSVs are 
written even if we crash or get interrupted for name, dfs in df_buffers.items(): - df = pd.concat(dfs).sort_values(by=["layer", "ensembling"]) - df.round(4).to_csv(self.out_dir / f"{name}.csv", index=False) + sortby = ["layer", "ensembling"] + if "prompt_index" in dfs[0].columns: + sortby.append("prompt_index") + # TODO make the prompt index third col + df = pd.concat(dfs).sort_values(by=sortby) + out_path = self.out_dir / f"{name}.csv" + df.round(4).to_csv(out_path, index=False) if self.debug: save_debug_log(self.datasets, self.out_dir) diff --git a/elk/training/train.py b/elk/training/train.py index 5d2a6b50..2d85cc13 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -15,7 +15,6 @@ from ..metrics import evaluate_preds, to_one_hot from ..run import Run from ..training.supervised import train_supervised -from ..utils.typing import assert_type from .ccs_reporter import CcsConfig, CcsReporter from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig @@ -41,6 +40,81 @@ class Elicit(Run): cross-validation. Defaults to "single", which means to train a single classifier on the training data. "cv" means to use cross-validation.""" + def evaluate_and_save( + self, + train_loss, + reporter, + train_dict, + val_dict, + lr_models, + layer, + prompt_index=None, + ): + row_bufs = defaultdict(list) + for ds_name in val_dict: + val_h, val_gt, val_lm_preds = val_dict[ds_name] + train_h, train_gt, train_lm_preds = train_dict[ds_name] + meta = {"dataset": ds_name, "layer": layer} + + val_credences = reporter(val_h) + train_credences = reporter(train_h) + maybe_prompt_index = ( + {} if prompt_index is None else {"prompt_index": prompt_index} + ) + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "train_loss": train_loss, + **maybe_prompt_index, + } + ) + + row_bufs["train_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(train_gt, train_credences, mode).to_dict(), + "train_loss": train_loss, + **maybe_prompt_index, + } + ) + + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + **maybe_prompt_index, + } + ) + + if train_lm_preds is not None: + row_bufs["train_lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), + **maybe_prompt_index, + } + ) + + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( + { + **meta, + "ensembling": mode, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **maybe_prompt_index, + } + ) + + return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + def create_models_dir(self, out_dir: Path): lr_dir = None lr_dir = out_dir / "lr_models" @@ -129,6 +203,7 @@ def train_and_save_reporter( # Save reporter checkpoint to disk # TODO have to change this + out_dir.mkdir(parents=True, exist_ok=True) torch.save(reporter, out_dir / f"layer_{layer}.pt") return ReporterTrainResult(reporter, train_loss) @@ -140,6 +215,8 @@ def train_lr_model(self, train_dict, device, layer, out_dir): device=device, mode=self.supervised, ) + # make dir if not exists + out_dir.mkdir(parents=True, exist_ok=True) with open(out_dir / f"layer_{layer}.pt", "wb") as file: torch.save(lr_models, file) else: @@ -153,7 +230,7 @@ def apply_to_layer( devices: list[str], world_size: int, probe_per_prompt: bool, - ) -> dict[str, 
pd.DataFrame]: + ) -> list[dict[str, pd.DataFrame]]: """Train a single reporter on a single layer.""" self.make_reproducible(seed=self.net.seed + layer) @@ -165,7 +242,8 @@ def apply_to_layer( (first_train_h, train_gt, _), *rest = train_dict.values() (_, v, k, d) = first_train_h.shape - reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) + # TODO is this even needed + # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) probe_per_prompt = True if probe_per_prompt: @@ -174,81 +252,53 @@ def apply_to_layer( ds_name: ( train_h[:, i : i + 1, ...], train_gt, - lm_preds[:, i : i + 1, ...], + lm_preds[:, i : i + 1, ...] if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() for i in range(v) # v is number of variants ] - [ - self.train_and_save_reporter(device, layer, reporter_dir, train_dict) - for train_dict in train_dicts - ] - else: - reporter_train_result = self.train_and_save_reporter( - device, layer, reporter_dir, train_dict - ) - - reporter = reporter_train_result.reporter - train_loss = reporter_train_result.train_loss - - lr_models = self.train_lr_model(train_dict, device, layer, lr_dir) - - row_bufs = defaultdict(list) - - for ds_name in val_dict: - val_h, val_gt, val_lm_preds = val_dict[ds_name] - train_h, train_gt, train_lm_preds = train_dict[ds_name] - meta = {"dataset": ds_name, "layer": layer} + res = [] + for i, train_dict in enumerate(train_dicts): + reporters_path = self.out_dir / str(i) / "reporters" + lr_path = self.out_dir / str(i) / "lr_models" - val_credences = reporter(val_h) - train_credences = reporter(train_h) - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - "train_loss": train_loss, - } + reporter_train_result = self.train_and_save_reporter( + device, layer, reporters_path, train_dict ) - row_bufs["train_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), - "train_loss": train_loss, - } - ) + reporter = reporter_train_result.reporter + train_loss = reporter_train_result.train_loss - if val_lm_preds is not None: - row_bufs["lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - } - ) + lr_models = self.train_lr_model(train_dict, device, layer, lr_path) - if train_lm_preds is not None: - row_bufs["train_lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), - } + res.append( + self.evaluate_and_save( + train_loss, + reporter, + train_dict, + val_dict, + lr_models, + layer, + prompt_index=i, ) + ) + return res + else: + reporter_train_result = self.train_and_save_reporter( + device, layer, self.out_dir / "reporters", train_dict + ) - for i, model in enumerate(lr_models): - row_bufs["lr_eval"].append( - { - **meta, - "ensembling": mode, - "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - } - ) + reporter = reporter_train_result.reporter + train_loss = reporter_train_result.train_loss - return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + lr_models = self.train_lr_model( + train_dict, device, layer, self.out_dir / "lr_models" + ) + + return [ + self.evaluate_and_save( + train_loss, reporter, train_dict, val_dict, lr_models, layer + ) + ] From f35626a8bc73ba3b1408b20b8794ac7f39543116 Mon Sep 17 00:00:00 2001 From: jon Date: Fri, 14 Jul 2023 16:46:23 
+0100 Subject: [PATCH 05/20] fix pyright --- elk/run.py | 6 +++--- elk/training/train.py | 2 ++ 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/elk/run.py b/elk/run.py index b444d80b..fd723569 100644 --- a/elk/run.py +++ b/elk/run.py @@ -102,7 +102,7 @@ def execute( devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem) num_devices = len(devices) - func: Callable[[int], dict[str, pd.DataFrame]] = partial( + func: Callable[[int], list[dict[str, pd.DataFrame]]] = partial( self.apply_to_layer, devices=devices, world_size=num_devices, @@ -113,7 +113,7 @@ def execute( @abstractmethod def apply_to_layer( self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool - ) -> dict[str, pd.DataFrame]: + ) -> list[dict[str, pd.DataFrame]]: """Train or eval a reporter on a single layer.""" def make_reproducible(self, seed: int): @@ -162,7 +162,7 @@ def concatenate(self, layers): def apply_to_layers( self, - func: Callable[[int], dict[str, pd.DataFrame]], + func: Callable[[int], list[dict[str, pd.DataFrame]]], num_devices: int, ): """Apply a function to each layer of the datasets in parallel diff --git a/elk/training/train.py b/elk/training/train.py index 2d85cc13..902edfd4 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -232,6 +232,8 @@ def apply_to_layer( probe_per_prompt: bool, ) -> list[dict[str, pd.DataFrame]]: """Train a single reporter on a single layer.""" + assert self.out_dir is not None # TODO this is really annoying, why can it be + # None? self.make_reproducible(seed=self.net.seed + layer) device = self.get_device(devices, world_size) From 898c3f1c7a6094b71e5166ce43e9aa98a20f0aea Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 14:32:01 +0100 Subject: [PATCH 06/20] implemented multi probe for elicit --- elk/training/train.py | 151 +++++++++++++++++++++++++----------------- 1 file changed, 89 insertions(+), 62 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index 902edfd4..277567c4 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -19,13 +19,30 @@ from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig +# declare AnyReporter as CcsReporter | Reporter type alias +AnyReporter = CcsReporter | Reporter + @dataclass class ReporterTrainResult: - reporter: CcsReporter | Reporter + reporter: AnyReporter train_loss: float | None +class MultiReporter: + def __init__(self, reporter_results: list[ReporterTrainResult]): + self.reporter_results: list[ReporterTrainResult] = reporter_results + self.reporters = [r.reporter for r in reporter_results] + train_losses = [r.train_loss for r in reporter_results] + self.train_loss = ( + None if train_losses[0] is None else sum(train_losses) / len(train_losses) + ) + + def __call__(self, h): + credences = [r(h) for r in self.reporters] + return torch.stack(credences).mean(dim=0) + + @dataclass class Elicit(Run): """Full specification of a reporter training run.""" @@ -43,12 +60,11 @@ class Elicit(Run): def evaluate_and_save( self, train_loss, - reporter, + reporter: AnyReporter | MultiReporter, train_dict, val_dict, lr_models, layer, - prompt_index=None, ): row_bufs = defaultdict(list) for ds_name in val_dict: @@ -56,62 +72,74 @@ def evaluate_and_save( train_h, train_gt, train_lm_preds = train_dict[ds_name] meta = {"dataset": ds_name, "layer": layer} - val_credences = reporter(val_h) - train_credences = reporter(train_h) - maybe_prompt_index = ( - {} if prompt_index is None else {"prompt_index": 
prompt_index} - ) - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - "train_loss": train_loss, - **maybe_prompt_index, - } - ) - - row_bufs["train_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), - "train_loss": train_loss, - **maybe_prompt_index, - } - ) - - if val_lm_preds is not None: - row_bufs["lm_eval"].append( + def eval_all( + reporter: AnyReporter | MultiReporter, + prompt_index: int | Literal["multi"], + ): + val_credences = reporter(val_h) + train_credences = reporter(train_h) + prompt_index = {"prompt_index": prompt_index} + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - **maybe_prompt_index, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, } ) - if train_lm_preds is not None: - row_bufs["train_lm_eval"].append( + row_bufs["train_eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), - **maybe_prompt_index, + **evaluate_preds(train_gt, train_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, } ) - for i, model in enumerate(lr_models): - row_bufs["lr_eval"].append( - { - **meta, - "ensembling": mode, - "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - **maybe_prompt_index, - } - ) + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + **prompt_index, + } + ) + + if train_lm_preds is not None: + row_bufs["train_lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds( + train_gt, train_lm_preds, mode + ).to_dict(), + **prompt_index, + } + ) + + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( + { + **meta, + "ensembling": mode, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), + **prompt_index, + } + ) + + if isinstance(reporter, MultiReporter): + for prompt_index, reporter_result in enumerate( + reporter.reporter_results + ): + eval_all(reporter_result.reporter, prompt_index) + + eval_all(reporter, "multi") return {k: pd.DataFrame(v) for k, v in row_bufs.items()} @@ -261,7 +289,7 @@ def apply_to_layer( for i in range(v) # v is number of variants ] - res = [] + results = [] for i, train_dict in enumerate(train_dicts): reporters_path = self.out_dir / str(i) / "reporters" lr_path = self.out_dir / str(i) / "lr_models" @@ -269,24 +297,23 @@ def apply_to_layer( reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, train_dict ) - - reporter = reporter_train_result.reporter - train_loss = reporter_train_result.train_loss + results.append(reporter_train_result) lr_models = self.train_lr_model(train_dict, device, layer, lr_path) - res.append( - self.evaluate_and_save( - train_loss, - reporter, - train_dict, - val_dict, - lr_models, - layer, - prompt_index=i, - ) + multi_reporter = MultiReporter(results) + train_loss = multi_reporter.train_loss + + return [ + self.evaluate_and_save( + train_loss, + multi_reporter, + train_dict, + val_dict, + lr_models, # TODO I don't care about this right now but + layer, ) - return res + ] else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict From 
01d5baadebe4247cd00dac0c4bd1e4c052f515b5 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 15:35:54 +0100 Subject: [PATCH 07/20] undo list --- elk/run.py | 16 ++-- elk/training/train.py | 207 +++++++++++++++++++++--------------------- 2 files changed, 110 insertions(+), 113 deletions(-) diff --git a/elk/run.py b/elk/run.py index fd723569..0084ad31 100644 --- a/elk/run.py +++ b/elk/run.py @@ -102,7 +102,7 @@ def execute( devices = select_usable_devices(self.num_gpus, min_memory=self.min_gpu_mem) num_devices = len(devices) - func: Callable[[int], list[dict[str, pd.DataFrame]]] = partial( + func: Callable[[int], dict[str, pd.DataFrame]] = partial( self.apply_to_layer, devices=devices, world_size=num_devices, @@ -113,7 +113,7 @@ def execute( @abstractmethod def apply_to_layer( self, layer: int, devices: list[str], world_size: int, probe_per_prompt: bool - ) -> list[dict[str, pd.DataFrame]]: + ) -> dict[str, pd.DataFrame]: """Train or eval a reporter on a single layer.""" def make_reproducible(self, seed: int): @@ -162,7 +162,7 @@ def concatenate(self, layers): def apply_to_layers( self, - func: Callable[[int], list[dict[str, pd.DataFrame]]], + func: Callable[[int], dict[str, pd.DataFrame]], num_devices: int, ): """Apply a function to each layer of the datasets in parallel @@ -187,17 +187,17 @@ def apply_to_layers( df_buffers = defaultdict(list) try: - for df_dicts in tqdm(mapper(func, layers), total=len(layers)): - for df_dict in df_dicts: - for k, v in df_dict.items(): - df_buffers[k].append(v) + for df_dict in tqdm(mapper(func, layers), total=len(layers)): + for k, v in df_dict.items(): + df_buffers[k].append(v) finally: # Make sure the CSVs are written even if we crash or get interrupted for name, dfs in df_buffers.items(): sortby = ["layer", "ensembling"] if "prompt_index" in dfs[0].columns: sortby.append("prompt_index") - # TODO make the prompt index third col + # make the prompt index third col + df = pd.concat(dfs).sort_values(by=sortby) out_path = self.out_dir / f"{name}.csv" df.round(4).to_csv(out_path, index=False) diff --git a/elk/training/train.py b/elk/training/train.py index 277567c4..69f651b2 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -33,115 +33,117 @@ class MultiReporter: def __init__(self, reporter_results: list[ReporterTrainResult]): self.reporter_results: list[ReporterTrainResult] = reporter_results self.reporters = [r.reporter for r in reporter_results] - train_losses = [r.train_loss for r in reporter_results] - self.train_loss = ( - None if train_losses[0] is None else sum(train_losses) / len(train_losses) - ) + train_losses = [r.train_loss for r in reporter_results] if reporter_results[ + 0].train_loss \ + is not None else None + self.train_loss = sum(train_losses) / len( + train_losses + ) if train_losses is not None else None def __call__(self, h): credences = [r(h) for r in self.reporters] return torch.stack(credences).mean(dim=0) -@dataclass -class Elicit(Run): - """Full specification of a reporter training run.""" +def evaluate_and_save( + train_loss, + reporter: AnyReporter | MultiReporter, + train_dict, + val_dict, + lr_models, + layer, +): + row_bufs = defaultdict(list) + for ds_name in val_dict: + val_h, val_gt, val_lm_preds = val_dict[ds_name] + train_h, train_gt, train_lm_preds = train_dict[ds_name] + meta = {"dataset": ds_name, "layer": layer} + + def eval_all( + reporter: AnyReporter | MultiReporter, + prompt_index: int | Literal["multi"], + ): + val_credences = reporter(val_h) + train_credences = reporter(train_h) + 
prompt_index = {"prompt_index": prompt_index} + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, + } + ) - net: FitterConfig = subgroups( - {"ccs": CcsConfig, "eigen": EigenFitterConfig}, default="eigen" - ) - """Config for building the reporter network.""" + row_bufs["train_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(train_gt, train_credences, mode).to_dict(), + "train_loss": train_loss, + **prompt_index, + } + ) - supervised: Literal["none", "single", "inlp", "cv"] = "single" - """Whether to train a supervised classifier, and if so, whether to use - cross-validation. Defaults to "single", which means to train a single classifier - on the training data. "cv" means to use cross-validation.""" + if val_lm_preds is not None: + row_bufs["lm_eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), + **prompt_index, + } + ) - def evaluate_and_save( - self, - train_loss, - reporter: AnyReporter | MultiReporter, - train_dict, - val_dict, - lr_models, - layer, - ): - row_bufs = defaultdict(list) - for ds_name in val_dict: - val_h, val_gt, val_lm_preds = val_dict[ds_name] - train_h, train_gt, train_lm_preds = train_dict[ds_name] - meta = {"dataset": ds_name, "layer": layer} - - def eval_all( - reporter: AnyReporter | MultiReporter, - prompt_index: int | Literal["multi"], - ): - val_credences = reporter(val_h) - train_credences = reporter(train_h) - prompt_index = {"prompt_index": prompt_index} - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( + if train_lm_preds is not None: + row_bufs["train_lm_eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - "train_loss": train_loss, + **evaluate_preds( + train_gt, train_lm_preds, mode + ).to_dict(), **prompt_index, } ) - row_bufs["train_eval"].append( + for i, model in enumerate(lr_models): + row_bufs["lr_eval"].append( { **meta, "ensembling": mode, - **evaluate_preds(train_gt, train_credences, mode).to_dict(), - "train_loss": train_loss, + "inlp_iter": i, + **evaluate_preds(val_gt, model(val_h), mode).to_dict(), **prompt_index, } ) - if val_lm_preds is not None: - row_bufs["lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - **prompt_index, - } - ) - - if train_lm_preds is not None: - row_bufs["train_lm_eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds( - train_gt, train_lm_preds, mode - ).to_dict(), - **prompt_index, - } - ) - - for i, model in enumerate(lr_models): - row_bufs["lr_eval"].append( - { - **meta, - "ensembling": mode, - "inlp_iter": i, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - **prompt_index, - } - ) - - if isinstance(reporter, MultiReporter): - for prompt_index, reporter_result in enumerate( - reporter.reporter_results - ): - eval_all(reporter_result.reporter, prompt_index) - - eval_all(reporter, "multi") - - return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + if isinstance(reporter, MultiReporter): + for prompt_index, reporter_result in enumerate( + reporter.reporter_results + ): + eval_all(reporter_result.reporter, prompt_index) + + eval_all(reporter, "multi") + + return {k: pd.DataFrame(v) for k, v in row_bufs.items()} + + +@dataclass +class Elicit(Run): + """Full specification of a reporter training run.""" + + net: FitterConfig 
= subgroups( + {"ccs": CcsConfig, "eigen": EigenFitterConfig}, default="eigen" + ) + """Config for building the reporter network.""" + + supervised: Literal["none", "single", "inlp", "cv"] = "single" + """Whether to train a supervised classifier, and if so, whether to use + cross-validation. Defaults to "single", which means to train a single classifier + on the training data. "cv" means to use cross-validation.""" def create_models_dir(self, out_dir: Path): lr_dir = None @@ -258,7 +260,7 @@ def apply_to_layer( devices: list[str], world_size: int, probe_per_prompt: bool, - ) -> list[dict[str, pd.DataFrame]]: + ) -> dict[str, pd.DataFrame]: """Train a single reporter on a single layer.""" assert self.out_dir is not None # TODO this is really annoying, why can it be # None? @@ -275,14 +277,13 @@ def apply_to_layer( # TODO is this even needed # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) - probe_per_prompt = True if probe_per_prompt: train_dicts = [ { ds_name: ( - train_h[:, i : i + 1, ...], + train_h[:, i: i + 1, ...], train_gt, - lm_preds[:, i : i + 1, ...] if lm_preds is not None else None, + lm_preds[:, i: i + 1, ...] if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() @@ -304,16 +305,14 @@ def apply_to_layer( multi_reporter = MultiReporter(results) train_loss = multi_reporter.train_loss - return [ - self.evaluate_and_save( - train_loss, - multi_reporter, - train_dict, - val_dict, - lr_models, # TODO I don't care about this right now but - layer, - ) - ] + return evaluate_and_save( + train_loss, + multi_reporter, + train_dict, + val_dict, + lr_models, # TODO I don't care about this right now but + layer, + ) else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict @@ -326,8 +325,6 @@ def apply_to_layer( train_dict, device, layer, self.out_dir / "lr_models" ) - return [ - self.evaluate_and_save( - train_loss, reporter, train_dict, val_dict, lr_models, layer - ) - ] + return evaluate_and_save( + train_loss, reporter, train_dict, val_dict, lr_models, layer + ) From 7701c291fda586b5263096ad5f514992105a3256 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 16:12:02 +0100 Subject: [PATCH 08/20] add more types and sorting --- elk/run.py | 13 ++++++-- elk/training/train.py | 72 ++++++++++++++++++++----------------------- 2 files changed, 44 insertions(+), 41 deletions(-) diff --git a/elk/run.py b/elk/run.py index 0084ad31..07eb45df 100644 --- a/elk/run.py +++ b/elk/run.py @@ -30,6 +30,8 @@ select_usable_devices, ) +PreparedData = dict[str, tuple[Tensor, Tensor, Tensor | None]] + @dataclass class Run(ABC, Serializable): @@ -132,7 +134,7 @@ def get_device(self, devices, world_size: int) -> str: def prepare_data( self, device: str, layer: int, split_type: Literal["train", "val"] - ) -> dict[str, tuple[Tensor, Tensor, Tensor | None]]: + ) -> PreparedData: """Prepare data for the specified layer and split type.""" out = {} @@ -196,9 +198,14 @@ def apply_to_layers( sortby = ["layer", "ensembling"] if "prompt_index" in dfs[0].columns: sortby.append("prompt_index") - # make the prompt index third col - df = pd.concat(dfs).sort_values(by=sortby) + + # Move prompt_index to the 2'th column + cols = list(df.columns) + cols.insert(2, cols.pop(cols.index("prompt_index"))) + df = df.reindex(columns=cols) + + # Save the CSV out_path = self.out_dir / f"{name}.csv" df.round(4).to_csv(out_path, index=False) if self.debug: diff --git a/elk/training/train.py 
b/elk/training/train.py index 69f651b2..be840223 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -13,13 +13,13 @@ from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot -from ..run import Run +from ..run import PreparedData, Run from ..training.supervised import train_supervised +from . import Classifier from .ccs_reporter import CcsConfig, CcsReporter from .common import FitterConfig, Reporter from .eigen_reporter import EigenFitter, EigenFitterConfig -# declare AnyReporter as CcsReporter | Reporter type alias AnyReporter = CcsReporter | Reporter @@ -33,12 +33,14 @@ class MultiReporter: def __init__(self, reporter_results: list[ReporterTrainResult]): self.reporter_results: list[ReporterTrainResult] = reporter_results self.reporters = [r.reporter for r in reporter_results] - train_losses = [r.train_loss for r in reporter_results] if reporter_results[ - 0].train_loss \ - is not None else None - self.train_loss = sum(train_losses) / len( - train_losses - ) if train_losses is not None else None + train_losses = ( + [r.train_loss for r in reporter_results] + if reporter_results[0].train_loss is not None + else None + ) + self.train_loss = ( + sum(train_losses) / len(train_losses) if train_losses is not None else None + ) def __call__(self, h): credences = [r(h) for r in self.reporters] @@ -46,12 +48,12 @@ def __call__(self, h): def evaluate_and_save( - train_loss, + train_loss: float | None, reporter: AnyReporter | MultiReporter, - train_dict, - val_dict, - lr_models, - layer, + train_dict: PreparedData, + val_dict: PreparedData, + lr_models: list[Classifier], + layer: int, ): row_bufs = defaultdict(list) for ds_name in val_dict: @@ -102,9 +104,7 @@ def eval_all( { **meta, "ensembling": mode, - **evaluate_preds( - train_gt, train_lm_preds, mode - ).to_dict(), + **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), **prompt_index, } ) @@ -121,9 +121,7 @@ def eval_all( ) if isinstance(reporter, MultiReporter): - for prompt_index, reporter_result in enumerate( - reporter.reporter_results - ): + for prompt_index, reporter_result in enumerate(reporter.reporter_results): eval_all(reporter_result.reporter, prompt_index) eval_all(reporter, "multi") @@ -238,7 +236,7 @@ def train_and_save_reporter( return ReporterTrainResult(reporter, train_loss) - def train_lr_model(self, train_dict, device, layer, out_dir): + def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": lr_models = train_supervised( train_dict, @@ -281,9 +279,9 @@ def apply_to_layer( train_dicts = [ { ds_name: ( - train_h[:, i: i + 1, ...], + train_h[:, i : i + 1, ...], train_gt, - lm_preds[:, i: i + 1, ...] if lm_preds is not None else None, + lm_preds[:, i : i + 1, ...] 
if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() @@ -292,8 +290,12 @@ def apply_to_layer( results = [] for i, train_dict in enumerate(train_dicts): - reporters_path = self.out_dir / str(i) / "reporters" - lr_path = self.out_dir / str(i) / "lr_models" + # format i as a 2 digit string, assumes that there will never be more + # than 100 prompts + str_i = str(i).zfill(2) + base = self.out_dir / "reporters" / f"prompt_{str_i}" + reporters_path = base / "reporters" + lr_path = base / "lr_models" reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, train_dict @@ -302,29 +304,23 @@ def apply_to_layer( lr_models = self.train_lr_model(train_dict, device, layer, lr_path) - multi_reporter = MultiReporter(results) - train_loss = multi_reporter.train_loss + maybe_multi_reporter = MultiReporter(results) + train_loss = maybe_multi_reporter.train_loss + + # TODO fix lr_models - return evaluate_and_save( - train_loss, - multi_reporter, - train_dict, - val_dict, - lr_models, # TODO I don't care about this right now but - layer, - ) else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict ) - reporter = reporter_train_result.reporter + maybe_multi_reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model( train_dict, device, layer, self.out_dir / "lr_models" ) - return evaluate_and_save( - train_loss, reporter, train_dict, val_dict, lr_models, layer - ) + return evaluate_and_save( + train_loss, maybe_multi_reporter, train_dict, val_dict, lr_models, layer + ) From 4310def1352b2464e0a4b126c2a95e3c830d7ad7 Mon Sep 17 00:00:00 2001 From: jon Date: Tue, 18 Jul 2023 16:16:22 +0100 Subject: [PATCH 09/20] weird duplicate arg --- elk/run.py | 1 - 1 file changed, 1 deletion(-) diff --git a/elk/run.py b/elk/run.py index 07eb45df..7e48cfa2 100644 --- a/elk/run.py +++ b/elk/run.py @@ -56,7 +56,6 @@ class Run(ABC, Serializable): debug: bool = False min_gpu_mem: int | None = None # in bytes num_gpus: int = -1 - out_dir: Path | None = None disable_cache: bool = field(default=False, to_dict=False) def execute( From 96a3dabc783d0a3f803bdadaebc59deb880148b6 Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 19 Jul 2023 10:44:09 +0100 Subject: [PATCH 10/20] resolved circular import --- elk/evaluation/evaluate.py | 94 +++++++++++++++++++++++----------- elk/training/multi_reporter.py | 44 ++++++++++++++++ elk/training/train.py | 29 +---------- 3 files changed, 109 insertions(+), 58 deletions(-) create mode 100644 elk/training/multi_reporter.py diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 8462cc00..dc2d2f80 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -1,6 +1,7 @@ from collections import defaultdict from dataclasses import dataclass from pathlib import Path +from typing import Literal import pandas as pd import torch @@ -9,6 +10,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds from ..run import Run +from ..training.multi_reporter import AnyReporter, MultiReporter from ..utils import Color @@ -38,39 +40,69 @@ def apply_to_layer( experiment_dir = elk_reporter_dir() / self.source - reporter_path = experiment_dir / "reporters" / f"layer_{layer}.pt" - reporter = torch.load(reporter_path, map_location=device) + def load_reporter() -> AnyReporter | MultiReporter: + # check if experiment_dir / "reporters" has .pt files + first = next((experiment_dir / 
"reporters").iterdir()) + if not first.suffix == ".pt": + return MultiReporter.load( + experiment_dir / "reporters", layer, device=device + ) + else: + path = experiment_dir / "reporters" / f"layer_{layer}.pt" + return torch.load(path, map_location=device) + + reporter = load_reporter() row_bufs = defaultdict(list) - for ds_name, (val_h, val_gt, _) in val_output.items(): - meta = {"dataset": ds_name, "layer": layer} - - val_credences = reporter(val_h) - for mode in ("none", "partial", "full"): - row_bufs["eval"].append( - { - **meta, - "ensembling": mode, - **evaluate_preds(val_gt, val_credences, mode).to_dict(), - } - ) - lr_dir = experiment_dir / "lr_models" - if not self.skip_supervised and lr_dir.exists(): - with open(lr_dir / f"layer_{layer}.pt", "rb") as f: - lr_models = torch.load(f, map_location=device) - if not isinstance(lr_models, list): # backward compatibility - lr_models = [lr_models] - - for i, model in enumerate(lr_models): - model.eval() - row_bufs["lr_eval"].append( - { - "ensembling": mode, - "inlp_iter": i, - **meta, - **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - } - ) + def eval_all( + reporter: AnyReporter | MultiReporter, + prompt_index: int | Literal["multi"] | None = None, + ): + prompt_index = ( + {"prompt_index": prompt_index} if prompt_index is not None else {} + ) + for ds_name, (val_h, val_gt, _) in val_output.items(): + meta = {"dataset": ds_name, "layer": layer} + + val_credences = reporter(val_h) + for mode in ("none", "partial", "full"): + row_bufs["eval"].append( + { + **meta, + "ensembling": mode, + **evaluate_preds(val_gt, val_credences, mode).to_dict(), + **prompt_index, + } + ) + + lr_dir = experiment_dir / "lr_models" + if not self.skip_supervised and lr_dir.exists(): + with open(lr_dir / f"layer_{layer}.pt", "rb") as f: + lr_models = torch.load(f, map_location=device) + if not isinstance( + lr_models, list + ): # backward compatibility + lr_models = [lr_models] + + for i, model in enumerate(lr_models): + model.eval() + row_bufs["lr_eval"].append( + { + "ensembling": mode, + "inlp_iter": i, + **meta, + **evaluate_preds( + val_gt, model(val_h), mode + ).to_dict(), + } + ) + + if isinstance(reporter, MultiReporter): + for prompt_index, single_reporter in enumerate(reporter.reporters): + eval_all(single_reporter, prompt_index) + eval_all(reporter, "multi") + else: + eval_all(reporter) return {k: pd.DataFrame(v) for k, v in row_bufs.items()} diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py new file mode 100644 index 00000000..8d599f87 --- /dev/null +++ b/elk/training/multi_reporter.py @@ -0,0 +1,44 @@ +from dataclasses import dataclass +from pathlib import Path + +import torch as t + +from elk.training import CcsReporter +from elk.training.common import Reporter + +AnyReporter = CcsReporter | Reporter + + +@dataclass +class ReporterTrainResult: + reporter: AnyReporter + train_loss: float | None + + +class MultiReporter: + def __init__(self, reporter_results: list[ReporterTrainResult]): + self.reporter_results: list[ReporterTrainResult] = reporter_results + self.reporters = [r.reporter for r in reporter_results] + train_losses = ( + [r.train_loss for r in reporter_results] + if reporter_results[0].train_loss is not None + else None + ) + self.train_loss = ( + sum(train_losses) / len(train_losses) if train_losses is not None else None + ) + + def __call__(self, h): + credences = [r(h) for r in self.reporters] + return t.stack(credences).mean(dim=0) + + @staticmethod + def load(path: Path, layer: int, device: str): + 
prompt_folders = [p for p in path.iterdir() if p.is_dir()] + reporters = [] + for folder in prompt_folders: + path = folder / "reporters" / f"layer_{layer}.pt" + reporter = t.load(path, map_location=device) + reporters.append(reporter) + # TODO for now I don't care about the train losses + return MultiReporter([ReporterTrainResult(r, None) for r in reporters]) diff --git a/elk/training/train.py b/elk/training/train.py index be840223..6df85ef1 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -17,34 +17,9 @@ from ..training.supervised import train_supervised from . import Classifier from .ccs_reporter import CcsConfig, CcsReporter -from .common import FitterConfig, Reporter +from .common import FitterConfig from .eigen_reporter import EigenFitter, EigenFitterConfig - -AnyReporter = CcsReporter | Reporter - - -@dataclass -class ReporterTrainResult: - reporter: AnyReporter - train_loss: float | None - - -class MultiReporter: - def __init__(self, reporter_results: list[ReporterTrainResult]): - self.reporter_results: list[ReporterTrainResult] = reporter_results - self.reporters = [r.reporter for r in reporter_results] - train_losses = ( - [r.train_loss for r in reporter_results] - if reporter_results[0].train_loss is not None - else None - ) - self.train_loss = ( - sum(train_losses) / len(train_losses) if train_losses is not None else None - ) - - def __call__(self, h): - credences = [r(h) for r in self.reporters] - return torch.stack(credences).mean(dim=0) +from .multi_reporter import AnyReporter, MultiReporter, ReporterTrainResult def evaluate_and_save( From 9c2def0df61dd966a5901df29a5e2c1519c6673d Mon Sep 17 00:00:00 2001 From: jon Date: Wed, 19 Jul 2023 20:42:49 +0100 Subject: [PATCH 11/20] fixed index passing --- elk/evaluation/evaluate.py | 9 +++++ elk/run.py | 2 - elk/training/multi_reporter.py | 1 + elk/training/train.py | 70 +++++++++++++++++++++++----------- 4 files changed, 58 insertions(+), 24 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index dc2d2f80..18485f36 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -38,6 +38,15 @@ def apply_to_layer( device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") + val_output = { + ds_name: ( + train_h[:, self.prompt_indices, ...], + train_gt, + lm_preds[:, self.prompt_indices, ...] 
if lm_preds is not None else None, + ) + for ds_name, (train_h, train_gt, lm_preds) in val_output.items() + } + experiment_dir = elk_reporter_dir() / self.source def load_reporter() -> AnyReporter | MultiReporter: diff --git a/elk/run.py b/elk/run.py index 7e48cfa2..343360a3 100644 --- a/elk/run.py +++ b/elk/run.py @@ -143,8 +143,6 @@ def prepare_data( split = ds[key].with_format("torch", device=device, dtype=torch.int16) labels = assert_type(Tensor, split["label"]) hiddens = int16_to_float32(assert_type(Tensor, split[f"hidden_{layer}"])) - if self.prompt_indices: - hiddens = hiddens[:, self.prompt_indices] with split.formatted_as("torch", device=device): has_preds = "model_logits" in split.features diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 8d599f87..7b75a5bd 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -13,6 +13,7 @@ class ReporterTrainResult: reporter: AnyReporter train_loss: float | None + prompt_index: int | None class MultiReporter: diff --git a/elk/training/train.py b/elk/training/train.py index 6df85ef1..cdcda76f 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -38,10 +38,16 @@ def evaluate_and_save( def eval_all( reporter: AnyReporter | MultiReporter, - prompt_index: int | Literal["multi"], + prompt_index: int | Literal["multi"] | None = None, + i: int = 0, ): - val_credences = reporter(val_h) - train_credences = reporter(train_h) + if isinstance(prompt_index, int): + val_credences = reporter(val_h[:, [prompt_index], :, :]) + train_credences = reporter(train_h[:, [prompt_index], :, :]) + else: + # TODO implement diagonal + val_credences = reporter(val_h) + train_credences = reporter(train_h) prompt_index = {"prompt_index": prompt_index} for mode in ("none", "partial", "full"): row_bufs["eval"].append( @@ -96,10 +102,11 @@ def eval_all( ) if isinstance(reporter, MultiReporter): - for prompt_index, reporter_result in enumerate(reporter.reporter_results): - eval_all(reporter_result.reporter, prompt_index) - - eval_all(reporter, "multi") + for reporter_result in reporter.reporter_results: + eval_all(reporter_result.reporter, reporter_result.prompt_index) + eval_all(reporter, prompt_index="multi") + else: + eval_all(reporter, prompt_index=None) return {k: pd.DataFrame(v) for k, v in row_bufs.items()} @@ -154,9 +161,10 @@ def make_eval(self, model, eval_dataset): # Create a separate function to handle the reporter training. def train_and_save_reporter( - self, device, layer, out_dir, train_dict + self, device, layer, out_dir, train_dict, prompt_index=None ) -> ReporterTrainResult: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? 
+ breakpoint() (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): raise ValueError("All datasets must have the same hidden state size") @@ -209,7 +217,7 @@ def train_and_save_reporter( out_dir.mkdir(parents=True, exist_ok=True) torch.save(reporter, out_dir / f"layer_{layer}.pt") - return ReporterTrainResult(reporter, train_loss) + return ReporterTrainResult(reporter, train_loss, prompt_index) def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": @@ -241,7 +249,8 @@ def apply_to_layer( self.make_reproducible(seed=self.net.seed + layer) device = self.get_device(devices, world_size) - train_dict = self.prepare_data(device, layer, "train") + train_dict = self.prepare_data(device, layer, "train") # prepare data no + # longer does anything on prompt indices val_dict = self.prepare_data(device, layer, "val") (first_train_h, train_gt, _), *rest = train_dict.values() @@ -251,33 +260,40 @@ def apply_to_layer( # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) if probe_per_prompt: - train_dicts = [ + prompt_indices = self.prompt_indices if self.prompt_indices else range(v) + prompt_train_dicts = [ { ds_name: ( - train_h[:, i : i + 1, ...], + train_h[:, [prompt_index], ...], train_gt, - lm_preds[:, i : i + 1, ...] if lm_preds is not None else None, + lm_preds[:, [prompt_index], ...] + if lm_preds is not None + else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() - for i in range(v) # v is number of variants + for prompt_index in prompt_indices # v is number of variants ] results = [] - for i, train_dict in enumerate(train_dicts): - # format i as a 2 digit string, assumes that there will never be more - # than 100 prompts - str_i = str(i).zfill(2) + + for prompt_index, prompt_train_dict in zip( + prompt_indices, prompt_train_dicts + ): + assert prompt_index < 100 # format i as a 2 digit string + str_i = str(prompt_index).zfill(2) base = self.out_dir / "reporters" / f"prompt_{str_i}" reporters_path = base / "reporters" lr_path = base / "lr_models" reporter_train_result = self.train_and_save_reporter( - device, layer, reporters_path, train_dict + device, layer, reporters_path, prompt_train_dict, prompt_index ) results.append(reporter_train_result) - lr_models = self.train_lr_model(train_dict, device, layer, lr_path) + lr_models = self.train_lr_model( + prompt_train_dict, device, layer, lr_path + ) maybe_multi_reporter = MultiReporter(results) train_loss = maybe_multi_reporter.train_loss @@ -285,15 +301,25 @@ def apply_to_layer( # TODO fix lr_models else: + prompt_train_dict = { + ds_name: ( + train_h[:, self.prompt_indices, ...], + train_gt, + lm_preds[:, self.prompt_indices, ...] 
+ if lm_preds is not None + else None, + ) + for ds_name, (train_h, _, lm_preds) in train_dict.items() + } reporter_train_result = self.train_and_save_reporter( - device, layer, self.out_dir / "reporters", train_dict + device, layer, self.out_dir / "reporters", prompt_train_dict ) maybe_multi_reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model( - train_dict, device, layer, self.out_dir / "lr_models" + prompt_train_dict, device, layer, self.out_dir / "lr_models" ) return evaluate_and_save( From 29eeb7f964c2810b9864df300fdff124ac43e1de Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:24:06 +0100 Subject: [PATCH 12/20] fixed index passing again --- elk/evaluation/evaluate.py | 11 ++--------- elk/run.py | 13 +++++++++++++ elk/training/multi_reporter.py | 8 ++++++-- elk/training/train.py | 22 +++++++++------------- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 18485f36..f5a592e7 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -9,7 +9,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds -from ..run import Run +from ..run import Run, select_data from ..training.multi_reporter import AnyReporter, MultiReporter from ..utils import Color @@ -38,14 +38,7 @@ def apply_to_layer( device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") - val_output = { - ds_name: ( - train_h[:, self.prompt_indices, ...], - train_gt, - lm_preds[:, self.prompt_indices, ...] if lm_preds is not None else None, - ) - for ds_name, (train_h, train_gt, lm_preds) in val_output.items() - } + val_output = select_data(val_output, self.prompt_indices) experiment_dir = elk_reporter_dir() / self.source diff --git a/elk/run.py b/elk/run.py index 343360a3..cece5d9c 100644 --- a/elk/run.py +++ b/elk/run.py @@ -33,6 +33,17 @@ PreparedData = dict[str, tuple[Tensor, Tensor, Tensor | None]] +def select_data(prepared_data: PreparedData, prompt_indices: list[int]): + return { + ds_name: ( + train_h[:, prompt_indices, ...], + train_gt, + lm_preds[:, prompt_indices, ...] if lm_preds is not None else None, + ) + for ds_name, (train_h, train_gt, lm_preds) in prepared_data.items() + } + + @dataclass class Run(ABC, Serializable): data: Extract @@ -143,6 +154,8 @@ def prepare_data( split = ds[key].with_format("torch", device=device, dtype=torch.int16) labels = assert_type(Tensor, split["label"]) hiddens = int16_to_float32(assert_type(Tensor, split[f"hidden_{layer}"])) + if self.prompt_indices: + hiddens = hiddens[:, self.prompt_indices, ...] 
with split.formatted_as("torch", device=device): has_preds = "model_logits" in split.features diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 7b75a5bd..0269d1e5 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -30,8 +30,12 @@ def __init__(self, reporter_results: list[ReporterTrainResult]): ) def __call__(self, h): - credences = [r(h) for r in self.reporters] - return t.stack(credences).mean(dim=0) + num_variants = h.shape[1] + assert len(self.reporters) == num_variants + credences = [] + for i, reporter in enumerate(self.reporters): + credences.append(reporter(h[:, [i], :, :])) + return t.stack(credences, dim=0).mean(dim=0) @staticmethod def load(path: Path, layer: int, device: str): diff --git a/elk/training/train.py b/elk/training/train.py index cdcda76f..a8e9ce55 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -42,10 +42,9 @@ def eval_all( i: int = 0, ): if isinstance(prompt_index, int): - val_credences = reporter(val_h[:, [prompt_index], :, :]) - train_credences = reporter(train_h[:, [prompt_index], :, :]) + val_credences = reporter(val_h[:, [i], :, :]) + train_credences = reporter(train_h[:, [i], :, :]) else: - # TODO implement diagonal val_credences = reporter(val_h) train_credences = reporter(train_h) prompt_index = {"prompt_index": prompt_index} @@ -90,20 +89,20 @@ def eval_all( } ) - for i, model in enumerate(lr_models): + for lr_model_num, model in enumerate(lr_models): row_bufs["lr_eval"].append( { **meta, "ensembling": mode, - "inlp_iter": i, + "inlp_iter": lr_model_num, **evaluate_preds(val_gt, model(val_h), mode).to_dict(), **prompt_index, } ) if isinstance(reporter, MultiReporter): - for reporter_result in reporter.reporter_results: - eval_all(reporter_result.reporter, reporter_result.prompt_index) + for i, reporter_result in enumerate(reporter.reporter_results): + eval_all(reporter_result.reporter, reporter_result.prompt_index, i) eval_all(reporter, prompt_index="multi") else: eval_all(reporter, prompt_index=None) @@ -164,7 +163,6 @@ def train_and_save_reporter( self, device, layer, out_dir, train_dict, prompt_index=None ) -> ReporterTrainResult: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? - breakpoint() (_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): raise ValueError("All datasets must have the same hidden state size") @@ -264,15 +262,13 @@ def apply_to_layer( prompt_train_dicts = [ { ds_name: ( - train_h[:, [prompt_index], ...], + train_h[:, [i], ...], train_gt, - lm_preds[:, [prompt_index], ...] - if lm_preds is not None - else None, + lm_preds[:, [i], ...] 
if lm_preds is not None else None, ) } for ds_name, (train_h, _, lm_preds) in train_dict.items() - for prompt_index in prompt_indices # v is number of variants + for i, _ in enumerate(prompt_indices) ] results = [] From f533418f9907c8816233d912c3ae47a50a06ee96 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:24:57 +0100 Subject: [PATCH 13/20] add assert --- elk/training/multi_reporter.py | 1 + 1 file changed, 1 insertion(+) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 0269d1e5..3a021dc0 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -18,6 +18,7 @@ class ReporterTrainResult: class MultiReporter: def __init__(self, reporter_results: list[ReporterTrainResult]): + assert len(reporter_results) > 0, "Must have at least one reporter" self.reporter_results: list[ReporterTrainResult] = reporter_results self.reporters = [r.reporter for r in reporter_results] train_losses = ( From 0f5ce0b2a6aacf5f7338c168b13a07ae1b7e53a3 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:29:17 +0100 Subject: [PATCH 14/20] fix prompt index in loading --- elk/training/multi_reporter.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 3a021dc0..7b2e9508 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -45,6 +45,7 @@ def load(path: Path, layer: int, device: str): for folder in prompt_folders: path = folder / "reporters" / f"layer_{layer}.pt" reporter = t.load(path, map_location=device) - reporters.append(reporter) + prompt_index = int(folder.name.split("_")[-1]) + reporters.append((reporter, prompt_index)) # TODO for now I don't care about the train losses - return MultiReporter([ReporterTrainResult(r, None) for r in reporters]) + return MultiReporter([ReporterTrainResult(r, None, pi) for r, pi in reporters]) From 327d1eb17e112c79004f6041a5829d7203801628 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 14:38:20 +0100 Subject: [PATCH 15/20] remove redundant method --- elk/training/train.py | 33 ++------------------------------- 1 file changed, 2 insertions(+), 31 deletions(-) diff --git a/elk/training/train.py b/elk/training/train.py index a8e9ce55..741405f3 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -2,14 +2,12 @@ from collections import defaultdict from dataclasses import dataclass, replace -from pathlib import Path from typing import Literal import pandas as pd import torch from einops import rearrange, repeat from simple_parsing import subgroups -from simple_parsing.helpers.serialization import save from ..evaluation import Eval from ..metrics import evaluate_preds, to_one_hot @@ -124,20 +122,6 @@ class Elicit(Run): cross-validation. Defaults to "single", which means to train a single classifier on the training data. "cv" means to use cross-validation.""" - def create_models_dir(self, out_dir: Path): - lr_dir = None - lr_dir = out_dir / "lr_models" - reporter_dir = out_dir / "reporters" - - lr_dir.mkdir(parents=True, exist_ok=True) - reporter_dir.mkdir(parents=True, exist_ok=True) - - # Save the reporter config separately in the reporter directory - # for convenient loading of reporters later. 
- save(self.net, reporter_dir / "cfg.yaml", save_dc_types=True) - - return reporter_dir, lr_dir - def make_eval(self, model, eval_dataset): assert self.out_dir is not None return Eval( @@ -254,9 +238,6 @@ def apply_to_layer( (first_train_h, train_gt, _), *rest = train_dict.values() (_, v, k, d) = first_train_h.shape - # TODO is this even needed - # reporter_dir, lr_dir = self.create_models_dir(assert_type(Path, self.out_dir)) - if probe_per_prompt: prompt_indices = self.prompt_indices if self.prompt_indices else range(v) prompt_train_dicts = [ @@ -297,25 +278,15 @@ def apply_to_layer( # TODO fix lr_models else: - prompt_train_dict = { - ds_name: ( - train_h[:, self.prompt_indices, ...], - train_gt, - lm_preds[:, self.prompt_indices, ...] - if lm_preds is not None - else None, - ) - for ds_name, (train_h, _, lm_preds) in train_dict.items() - } reporter_train_result = self.train_and_save_reporter( - device, layer, self.out_dir / "reporters", prompt_train_dict + device, layer, self.out_dir / "reporters", train_dict ) maybe_multi_reporter = reporter_train_result.reporter train_loss = reporter_train_result.train_loss lr_models = self.train_lr_model( - prompt_train_dict, device, layer, self.out_dir / "lr_models" + train_dict, device, layer, self.out_dir / "lr_models" ) return evaluate_and_save( From 51b7d3c59ae45ca58284da81351a3d189cd1dc6b Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:22:55 +0100 Subject: [PATCH 16/20] correctly eval with multiple probes and some renaming --- elk/evaluation/evaluate.py | 13 +++++---- elk/training/multi_reporter.py | 44 ++++++++++++++++-------------- elk/training/train.py | 50 ++++++++++++++++------------------ 3 files changed, 53 insertions(+), 54 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index f5a592e7..49aa5818 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -10,7 +10,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds from ..run import Run, select_data -from ..training.multi_reporter import AnyReporter, MultiReporter +from ..training.multi_reporter import MultiReporter, SingleReporter from ..utils import Color @@ -42,7 +42,7 @@ def apply_to_layer( experiment_dir = elk_reporter_dir() / self.source - def load_reporter() -> AnyReporter | MultiReporter: + def load_reporter() -> SingleReporter | MultiReporter: # check if experiment_dir / "reporters" has .pt files first = next((experiment_dir / "reporters").iterdir()) if not first.suffix == ".pt": @@ -58,16 +58,17 @@ def load_reporter() -> AnyReporter | MultiReporter: row_bufs = defaultdict(list) def eval_all( - reporter: AnyReporter | MultiReporter, + reporter: SingleReporter | MultiReporter, prompt_index: int | Literal["multi"] | None = None, + i: int = 0, ): prompt_index = ( {"prompt_index": prompt_index} if prompt_index is not None else {} ) for ds_name, (val_h, val_gt, _) in val_output.items(): meta = {"dataset": ds_name, "layer": layer} + val_credences = reporter(val_h[:, [i], :, :]) - val_credences = reporter(val_h) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { @@ -101,8 +102,8 @@ def eval_all( ) if isinstance(reporter, MultiReporter): - for prompt_index, single_reporter in enumerate(reporter.reporters): - eval_all(single_reporter, prompt_index) + for i, res in enumerate(reporter.reporter_w_infos): + eval_all(res.model, res.prompt_index, i) eval_all(reporter, "multi") else: eval_all(reporter) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 
7b2e9508..bb329d12 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -6,24 +6,25 @@ from elk.training import CcsReporter from elk.training.common import Reporter -AnyReporter = CcsReporter | Reporter +SingleReporter = CcsReporter | Reporter @dataclass -class ReporterTrainResult: - reporter: AnyReporter - train_loss: float | None - prompt_index: int | None +class ReporterWithInfo: # I don't love this name but I have no choice because + # of the other Reporter + model: SingleReporter + train_loss: float | None = None + prompt_index: int | None = None class MultiReporter: - def __init__(self, reporter_results: list[ReporterTrainResult]): - assert len(reporter_results) > 0, "Must have at least one reporter" - self.reporter_results: list[ReporterTrainResult] = reporter_results - self.reporters = [r.reporter for r in reporter_results] + def __init__(self, reporter: list[ReporterWithInfo]): + assert len(reporter) > 0, "Must have at least one reporter" + self.reporter_w_infos: list[ReporterWithInfo] = reporter + self.models = [r.model for r in reporter] train_losses = ( - [r.train_loss for r in reporter_results] - if reporter_results[0].train_loss is not None + [r.train_loss for r in reporter] + if reporter[0].train_loss is not None else None ) self.train_loss = ( @@ -32,20 +33,21 @@ def __init__(self, reporter_results: list[ReporterTrainResult]): def __call__(self, h): num_variants = h.shape[1] - assert len(self.reporters) == num_variants + assert len(self.models) == num_variants credences = [] - for i, reporter in enumerate(self.reporters): + for i, reporter in enumerate(self.models): credences.append(reporter(h[:, [i], :, :])) return t.stack(credences, dim=0).mean(dim=0) @staticmethod def load(path: Path, layer: int, device: str): prompt_folders = [p for p in path.iterdir() if p.is_dir()] - reporters = [] - for folder in prompt_folders: - path = folder / "reporters" / f"layer_{layer}.pt" - reporter = t.load(path, map_location=device) - prompt_index = int(folder.name.split("_")[-1]) - reporters.append((reporter, prompt_index)) - # TODO for now I don't care about the train losses - return MultiReporter([ReporterTrainResult(r, None, pi) for r, pi in reporters]) + reporters = [ + ( + t.load(folder / "reporters" / f"layer_{layer}.pt", map_location=device), + int(folder.name.split("_")[-1]), # prompt index + ) + for folder in prompt_folders + ] + # we don't care about the train losses for evaluating + return MultiReporter([ReporterWithInfo(r, None, pi) for r, pi in reporters]) diff --git a/elk/training/train.py b/elk/training/train.py index 741405f3..cf97b0a6 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -17,12 +17,12 @@ from .ccs_reporter import CcsConfig, CcsReporter from .common import FitterConfig from .eigen_reporter import EigenFitter, EigenFitterConfig -from .multi_reporter import AnyReporter, MultiReporter, ReporterTrainResult +from .multi_reporter import MultiReporter, ReporterWithInfo, SingleReporter def evaluate_and_save( train_loss: float | None, - reporter: AnyReporter | MultiReporter, + reporter: SingleReporter | MultiReporter, train_dict: PreparedData, val_dict: PreparedData, lr_models: list[Classifier], @@ -35,7 +35,7 @@ def evaluate_and_save( meta = {"dataset": ds_name, "layer": layer} def eval_all( - reporter: AnyReporter | MultiReporter, + reporter: SingleReporter | MultiReporter, prompt_index: int | Literal["multi"] | None = None, i: int = 0, ): @@ -45,7 +45,9 @@ def eval_all( else: val_credences = reporter(val_h) 
train_credences = reporter(train_h) - prompt_index = {"prompt_index": prompt_index} + prompt_index_dict = ( + {"prompt_index": prompt_index} if prompt_index is not None else {} + ) for mode in ("none", "partial", "full"): row_bufs["eval"].append( { @@ -53,7 +55,7 @@ def eval_all( "ensembling": mode, **evaluate_preds(val_gt, val_credences, mode).to_dict(), "train_loss": train_loss, - **prompt_index, + **prompt_index_dict, } ) @@ -63,7 +65,7 @@ def eval_all( "ensembling": mode, **evaluate_preds(train_gt, train_credences, mode).to_dict(), "train_loss": train_loss, - **prompt_index, + **prompt_index_dict, } ) @@ -73,7 +75,7 @@ def eval_all( **meta, "ensembling": mode, **evaluate_preds(val_gt, val_lm_preds, mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) @@ -83,7 +85,7 @@ def eval_all( **meta, "ensembling": mode, **evaluate_preds(train_gt, train_lm_preds, mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) @@ -94,13 +96,13 @@ def eval_all( "ensembling": mode, "inlp_iter": lr_model_num, **evaluate_preds(val_gt, model(val_h), mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) if isinstance(reporter, MultiReporter): - for i, reporter_result in enumerate(reporter.reporter_results): - eval_all(reporter_result.reporter, reporter_result.prompt_index, i) + for i, reporter_result in enumerate(reporter.reporter_w_infos): + eval_all(reporter_result.model, reporter_result.prompt_index, i) eval_all(reporter, prompt_index="multi") else: eval_all(reporter, prompt_index=None) @@ -145,7 +147,7 @@ def make_eval(self, model, eval_dataset): # Create a separate function to handle the reporter training. def train_and_save_reporter( self, device, layer, out_dir, train_dict, prompt_index=None - ) -> ReporterTrainResult: + ) -> ReporterWithInfo: (first_train_h, train_gt, _), *rest = train_dict.values() # TODO can remove? 
(_, v, k, d) = first_train_h.shape if not all(other_h.shape[-1] == d for other_h, _, _ in rest): @@ -199,7 +201,7 @@ def train_and_save_reporter( out_dir.mkdir(parents=True, exist_ok=True) torch.save(reporter, out_dir / f"layer_{layer}.pt") - return ReporterTrainResult(reporter, train_loss, prompt_index) + return ReporterWithInfo(reporter, train_loss, prompt_index) def train_lr_model(self, train_dict, device, layer, out_dir) -> list[Classifier]: if self.supervised != "none": @@ -231,14 +233,14 @@ def apply_to_layer( self.make_reproducible(seed=self.net.seed + layer) device = self.get_device(devices, world_size) - train_dict = self.prepare_data(device, layer, "train") # prepare data no - # longer does anything on prompt indices + train_dict = self.prepare_data(device, layer, "train") val_dict = self.prepare_data(device, layer, "val") (first_train_h, train_gt, _), *rest = train_dict.values() (_, v, k, d) = first_train_h.shape if probe_per_prompt: + # self.prompt_indices being () actually means "all prompts" prompt_indices = self.prompt_indices if self.prompt_indices else range(v) prompt_train_dicts = [ { @@ -261,33 +263,27 @@ def apply_to_layer( str_i = str(prompt_index).zfill(2) base = self.out_dir / "reporters" / f"prompt_{str_i}" reporters_path = base / "reporters" - lr_path = base / "lr_models" + base / "lr_models" reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, prompt_train_dict, prompt_index ) results.append(reporter_train_result) - lr_models = self.train_lr_model( - prompt_train_dict, device, layer, lr_path - ) - + # it is called maybe_multi_reporter because it might be a single reporter maybe_multi_reporter = MultiReporter(results) train_loss = maybe_multi_reporter.train_loss - - # TODO fix lr_models - else: reporter_train_result = self.train_and_save_reporter( device, layer, self.out_dir / "reporters", train_dict ) - maybe_multi_reporter = reporter_train_result.reporter + maybe_multi_reporter = reporter_train_result.model train_loss = reporter_train_result.train_loss - lr_models = self.train_lr_model( - train_dict, device, layer, self.out_dir / "lr_models" - ) + lr_models = self.train_lr_model( + train_dict, device, layer, self.out_dir / "lr_models" + ) return evaluate_and_save( train_loss, maybe_multi_reporter, train_dict, val_dict, lr_models, layer From 75fe56099306368c81db40fd99409fd4607b8432 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:25:48 +0100 Subject: [PATCH 17/20] remove wrong function --- elk/evaluation/evaluate.py | 4 +--- elk/run.py | 11 ----------- 2 files changed, 1 insertion(+), 14 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 49aa5818..8581462d 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -9,7 +9,7 @@ from ..files import elk_reporter_dir from ..metrics import evaluate_preds -from ..run import Run, select_data +from ..run import Run from ..training.multi_reporter import MultiReporter, SingleReporter from ..utils import Color @@ -38,8 +38,6 @@ def apply_to_layer( device = self.get_device(devices, world_size) val_output = self.prepare_data(device, layer, "val") - val_output = select_data(val_output, self.prompt_indices) - experiment_dir = elk_reporter_dir() / self.source def load_reporter() -> SingleReporter | MultiReporter: diff --git a/elk/run.py b/elk/run.py index cece5d9c..03a0a46b 100644 --- a/elk/run.py +++ b/elk/run.py @@ -33,17 +33,6 @@ PreparedData = dict[str, tuple[Tensor, Tensor, Tensor | None]] -def select_data(prepared_data: 
PreparedData, prompt_indices: list[int]): - return { - ds_name: ( - train_h[:, prompt_indices, ...], - train_gt, - lm_preds[:, prompt_indices, ...] if lm_preds is not None else None, - ) - for ds_name, (train_h, train_gt, lm_preds) in prepared_data.items() - } - - @dataclass class Run(ABC, Serializable): data: Extract From 1b6757a78518dcb035f3107b1486e1df0a43e993 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:35:23 +0100 Subject: [PATCH 18/20] pyright --- elk/evaluation/evaluate.py | 4 ++-- elk/training/multi_reporter.py | 5 ++++- elk/training/train.py | 1 - 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/elk/evaluation/evaluate.py b/elk/evaluation/evaluate.py index 8581462d..44aa0411 100644 --- a/elk/evaluation/evaluate.py +++ b/elk/evaluation/evaluate.py @@ -60,7 +60,7 @@ def eval_all( prompt_index: int | Literal["multi"] | None = None, i: int = 0, ): - prompt_index = ( + prompt_index_dict = ( {"prompt_index": prompt_index} if prompt_index is not None else {} ) for ds_name, (val_h, val_gt, _) in val_output.items(): @@ -73,7 +73,7 @@ def eval_all( **meta, "ensembling": mode, **evaluate_preds(val_gt, val_credences, mode).to_dict(), - **prompt_index, + **prompt_index_dict, } ) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index bb329d12..4f373b12 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -27,8 +27,11 @@ def __init__(self, reporter: list[ReporterWithInfo]): if reporter[0].train_loss is not None else None ) + self.train_loss = ( - sum(train_losses) / len(train_losses) if train_losses is not None else None + sum(train_losses) / len(train_losses) + if train_losses is not None # type: ignore + else None ) def __call__(self, h): diff --git a/elk/training/train.py b/elk/training/train.py index cf97b0a6..b0af8b39 100644 --- a/elk/training/train.py +++ b/elk/training/train.py @@ -263,7 +263,6 @@ def apply_to_layer( str_i = str(prompt_index).zfill(2) base = self.out_dir / "reporters" / f"prompt_{str_i}" reporters_path = base / "reporters" - base / "lr_models" reporter_train_result = self.train_and_save_reporter( device, layer, reporters_path, prompt_train_dict, prompt_index From 0d6c8b9ae897346f2caacf6068e8f7ebcf0ae6a1 Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:44:32 +0100 Subject: [PATCH 19/20] pytest --- elk/run.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/elk/run.py b/elk/run.py index 03a0a46b..a17b01cb 100644 --- a/elk/run.py +++ b/elk/run.py @@ -199,10 +199,10 @@ def apply_to_layers( sortby.append("prompt_index") df = pd.concat(dfs).sort_values(by=sortby) - # Move prompt_index to the 2'th column - cols = list(df.columns) - cols.insert(2, cols.pop(cols.index("prompt_index"))) - df = df.reindex(columns=cols) + if "prompt_index" in df.columns: + cols = list(df.columns) + cols.insert(2, cols.pop(cols.index("prompt_index"))) + df = df.reindex(columns=cols) # Save the CSV out_path = self.out_dir / f"{name}.csv" From 785537b4882e9996e7c6b70aeb0e988d5c173acf Mon Sep 17 00:00:00 2001 From: jon Date: Thu, 20 Jul 2023 16:49:43 +0100 Subject: [PATCH 20/20] pyright --- elk/training/multi_reporter.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/elk/training/multi_reporter.py b/elk/training/multi_reporter.py index 4f373b12..602b23e9 100644 --- a/elk/training/multi_reporter.py +++ b/elk/training/multi_reporter.py @@ -29,8 +29,8 @@ def __init__(self, reporter: list[ReporterWithInfo]): ) self.train_loss = ( - sum(train_losses) / 
len(train_losses) - if train_losses is not None # type: ignore + sum(train_losses) / len(train_losses) # type: ignore + if train_losses is not None else None )
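
Note on the end state of this series: after patches 12 and 16, probe_per_prompt training produces one probe per prompt variant, and MultiReporter routes variant i of the hidden states to probe i before averaging the credences. Below is a minimal standalone sketch of that ensembling step, appended for reference and not part of the patch series itself. It uses the (n, v, k, d) = (examples, variants, classes, hidden size) layout from the diffs; ToyProbe is a hypothetical stand-in for a trained probe and is not part of the elk codebase.

    import torch as t

    class ToyProbe:
        """Hypothetical stand-in for a single trained probe; scores (n, 1, k, d) hiddens."""

        def __init__(self, d: int):
            self.w = t.randn(d)

        def __call__(self, h: t.Tensor) -> t.Tensor:
            # Project hidden states to per-class credences: (n, 1, k, d) -> (n, 1, k).
            return h @ self.w

    def ensemble_per_prompt(probes: list[ToyProbe], h: t.Tensor) -> t.Tensor:
        # Mirrors the averaging in MultiReporter.__call__: probe i only ever sees
        # prompt variant i, then the per-variant credences are stacked and averaged.
        n, v, k, d = h.shape
        assert len(probes) == v, "need exactly one probe per prompt variant"
        credences = [probe(h[:, [i], :, :]) for i, probe in enumerate(probes)]
        return t.stack(credences, dim=0).mean(dim=0)  # (n, 1, k)

    if __name__ == "__main__":
        h = t.randn(8, 3, 2, 16)  # 8 examples, 3 variants, 2 classes, hidden size 16
        probes = [ToyProbe(16) for _ in range(3)]
        print(ensemble_per_prompt(probes, h).shape)  # torch.Size([8, 1, 2])

This only illustrates the averaging behaviour the patches implement; the actual probes are the CcsReporter or eigen Reporter objects trained in train_and_save_reporter.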