Spaces:

AlignmentResearch
/

tuned-lens

Running

Lev McKinney committed on Apr 25, 2023

Commit

b9dc122

1 Parent(s): dba1d6e

added more logging to migration process

Files changed (1) hide show

lens_migration.py CHANGED Viewed

@@ -5,6 +5,7 @@ from copy import deepcopy
 import inspect
 from logging import warn
 from pathlib import Path
 import json
 from tuned_lens.model_surgery import get_final_norm, get_transformer_layers
@@ -352,13 +353,15 @@ if __name__ == "__main__":
     device = th.device("cuda:0" if th.cuda.is_available() else "cpu")
     tuned_lens_old = TunedLensOld.load(args.resource_id, map_location=device)
     tuned_lens = TunedLens.from_model(
         model, bias=tuned_lens_old.config['bias'], revision=revision
     )
-    for i in range(len(tuned_lens_old)):
         tuned_lens[i].load_state_dict(tuned_lens_old[i].state_dict())
@@ -368,7 +371,7 @@ if __name__ == "__main__":
     # Fuzz the new lens against the old one
     with th.no_grad():
-        for i in range(len(tuned_lens)):
             for _ in range(10):
                 a = th.randn(1, 1, tuned_lens.config.d_model, device=device)
                 logits_new = tuned_lens(a, i)
@@ -377,5 +380,5 @@ if __name__ == "__main__":
                 log_ps_old = logits_old.log_softmax(-1)
                 assert (th.allclose(log_ps_new, log_ps_old))
                 print("js div", js_divergence(log_ps_new, log_ps_old))
     tuned_lens.to(th.device("cpu")).save(args.output_dir)

 import inspect
 from logging import warn
 from pathlib import Path
+import tqdm
 import json
 from tuned_lens.model_surgery import get_final_norm, get_transformer_layers
     device = th.device("cuda:0" if th.cuda.is_available() else "cpu")
+    print("Loading old lens")
     tuned_lens_old = TunedLensOld.load(args.resource_id, map_location=device)
+    print("Initializing new lens")
     tuned_lens = TunedLens.from_model(
         model, bias=tuned_lens_old.config['bias'], revision=revision
     )
+    for i in tqdm(range(len(tuned_lens_old)), desc="Copying parameters"):
         tuned_lens[i].load_state_dict(tuned_lens_old[i].state_dict())
     # Fuzz the new lens against the old one
     with th.no_grad():
+        for i in tqdm(range(len(tuned_lens)), desc="Fuzzing layers"):
             for _ in range(10):
                 a = th.randn(1, 1, tuned_lens.config.d_model, device=device)
                 logits_new = tuned_lens(a, i)
                 log_ps_old = logits_old.log_softmax(-1)
                 assert (th.allclose(log_ps_new, log_ps_old))
                 print("js div", js_divergence(log_ps_new, log_ps_old))
+    print("Saving new lens to", args.output_dir)
     tuned_lens.to(th.device("cpu")).save(args.output_dir)