Spaces:
Running
Running
Lev McKinney
committed on
Commit
·
b9dc122
1
Parent(s):
dba1d6e
added more logging to migration process
Browse files - lens_migration.py +6 -3
lens_migration.py
CHANGED
|
@@ -5,6 +5,7 @@ from copy import deepcopy
|
|
| 5 |
import inspect
|
| 6 |
from logging import warn
|
| 7 |
from pathlib import Path
|
|
|
|
| 8 |
import json
|
| 9 |
|
| 10 |
from tuned_lens.model_surgery import get_final_norm, get_transformer_layers
|
|
@@ -352,13 +353,15 @@ if __name__ == "__main__":
|
|
| 352 |
|
| 353 |
device = th.device("cuda:0" if th.cuda.is_available() else "cpu")
|
| 354 |
|
|
|
|
| 355 |
tuned_lens_old = TunedLensOld.load(args.resource_id, map_location=device)
|
| 356 |
|
|
|
|
| 357 |
tuned_lens = TunedLens.from_model(
|
| 358 |
model, bias=tuned_lens_old.config['bias'], revision=revision
|
| 359 |
)
|
| 360 |
|
| 361 |
-
for i in range(len(tuned_lens_old)):
|
| 362 |
tuned_lens[i].load_state_dict(tuned_lens_old[i].state_dict())
|
| 363 |
|
| 364 |
|
|
@@ -368,7 +371,7 @@ if __name__ == "__main__":
|
|
| 368 |
|
| 369 |
# Fuzz the new lens against the old one's
|
| 370 |
with th.no_grad():
|
| 371 |
-
for i in range(len(tuned_lens)):
|
| 372 |
for _ in range(10):
|
| 373 |
a = th.randn(1, 1, tuned_lens.config.d_model, device=device)
|
| 374 |
logits_new = tuned_lens(a, i)
|
|
@@ -377,5 +380,5 @@ if __name__ == "__main__":
|
|
| 377 |
log_ps_old = logits_old.log_softmax(-1)
|
| 378 |
assert (th.allclose(log_ps_new, log_ps_old))
|
| 379 |
print("js div", js_divergence(log_ps_new, log_ps_old))
|
| 380 |
-
|
| 381 |
tuned_lens.to(th.device("cpu")).save(args.output_dir)
|
|
|
|
| 5 |
import inspect
|
| 6 |
from logging import warn
|
| 7 |
from pathlib import Path
|
| 8 |
+
import tqdm
|
| 9 |
import json
|
| 10 |
|
| 11 |
from tuned_lens.model_surgery import get_final_norm, get_transformer_layers
|
|
|
|
| 353 |
|
| 354 |
device = th.device("cuda:0" if th.cuda.is_available() else "cpu")
|
| 355 |
|
| 356 |
+
print("Loading old lens")
|
| 357 |
tuned_lens_old = TunedLensOld.load(args.resource_id, map_location=device)
|
| 358 |
|
| 359 |
+
print("Initializing new lens")
|
| 360 |
tuned_lens = TunedLens.from_model(
|
| 361 |
model, bias=tuned_lens_old.config['bias'], revision=revision
|
| 362 |
)
|
| 363 |
|
| 364 |
+
for i in tqdm(range(len(tuned_lens_old)), desc="Copying parameters"):
|
| 365 |
tuned_lens[i].load_state_dict(tuned_lens_old[i].state_dict())
|
| 366 |
|
| 367 |
|
|
|
|
| 371 |
|
| 372 |
# Fuzz the new lens against the old one's
|
| 373 |
with th.no_grad():
|
| 374 |
+
for i in tqdm(range(len(tuned_lens)), desc="Fuzzing layers"):
|
| 375 |
for _ in range(10):
|
| 376 |
a = th.randn(1, 1, tuned_lens.config.d_model, device=device)
|
| 377 |
logits_new = tuned_lens(a, i)
|
|
|
|
| 380 |
log_ps_old = logits_old.log_softmax(-1)
|
| 381 |
assert (th.allclose(log_ps_new, log_ps_old))
|
| 382 |
print("js div", js_divergence(log_ps_new, log_ps_old))
|
| 383 |
+
print("Saving new lens to", args.output_dir)
|
| 384 |
tuned_lens.to(th.device("cpu")).save(args.output_dir)
|