| from transformers import AutoModelForCausalLM, AutoTokenizer, QuantoConfig |
| from alphalora.expert_number import calculate_expert |
| import json |
| from tqdm import tqdm |
| import torch |
| from lsaq_quant_assign import quantize_llama_like |
| import os |
|
|
| from datasets import load_dataset |
|
|
def compute_bi(x_in: torch.Tensor, x_out: torch.Tensor) -> float:
    """Return the BI score between a layer's input and output activations.

    Both tensors are flattened to rows of size ``x.size(-1)``; the score is
    ``1 - mean(cosine_similarity(row_in, row_out))`` over all rows, so a layer
    that leaves its input direction unchanged scores close to 0.

    Args:
        x_in: Activations entering the layer; the last dimension is the
            feature dimension, all leading dimensions are flattened.
        x_out: Activations leaving the layer, with the same shape as ``x_in``.

    Returns:
        The score as a Python float.
    """
    # reshape (not view) so non-contiguous inputs, e.g. transposed or sliced
    # tensors, are accepted instead of raising.
    # Upcast to float32: in half precision values close to 1.0 are spaced
    # roughly 5e-4 apart, which is the same order of magnitude as the score.
    rows_in = x_in.reshape(-1, x_in.size(-1)).float()
    rows_out = x_out.reshape(-1, x_out.size(-1)).float()
    cos_sim = torch.nn.functional.cosine_similarity(rows_in, rows_out, dim=-1)
    return (1 - cos_sim.mean()).item()
|
|
| import argparse |
| import numpy as np |
|
|
# Command-line interface for the script.
parser = argparse.ArgumentParser(description="parser")
parser.add_argument(
    "--model_id",  # required; read later as args.model_id
    required=True,
    type=str,
)
parser.add_argument(
    "--cuda_id",  # integer device index, defaults to 1; read as args.cuda_id
    default=1,
    type=int,
)

# Parsed at import time: the rest of the script reads `args` directly.
args = parser.parse_args()
|
|
| |
| import os |
|
|
| |
| |
# Redirect the Hugging Face / XDG caches to a fixed location.
# NOTE(review): these variables are assigned after `transformers` and
# `datasets` have already been imported at the top of the file; confirm the
# libraries still honour them at this point, otherwise move the assignments
# above those imports.
os.environ.update(
    {
        "HF_HOME": "/mnt/bn/life-mllm/users/cxr/.cache/huggingface",
        "XDG_CACHE_HOME": "/mnt/bn/life-mllm/users/cxr/.cache",
    }
)

# Local directory handed to `load_dataset` as its cache location.
my_cache_dir = "./data_cache"

# Calibration text: the first 1% of the WikiText-2 (raw) training split.
wikitext_source = ("wikitext", "wikitext-2-raw-v1")
dataset = load_dataset(
    *wikitext_source,
    split="train[:1%]",
    cache_dir=my_cache_dir,
)
| |
| |
# Score every requested checkpoint (currently only the one from --model_id).
model_ids = [args.model_id]
for model_id in model_ids:
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        device_map=f"cuda:{args.cuda_id}",
        torch_dtype=torch.float16,
    )
    model.eval()
    tokenizer = AutoTokenizer.from_pretrained(model_id)

    # Padding setup does not depend on the batch, so do it once per model.
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
        model.config.pad_token_id = tokenizer.pad_token_id

    num_layers = len(model.model.layers)
    bi_sums = torch.zeros(num_layers, device=model.device)
    count = 0  # number of batches that contributed to bi_sums
    batch_size = 8
    max_len = 128

    # Filled by the forward hooks on every forward pass: layer index -> tensor.
    hidden_states_in, hidden_states_out = {}, {}

    def make_hook(layer_id):
        """Build a forward hook that records layer `layer_id`'s input/output."""

        def hook(module, layer_inputs, layer_output):
            hidden_states_in[layer_id] = layer_inputs[0].detach()
            # A decoder layer may return either the hidden-state tensor or a
            # tuple whose first element is that tensor; accept both.
            if isinstance(layer_output, (tuple, list)):
                layer_output = layer_output[0]
            hidden_states_out[layer_id] = layer_output.detach()

        return hook

    # Register the hooks once per model instead of once per batch, and make
    # sure they are removed even if a forward pass raises.
    hooks = [
        block.register_forward_hook(make_hook(layer_id))
        for layer_id, block in enumerate(model.model.layers)
    ]
    try:
        for start in tqdm(range(0, len(dataset), batch_size), desc="Processing batches"):
            texts = dataset[start: start + batch_size]["text"]
            inputs = tokenizer(
                texts,
                return_tensors="pt",
                padding=True,
                truncation=True,
                max_length=max_len,
            ).to(model.device)

            # Only real tokens should contribute to the score; padded
            # positions would otherwise be averaged in with equal weight.
            attention_mask = inputs.get("attention_mask")
            if attention_mask is None:
                token_mask = torch.ones_like(inputs["input_ids"], dtype=torch.bool)
            else:
                token_mask = attention_mask.bool()
            if not token_mask.any():
                # Nothing to measure (e.g. a batch made only of empty lines).
                continue

            hidden_states_in.clear()
            hidden_states_out.clear()
            with torch.no_grad():
                model(**inputs)

            for layer_id in range(num_layers):
                bi_sums[layer_id] += compute_bi(
                    hidden_states_in[layer_id][token_mask],
                    hidden_states_out[layer_id][token_mask],
                )
            count += 1
    finally:
        for handle in hooks:
            handle.remove()

    if count == 0:
        # Dividing by zero would silently write NaN scores to disk.
        raise RuntimeError(
            f"No non-empty batches were processed for {model_id}; cannot compute BI scores."
        )

    # Average over batches, then show the layers ordered from lowest to
    # highest score as (layer_index, score) pairs.
    bi_scores = (bi_sums / count).cpu().tolist()
    sorted_bi = sorted(enumerate(bi_scores), key=lambda x: x[1])
    print(sorted_bi)

    # Persist the per-layer scores (in layer order) under metrics/<model name>/.
    model_name = os.path.basename(model_id.rstrip("/"))
    os.makedirs(f"metrics/{model_name}", exist_ok=True)
    with open(f"metrics/{model_name}/BI.json", "w", encoding="utf-8") as f:
        json.dump(bi_scores, f, indent=4)