Update README.md
Browse files
README.md
CHANGED
|
@@ -5,6 +5,72 @@ tags: []
|
|
| 5 |
|
| 6 |
# Model Card for Model ID
|
| 7 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
This model isn't really made for benchmarks; it scores worse on everything except ARC-C and TruthfulQA.
|
| 9 |
|
| 10 |
| Model | ARC-C | HellaSwag | MMLU | TruthfulQA | Winogrande | GSM8k |
|
|
|
|
| 5 |
|
| 6 |
# Model Card for Model ID
|
| 7 |
|
| 8 |
+
```python
|
| 9 |
+
import torch
|
| 10 |
+
from torch import nn
|
| 11 |
+
import random
|
| 12 |
+
from transformers import AutoModelForCausalLM, AutoTokenizer
|
| 13 |
+
|
| 14 |
+
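# Make sure the model never generates mask tokens: give the logits of ids 32000-34047 a large negative bias.
# (`model` and `tokenizer` are assumed to be loaded already, e.g. via AutoModelForCausalLM / AutoTokenizer.)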
|
| 15 |
+
bias = torch.zeros(34048)
|
| 16 |
+
bias[32000:] = -100
|
| 17 |
+
model.lm_head.bias = nn.Parameter(bias)
|
| 18 |
+
|
| 19 |
+
# --------------------------------------------------------------------------------
|
| 20 |
+
# Generation without masking
|
| 21 |
+
input_ids = tokenizer("Once upon a time, in a land far far away...", return_tensors='pt').input_ids
|
| 22 |
+
print(input_ids)
|
| 23 |
+
# tensor([[ 1, 5713, 3714, 264, 727, 28725, 297, 264, 2533, 2082,
|
| 24 |
+
# 2082, 1753, 1101]])
|
| 25 |
+
output = model.generate(input_ids, max_new_tokens=64)
|
| 26 |
+
print(tokenizer.decode(output[0]))
|
| 27 |
+
# '<s> Once upon a time, in a land far far away...\n\nThere was a magical place called Disneyland.\n\nIt was a place where dreams came true, where fairy tales became reality, and where magic was all around.\n\nBut one day, something terrible happened.\n\nThe magic began to fade.\n\nThe fairy tales became dull, the'
|
| 28 |
+
|
| 29 |
+
# --------------------------------------------------------------------------------
|
| 30 |
+
# replace both "far" tokens (id 2082) with one arbitrary mask id instead (any id from 32,000 up to 34,047 works)
|
| 31 |
+
# the model should pick up that two repeating words after "Once upon a time, in a land-"
|
| 32 |
+
# and before "away" would probably be "far far"
|
| 33 |
+
|
| 34 |
+
input_ids[input_ids==2082] = 32_001
|
| 35 |
+
print(input_ids)
|
| 36 |
+
# tensor([[ 1, 5713, 3714, 264, 727, 28725, 297, 264, 2533, 32001,
|
| 37 |
+
# 32001, 1753, 1101]])
|
| 38 |
+
output = model.generate(input_ids, max_new_tokens=64)
|
| 39 |
+
print(tokenizer.decode(output[0]))
|
| 40 |
+
# '<s> Once upon a time, in a land<ID-000001><ID-000001> away...\n\nOnce upon a time, in a land far, far away, there was a magical kingdom called Flanders. It was a peaceful land, where everyone lived happily ever after.\n\nBut one day, a terrible thing happened. A terrible, terrible thing.\n\nA terrible, terrible thing happened.'
|
| 41 |
+
|
| 42 |
+
# --------------------------------------------------------------------------------
|
| 43 |
+
# we can also mask everything except "<s>", "Once", "upon", "away", "...": each distinct token gets its own random mask id
|
| 44 |
+
def create_masked_ids(input_ids, token_offset, ids_to_mask, num_mask_tokens=2048):
    """Replace tokens with random mask ids, leaving a chosen set untouched.

    Each distinct id in ``input_ids`` is assigned its own slot, drawn without
    replacement from ``range(num_mask_tokens)``; its replacement id is
    ``slot + token_offset``. Repeated tokens therefore map to the same
    replacement id, and different tokens never collide.

    Args:
        input_ids: Integer tensor of token ids (any shape). Not modified.
        token_offset: Value added to every sampled slot to obtain the
            replacement id (the example uses 32_000).
        ids_to_mask: Iterable of token ids that are KEPT as-is in the output.
            Despite the name, these are the ids protected from replacement;
            every other id is swapped for its mask id.
        num_mask_tokens: Size of the pool the slots are sampled from.
            Defaults to 2048, the value previously hard-coded here.

    Returns:
        A new tensor with the same shape, dtype and device as ``input_ids``.

    Raises:
        ValueError: If ``input_ids`` holds more distinct ids than
            ``num_mask_tokens``, so distinct slots cannot be assigned.
    """
    # `inverse` has the shape of `input_ids` and holds, for every position,
    # the index of that token inside the sorted `distinct_ids`.
    distinct_ids, inverse = torch.unique(input_ids, return_inverse=True)
    n_distinct = distinct_ids.numel()
    if n_distinct > num_mask_tokens:
        raise ValueError(
            f"input_ids contains {n_distinct} distinct tokens but only "
            f"{num_mask_tokens} mask slots are available"
        )

    # One distinct random slot per distinct token, in sorted-id order.
    slots = random.sample(range(num_mask_tokens), n_distinct)
    replacement_ids = torch.tensor(
        slots, dtype=input_ids.dtype, device=input_ids.device
    ) + token_offset

    # Table lookup instead of a per-element Python callback (`apply_`),
    # which is slow and unavailable for non-CPU tensors.
    shuffled_ids = replacement_ids[inverse]

    # True where the original token must be preserved.
    keep = torch.zeros_like(input_ids, dtype=torch.bool)
    for kept_id in ids_to_mask:
        keep |= (input_ids == kept_id)

    return torch.where(keep, input_ids, shuffled_ids)
|
| 62 |
+
|
| 63 |
+
masked_ids = create_masked_ids(input_ids, 32_000, [1, 5713, 3714, 1753, 1101])
|
| 64 |
+
print(masked_ids)
|
| 65 |
+
# tensor([[ 1, 5713, 3714, 33048, 34032, 32238, 32016, 33048, 33013, 33299,
|
| 66 |
+
# 33299, 1753, 1101]])
|
| 67 |
+
|
| 68 |
+
output = model.generate(masked_ids, max_new_tokens=64)
|
| 69 |
+
print(tokenizer.decode(output[0]))
|
| 70 |
+
# '<s> Once upon<ID-000418><ID-0007F0><ID-0000EE><ID-000010><ID-000418><ID-0003F5><ID-000513><ID-000513> away...\n\nOnce upon a time, there was a young man named Alex. He was a very curious young man, and loved to explore the world around him. One day, he stumbled upon a magical book called "The Book of Secrets." This book contained all sorts of secrets about the world, and Alex was fasc'
|
| 71 |
+
|
| 72 |
+
```
|
| 73 |
+
|
| 74 |
This model isn't really made for benchmarks; it scores worse on everything except ARC-C and TruthfulQA.
|
| 75 |
|
| 76 |
| Model | ARC-C | HellaSwag | MMLU | TruthfulQA | Winogrande | GSM8k |
|