# Phi2-Fine-Tuning / training_phi2.py
import os
import sys

import torch
from datasets import load_dataset
from peft import LoraConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    EarlyStoppingCallback,
    TrainingArguments,
)
from trl import SFTTrainer
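# Paths: base model checkpoint (presumably a previously merged TinyLlama checkpoint,
# given the directory name), the JSONL training data, and the output directory.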
NAME_OF_MODEL = "./merged_tinyllama_logger"
DATASET_PATH = "/app/data/log_dataset.jsonl"
OUTPUT_DIR = "/app/model_output/incremental_1_logs"
os.makedirs(OUTPUT_DIR, exist_ok=True)
# Quantization configuration: load the base model in 4-bit NF4 with nested (double)
# quantization and float16 compute.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
    bnb_4bit_use_double_quant=True,
)
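# LoRA adapter configuration: rank-32 adapters with alpha 124 and 15% dropout.
# SFTTrainer applies these adapters on top of the quantized base model (QLoRA-style).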
lora_config = LoraConfig(
    r=32,
    lora_alpha=124,
    bias="none",
    lora_dropout=0.15,
    task_type="CAUSAL_LM",
)
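# Training arguments: effective batch size 4 x 16 = 64, cosine LR schedule with 3%
# warmup, fp16 mixed precision, paged 8-bit AdamW, and evaluation/checkpointing every
# 25 steps; the checkpoint with the lowest eval_loss is reloaded at the end.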
training_args = TrainingArguments(
    output_dir=OUTPUT_DIR,
    per_device_train_batch_size=4,
    gradient_accumulation_steps=16,
    learning_rate=1e-4,
    weight_decay=0.001,
    bf16=False,
    max_grad_norm=0.3,
    max_steps=-1,
    warmup_ratio=0.03,
    group_by_length=True,
    lr_scheduler_type="cosine",
    num_train_epochs=4,
    logging_steps=10,
    save_steps=25,
    fp16=True,
    optim="paged_adamw_8bit",
    report_to=["tensorboard"],
    eval_strategy="steps",
    eval_steps=25,
    load_best_model_at_end=True,
    metric_for_best_model="eval_loss",
    greater_is_better=False,
)
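# Load the JSONL dataset (each record is expected to carry a "text" field, which is
# the column SFTTrainer is pointed at below) and hold out 10% for evaluation.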
try:
    dataset = load_dataset("json", data_files=DATASET_PATH)
    split_dataset = dataset["train"].train_test_split(test_size=0.1, seed=42)
    train_dataset = split_dataset["train"]
    eval_dataset = split_dataset["test"]
except Exception as e:
    print(f"Error loading dataset from {DATASET_PATH}: {e}")
    sys.exit(1)
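# Load the base model in 4-bit and let Accelerate place it on the available device(s).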
print("Loading model with Quantization")
try:
model=AutoModelForCausalLM.from_pretrained(
NAME_OF_MODEL,
quantization_config = bnb_config,
device_map="auto",
trust_remote_code = True,
torch_dtype = torch.float16
)
model.config.pretraining_p=1
print("Model loaded successfully")
except Exception as e:
print("ERROR LOADING MODEL: {e}")
exit(1)
try:
    tokenizer = AutoTokenizer.from_pretrained(NAME_OF_MODEL, trust_remote_code=True)
    tokenizer.pad_token = tokenizer.eos_token  # reuse EOS as the padding token
    tokenizer.padding_side = "right"
except Exception as e:
    print(f"ERROR LOADING TOKENIZER: {e}")
    sys.exit(1)
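# Supervised fine-tuning over the "text" column, truncated to 512 tokens without
# packing; training stops early after 7 evaluations with no eval_loss improvement.
# Note: these keyword arguments (dataset_text_field, max_seq_length, tokenizer,
# packing) match the older trl SFTTrainer API; newer trl releases move them into
# SFTConfig.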
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    eval_dataset=eval_dataset,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=training_args,
    packing=False,
    callbacks=[EarlyStoppingCallback(early_stopping_patience=7)],
)
print("training started")
trainer.train()
print("fine tuning complete")
trainer.save_model(OUTPUT_DIR)
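# save_model() writes the trained LoRA adapter weights and config (not a merged model);
# the tokenizer could also be saved alongside with tokenizer.save_pretrained(OUTPUT_DIR).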