Spaces:
Sleeping
Sleeping
File size: 4,242 Bytes
6cd256f 7719014 6cd256f 15984ea 6cd256f f701b3b 6cd256f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
from pathlib import Path
import sys
import tiktoken
import torch
import gradio as gr
# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from utils import GPTModel
from utils import (
generate,
text_to_token_ids,
token_ids_to_text,
)
# Run on the GPU when PyTorch reports CUDA as available; otherwise use the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
def get_model_and_tokenizer():
    """
    Load the GPT-2 small (124M) model with the finetuned weights generated in chapter 7.

    The weights are read from ``gpt2-small124M-sft.pth`` in the current working
    directory, so the chapter 7 code must have been run first (or the file
    uploaded next to this script). If the file is missing, a hint is printed
    and the process exits with a non-zero status.

    Returns:
        A ``(tokenizer, model, config)`` tuple: the tiktoken "gpt2" encoding,
        the ``GPTModel`` with the checkpoint loaded (moved to ``device`` and
        switched to evaluation mode), and the config dict the model was built
        from.
    """
    # These hyperparameters describe the 124M ("small") architecture and must
    # match the checkpoint that is loaded below.
    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.0,        # Dropout rate
        "qkv_bias": True,        # Query-key-value bias
    }

    tokenizer = tiktoken.get_encoding("gpt2")

    # The same relative path is used for local development and for the
    # Hugging Face deployment, so a single existence check covers both.
    model_path = Path("gpt2-small124M-sft.pth")
    if not model_path.exists():
        print(
            "Could not find the model file. Please run the chapter 7 code "
            "to generate the gpt2-small124M-sft.pth file or upload it to this directory."
        )
        # Exit with a failure status so the missing file is not reported as success.
        sys.exit(1)

    # map_location remaps the stored tensors onto the active device, so a
    # checkpoint saved on a GPU still loads on a CPU-only host.
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)

    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()  # Set to evaluation mode

    return tokenizer, model, GPT_CONFIG_124M
def extract_response(response_text, input_text):
    """
    Return the part of ``response_text`` that follows the prompt.

    The first ``len(input_text)`` characters are dropped by position (the
    prefix is not compared against ``input_text``), every occurrence of the
    "### Response:" marker is removed from the remainder, and surrounding
    whitespace is stripped.
    """
    prompt_length = len(input_text)
    continuation = response_text[prompt_length:]
    without_marker = continuation.replace("### Response:", "")
    return without_marker.strip()
# Load the model and tokenizer once, when this module is first executed.
# The three results are kept as module-level globals and are read by
# generate_response on every request.
tokenizer, model, model_config = get_model_and_tokenizer()
def generate_response(message, max_new_tokens=100):
    """
    Produce the model's reply to a single user message.

    The message is embedded in an instruction-style prompt, tokenized, and
    passed to ``generate`` together with the module-level ``model``. The
    decoded output is then reduced to the text that follows the prompt.

    Args:
        message: The user's instruction text.
        max_new_tokens: Forwarded to ``generate`` as ``max_new_tokens``.

    Returns:
        The reply text with the prompt and the "### Response:" marker removed.
    """
    # The global RNG is re-seeded on every call, so any randomness inside
    # generate() starts from the same state for each request.
    torch.manual_seed(123)

    # The continuation lines are flush-left on purpose: they are part of the
    # string literal, so indenting them would change the prompt text.
    prompt = f"""Below is an instruction that describes a task. Write a response
that appropriately completes the request.
### Instruction:
{message}
"""

    # Inference only: gradient tracking is disabled.
    with torch.no_grad():
        prompt_ids = text_to_token_ids(prompt, tokenizer).to(device)
        output_ids = generate(
            model=model,
            idx=prompt_ids,
            max_new_tokens=max_new_tokens,
            context_size=model_config["context_length"],
            eos_id=50256,  # presumably GPT-2's end-of-text token id; verify against generate()
        )
        decoded_text = token_ids_to_text(output_ids, tokenizer)

    return extract_response(decoded_text, prompt)
# Create a custom chat interface without using ChatInterface class
def respond(message, chat_history):
    """
    Handle one chat submission and return the updated UI state.

    Args:
        message: Text the user submitted in the textbox.
        chat_history: List of ``(user_message, bot_message)`` tuples shown so
            far; it is extended in place. ``None`` is treated as an empty
            history.

    Returns:
        A ``("", chat_history)`` tuple: the empty string clears the textbox
        and the history is written back to the chat window.
    """
    if chat_history is None:
        chat_history = []

    # An empty or whitespace-only submission carries no instruction, so skip
    # generation rather than appending a turn for it.
    if not message or not message.strip():
        return "", chat_history

    bot_message = generate_response(message)
    chat_history.append((message, bot_message))
    return "", chat_history
# Build the chat UI: a title, the chat window, a textbox, a "Clear" button
# and a set of example prompts.
with gr.Blocks(theme="soft") as demo:
    # NOTE(review): the title says "125M" while the checkpoint file loaded by
    # this app is named "gpt2-small124M-sft.pth" - confirm which label is intended.
    gr.Markdown("# Fine-tuned GPT-2 125M Model for Chat")
    #gr.Markdown("Chat with a fine-tuned GPT model from 'Build a Large Language Model From Scratch' by Sebastian Raschka")
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(placeholder="Ask me something...", container=False, scale=7)
    clear = gr.Button("Clear")
    # Submitting the textbox calls respond(msg, chatbot); its two return
    # values are written back to the textbox and the chat window.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    # The "Clear" button replaces the chat window's value with an empty list.
    clear.click(lambda: [], None, chatbot)
    # Example prompts that are wired to the textbox.
    gr.Examples(
        examples=[
            "What is the capital of France",
            "What is the opposite of 'wet'?",
            "Write a short poem about AI",
            "Explain the concept of attention in neural networks"
        ],
        inputs=msg
    )
# Launch the interface only when this file is run as a script, not when it is imported.
if __name__ == "__main__":
    # NOTE(review): share=True asks Gradio for a public share link - confirm
    # that this is wanted in every environment where the app is started.
    demo.launch(share=True)
|