# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
# - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch
from pathlib import Path
import sys

import tiktoken
import torch
import gradio as gr
# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from utils import (
    GPTModel,
    generate,
    text_to_token_ids,
    token_ids_to_text,
)
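
# Use the GPU when available; the model and all input tensors must be on the same device.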
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def get_model_and_tokenizer():
    """
    Loads the GPT-2 model with the instruction-finetuned weights generated in chapter 7.
    This requires that you run the chapter 7 code first, which produces the
    gpt2-small124M-sft.pth checkpoint file.
    """
    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.0,        # Dropout rate
        "qkv_bias": True         # Query-key-value bias
    }

    tokenizer = tiktoken.get_encoding("gpt2")

    # The same checkpoint filename is used for local development and for the
    # Hugging Face deployment, so a single existence check suffices.
    model_path = Path("gpt2-small124M-sft.pth")
    if not model_path.exists():
        print(
            "Could not find the model file. Please run the chapter 7 code "
            "to generate the gpt2-small124M-sft.pth file or upload it to this directory."
        )
        sys.exit(1)

    checkpoint = torch.load(model_path, weights_only=True)
    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()  # Set to evaluation mode (disables dropout)
    return tokenizer, model, GPT_CONFIG_124M
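
# The generated text echoes the prompt; keep only the part after it and
# drop the "### Response:" marker.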
def extract_response(response_text, input_text):
    return response_text[len(input_text):].replace("### Response:", "").strip()
# Load model and tokenizer
tokenizer, model, model_config = get_model_and_tokenizer()
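
# The prompt below follows the Alpaca-style instruction format that chapter 7
# uses for finetuning, so inference-time inputs match the training format.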
def generate_response(message, max_new_tokens=100):
    """Generate a response using the fine-tuned GPT model."""
    torch.manual_seed(123)  # Fixed seed for reproducible outputs

    prompt = f"""Below is an instruction that describes a task. Write a response
that appropriately completes the request.
### Instruction:
{message}
"""

    with torch.no_grad():  # Ensure no gradients are computed during inference
        token_ids = generate(
            model=model,
            idx=text_to_token_ids(prompt, tokenizer).to(device),
            max_new_tokens=max_new_tokens,
            context_size=model_config["context_length"],
            eos_id=50256  # GPT-2's <|endoftext|> token id; stops generation early
        )

    text = token_ids_to_text(token_ids, tokenizer)
    response = extract_response(text, prompt)
    return response

# Create a custom chat interface without using the ChatInterface class
def respond(message, chat_history):
    bot_message = generate_response(message)
    chat_history.append((message, bot_message))  # (user, bot) tuple format expected by gr.Chatbot
    return "", chat_history

with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# Fine-tuned GPT-2 124M Model for Chat")
    gr.Markdown(
        "Chat with a fine-tuned GPT model from "
        "'Build a Large Language Model From Scratch' by Sebastian Raschka"
    )
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(placeholder="Ask me something...", container=False, scale=7)
    clear = gr.Button("Clear")

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: [], None, chatbot)

    gr.Examples(
        examples=[
            "What is the capital of France?",
            "What is the opposite of 'wet'?",
            "Write a short poem about AI",
            "Explain the concept of attention in neural networks"
        ],
        inputs=msg
    )
# Launch the interface
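# Running `python app.py` serves the UI locally (Gradio defaults to
# http://127.0.0.1:7860); share=True also creates a temporary public link.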
if __name__ == "__main__":
    demo.launch(share=True)