# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from pathlib import Path
import sys

import tiktoken
import torch
import gradio as gr

# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from utils import (
    GPTModel,
    generate,
    text_to_token_ids,
    token_ids_to_text,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def get_model_and_tokenizer():
    """
    Load a GPT-2 model with the instruction-finetuned weights generated in chapter 7.
    This requires running the chapter 7 code first (with the 124M "gpt2-small" model)
    to produce the gpt2-small124M-sft.pth checkpoint used below.
    """

    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.0,        # Dropout rate
        "qkv_bias": True         # Query-key-value bias
    }
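    # For reference, the larger GPT-2 "medium" (355M) variant used elsewhere in
    # chapter 7 differs only in emb_dim=1024, n_heads=16, and n_layers=24.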

    tokenizer = tiktoken.get_encoding("gpt2")  # GPT-2 byte pair encoding tokenizer

    # The finetuned checkpoint is expected in the same directory as this script,
    # both for local development and for a Hugging Face Spaces deployment
    model_path = Path("gpt2-small124M-sft.pth")

    if not model_path.exists():
        print(
            "Could not find the model file. Please run the chapter 7 code "
            "to generate the gpt2-small124M-sft.pth file, or upload it to this directory."
        )
        sys.exit(1)

    checkpoint = torch.load(model_path, weights_only=True)
    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()  # Set to evaluation mode

    return tokenizer, model, GPT_CONFIG_124M
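
# Note: chapter 7 saves the finetuned weights via torch.save(model.state_dict(), ...),
# so torch.load(..., weights_only=True) above returns a plain state_dict that can be
# passed directly to model.load_state_dict().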


def extract_response(response_text, input_text):
    return response_text[len(input_text):].replace("### Response:", "").strip()
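
# Illustrative example (not actual model output): if the full generated text were
#   "<prompt text>\n\n### Response:\nThe capital of France is Paris."
# extract_response() would return "The capital of France is Paris."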


# Load model and tokenizer
tokenizer, model, model_config = get_model_and_tokenizer()


def generate_response(message, max_new_tokens=100):
    """Generate a response using the fine-tuned GPT model"""
    torch.manual_seed(123)
    
    # Wrap the user message in the Alpaca-style prompt format used during
    # instruction finetuning in chapter 7
    prompt = f"""Below is an instruction that describes a task. Write a response
that appropriately completes the request.

### Instruction:
{message}
"""
    
    with torch.no_grad():  # Ensure no gradients are computed during inference
        token_ids = generate(
            model=model,
            idx=text_to_token_ids(prompt, tokenizer).to(device),
            max_new_tokens=max_new_tokens,
            context_size=model_config["context_length"],
            eos_id=50256  # stop generation at the GPT-2 <|endoftext|> token
        )

    text = token_ids_to_text(token_ids, tokenizer)
    response = extract_response(text, prompt)
    
    return response
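
# A quick sanity check outside the UI might look like this (hypothetical output):
#
#     >>> generate_response("What is the capital of France?", max_new_tokens=30)
#     'The capital of France is Paris.'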


# Create a custom chat interface without using ChatInterface class
def respond(message, chat_history):
    bot_message = generate_response(message)
    chat_history.append((message, bot_message))
    return "", chat_history


with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# Fine-tuned GPT-2 125M Model for Chat")
    #gr.Markdown("Chat with a fine-tuned GPT model from 'Build a Large Language Model From Scratch' by Sebastian Raschka")
    
    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(placeholder="Ask me something...", container=False, scale=7)
    clear = gr.Button("Clear")
    
    # Submitting the textbox sends (message, history) to respond(), which returns
    # the cleared textbox value and the updated history
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: [], None, chatbot)  # reset the chat history
    
    gr.Examples(
        examples=[
            "What is the capital of France",
            "What is the opposite of 'wet'?",
            "Write a short poem about AI",
            "Explain the concept of attention in neural networks"
        ],
        inputs=msg
    )


# Launch the interface
if __name__ == "__main__":
    demo.launch(share=True)
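
# To run this demo locally (assuming the script is saved as app.py next to utils.py
# and the gpt2-small124M-sft.pth checkpoint):
#
#     python app.py
#
# Gradio prints a local URL and, because share=True, a temporary public share link.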