# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from pathlib import Path
import sys

import tiktoken
import torch
import gradio as gr

# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from utils import GPTModel
from utils import (
    generate,
    text_to_token_ids,
    token_ids_to_text,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def get_model_and_tokenizer():
    """
    Load a GPT-2 model with the fine-tuned weights generated in chapter 7.
    This requires that you run the chapter 7 code first, which produces the
    gpt2-small124M-sft.pth checkpoint file.
    """

    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.0,        # Dropout rate
        "qkv_bias": True         # Query-key-value bias
    }

    tokenizer = tiktoken.get_encoding("gpt2")

    # For local development
    model_path = Path("gpt2-small124M-sft.pth")
    # For Hugging Face deployment
    hf_model_path = Path("gpt2-small124M-sft.pth")

    # Try loading from the Hugging Face model path first, then fall back to local
    if hf_model_path.exists():
        model_path = hf_model_path
    elif not model_path.exists():
        print(
            "Could not find the model file. Please run the chapter 7 code "
            "to generate the gpt2-small124M-sft.pth file or upload it to this directory."
        )
        sys.exit()

    checkpoint = torch.load(model_path, weights_only=True)
    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()  # Set to evaluation mode

    return tokenizer, model, GPT_CONFIG_124M


def extract_response(response_text, input_text):
    return response_text[len(input_text):].replace("### Response:", "").strip()
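
# Illustrative example (hypothetical strings, not part of the app): the model
# echoes the prompt and then answers, so if
#     response_text = prompt + "### Response:\nThe capital of France is Paris."
# then extract_response(response_text, prompt) strips the echoed prompt and the
# "### Response:" marker, returning "The capital of France is Paris."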

# Load model and tokenizer
tokenizer, model, model_config = get_model_and_tokenizer()


def generate_response(message, max_new_tokens=100):
    """Generate a response using the fine-tuned GPT model."""
    torch.manual_seed(123)

    prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{message}
"""

    with torch.no_grad():  # Ensure no gradients are computed during inference
        token_ids = generate(
            model=model,
            idx=text_to_token_ids(prompt, tokenizer).to(device),
            max_new_tokens=max_new_tokens,
            context_size=model_config["context_length"],
            eos_id=50256
        )

    text = token_ids_to_text(token_ids, tokenizer)
    response = extract_response(text, prompt)
    return response


# Create a custom chat interface without using the ChatInterface class
def respond(message, chat_history):
    bot_message = generate_response(message)
    chat_history.append((message, bot_message))
    return "", chat_history


with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# Fine-tuned GPT-2 124M Model for Chat")
    # gr.Markdown("Chat with a fine-tuned GPT model from 'Build a Large Language Model From Scratch' by Sebastian Raschka")

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(placeholder="Ask me something...", container=False, scale=7)
    clear = gr.Button("Clear")

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: [], None, chatbot)

    gr.Examples(
        examples=[
            "What is the capital of France?",
            "What is the opposite of 'wet'?",
            "Write a short poem about AI",
            "Explain the concept of attention in neural networks"
        ],
        inputs=msg
    )

# Launch the interface
if __name__ == "__main__":
    demo.launch(share=True)
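
# Note: share=True asks Gradio to create a temporary public link in addition to
# the local server. For purely local use, the call can be simplified to
#
#     demo.launch()
#
# which serves the app at http://127.0.0.1:7860 by default.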