# Copyright (c) Sebastian Raschka under Apache License 2.0 (see LICENSE.txt).
# Source for "Build a Large Language Model From Scratch"
#   - https://www.manning.com/books/build-a-large-language-model-from-scratch
# Code: https://github.com/rasbt/LLMs-from-scratch

from pathlib import Path
import sys

import tiktoken
import torch
import gradio as gr

# For llms_from_scratch installation instructions, see:
# https://github.com/rasbt/LLMs-from-scratch/tree/main/pkg
from utils import (
    GPTModel,
    generate,
    text_to_token_ids,
    token_ids_to_text,
)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
def get_model_and_tokenizer():
    """
    Loads the GPT-2 model with the finetuned weights generated in chapter 7.
    This requires that you run the chapter 7 code first, which generates
    the necessary gpt2-small124M-sft.pth file.
    """
    GPT_CONFIG_124M = {
        "vocab_size": 50257,     # Vocabulary size
        "context_length": 1024,  # Context length
        "emb_dim": 768,          # Embedding dimension
        "n_heads": 12,           # Number of attention heads
        "n_layers": 12,          # Number of layers
        "drop_rate": 0.0,        # Dropout rate
        "qkv_bias": True         # Query-key-value bias
    }
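
    # Note: if you instead finetuned the 355M "medium" model in chapter 7
    # (producing gpt2-medium355M-sft.pth), the configuration above would
    # change to "emb_dim": 1024, "n_heads": 16, and "n_layers": 24.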
    tokenizer = tiktoken.get_encoding("gpt2")

    # Checkpoint produced by the chapter 7 finetuning code; the same
    # relative path works locally and in a Hugging Face Space
    model_path = Path("gpt2-small124M-sft.pth")
    if not model_path.exists():
        print(
            "Could not find the model file. Please run the chapter 7 code "
            "to generate the gpt2-small124M-sft.pth file or upload it to this directory."
        )
        sys.exit(1)

    # map_location ensures the checkpoint loads even if it was saved on a GPU
    checkpoint = torch.load(model_path, map_location=device, weights_only=True)
    model = GPTModel(GPT_CONFIG_124M)
    model.load_state_dict(checkpoint)
    model.to(device)
    model.eval()  # Set to evaluation mode
    return tokenizer, model, GPT_CONFIG_124M
def extract_response(response_text, input_text):
    # Strip the echoed prompt and the "### Response:" header from the generated text
    return response_text[len(input_text):].replace("### Response:", "").strip()
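
# Illustrative example of what extract_response does (hypothetical strings,
# not real model output):
#   extract_response("<prompt>### Response:\nParis.", "<prompt>")  -> "Paris."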
# Load model and tokenizer
tokenizer, model, model_config = get_model_and_tokenizer()
def generate_response(message, max_new_tokens=100):
    """Generate a response using the fine-tuned GPT model"""
    torch.manual_seed(123)  # For reproducible outputs

    # Alpaca-style prompt format used for instruction finetuning in chapter 7
    prompt = f"""Below is an instruction that describes a task. Write a response
that appropriately completes the request.

### Instruction:
{message}
"""

    with torch.no_grad():  # Ensure no gradients are computed during inference
        token_ids = generate(
            model=model,
            idx=text_to_token_ids(prompt, tokenizer).to(device),
            max_new_tokens=max_new_tokens,
            context_size=model_config["context_length"],
            eos_id=50256  # GPT-2 <|endoftext|> token; stops generation early
        )
    text = token_ids_to_text(token_ids, tokenizer)
    response = extract_response(text, prompt)
    return response
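
# Standalone usage example (assumes the chapter 7 checkpoint is present):
#   print(generate_response("What is the capital of France?"))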
# Create a custom chat interface without using the ChatInterface class
def respond(message, chat_history):
    bot_message = generate_response(message)
    chat_history.append((message, bot_message))
    return "", chat_history
with gr.Blocks(theme="soft") as demo:
    gr.Markdown("# Fine-tuned GPT-2 124M Model for Chat")
    gr.Markdown(
        "Chat with a fine-tuned GPT model from "
        "'Build a Large Language Model From Scratch' by Sebastian Raschka"
    )

    chatbot = gr.Chatbot(height=600)
    msg = gr.Textbox(placeholder="Ask me something...", container=False, scale=7)
    clear = gr.Button("Clear")

    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    clear.click(lambda: [], None, chatbot)

    gr.Examples(
        examples=[
            "What is the capital of France?",
            "What is the opposite of 'wet'?",
            "Write a short poem about AI",
            "Explain the concept of attention in neural networks"
        ],
        inputs=msg
    )
# Launch the interface
if __name__ == "__main__":
    demo.launch(share=True)