import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

# ------------------------------
# Load Model & Tokenizer
# ------------------------------
model_name = "kaitchup/Llama-3.2-3B-Instruct-educational-chatbot"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)

# ------------------------------
# Define Chat Function
# ------------------------------
def chat_with_ai(user_input, history):
    history = history or []

    # Rebuild the conversation so far as a plain-text transcript,
    # then append the new user turn with an open "AI:" cue for the model to complete
    messages = [f"User: {msg[0]}\nAI: {msg[1]}" for msg in history]
    messages.append(f"User: {user_input}\nAI:")
    prompt = "\n".join(messages)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    outputs = model.generate(
        **inputs,
        max_new_tokens=256,
        do_sample=True,  # required for temperature/top_p to take effect
        temperature=0.7,
        top_p=0.9,
        pad_token_id=tokenizer.eos_token_id,  # Llama tokenizers ship without a pad token
    )
    reply = tokenizer.decode(outputs[0], skip_special_tokens=True)

    # The decoded text contains the full prompt; keep only the latest AI turn
    if "AI:" in reply:
        reply = reply.split("AI:")[-1].strip()

    history.append((user_input, reply))
    return reply, history

# ------------------------------
# Create Gradio Chat Interface
# ------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("