# app.py
"""Gradio playground for the GPT-2 Open Instruct model.

Wraps a HuggingFace text-generation pipeline in a small Blocks UI with
sliders for the common sampling hyper-parameters.
"""
import gradio as gr
from transformers import pipeline

# ---- Load model via pipeline ----
# device_map="auto" places the model on GPU when available (requires
# `accelerate`); falls back to CPU otherwise.
MODEL_NAME = "vicgalle/gpt2-open-instruct-v1"
pipe = pipeline("text-generation", model=MODEL_NAME, device_map="auto")


# ---- Inference function ----
def generate_response(instruction, max_new_tokens=150, temperature=0.7,
                      top_k=50, top_p=0.9, rep_pty=1.2):
    """Generate a response to *instruction* using sampling.

    Args:
        instruction: The user's task description, inserted into the
            Alpaca-style prompt template the model was fine-tuned on.
        max_new_tokens: Upper bound on generated tokens.
        temperature: Softmax temperature for sampling.
        top_k: Top-K sampling cutoff.
        top_p: Nucleus (top-p) sampling cutoff.
        rep_pty: Repetition penalty (1.0 = no penalty).

    Returns:
        The model's response text with the prompt scaffold removed.
    """
    # Alpaca-style instruction template used by open-instruct fine-tunes.
    system_prompt = f"""Below is an instruction that describes a task. Write a response that appropriately completes the request.

### Instruction:
{instruction}

### Response:
"""
    output = pipe(
        system_prompt,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        top_k=top_k,
        top_p=top_p,
        do_sample=True,
        # GPT-2 has no pad token; reuse EOS to silence the warning.
        pad_token_id=pipe.tokenizer.eos_token_id,
        repetition_penalty=rep_pty,
        # Return only the newly generated tokens, not the echoed prompt —
        # more robust than string-splitting the full text afterwards.
        return_full_text=False,
    )
    text = output[0]["generated_text"]
    # Defensive fallback: strip a re-emitted "### Response:" marker if the
    # model happens to generate one itself.
    return text.split("### Response:")[-1].strip()


# ---- Gradio UI ----
with gr.Blocks() as demo:
    gr.Markdown("# 🛸 GPT-2 Open Instruct Playground\nFine-tuned with Open Instruct v1!")
    with gr.Row():
        with gr.Column(scale=4):
            instruction = gr.Textbox(
                label="Instruction",
                value="What is the capital city of France?",
                lines=6,
            )
            output_box = gr.Textbox(label="Model Output", lines=10)
        with gr.Column(scale=1):
            generate_btn = gr.Button("Generate ✨")
            max_new_tokens = gr.Slider(50, 500, value=150, step=10, label="Max new tokens")
            temperature = gr.Slider(0.1, 1.5, value=0.7, step=0.05, label="Temperature")
            top_k = gr.Slider(10, 100, value=50, step=5, label="Top-K sampling")
            top_p = gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-P (nucleus) sampling")
            rep_pty = gr.Slider(
                label="Repetition Penalty",
                info="Token repetition penalty. 1.0 means no penalty.",
                minimum=1.0,
                maximum=2.0,
                step=0.01,
                value=1.2,
            )
    # Wire the button: slider order must match generate_response's signature.
    generate_btn.click(
        generate_response,
        [instruction, max_new_tokens, temperature, top_k, top_p, rep_pty],
        output_box,
    )

# ---- Launch ----
if __name__ == "__main__":
    demo.launch()