import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline

# Lightweight model that runs on CPU (fine for a demo). If you want a model
# actually trained on Portuguese, swap in:
# MODEL_ID = "unicamp-dl/ptt5-small-portuguese-vocab"
MODEL_ID = "google/flan-t5-small"

tok = AutoTokenizer.from_pretrained(MODEL_ID)
mdl = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)

pipe = pipeline(
    "text2text-generation",
    model=mdl,
    tokenizer=tok,
)


def gen(prompt: str) -> str:
    # Return an empty string for empty or whitespace-only prompts.
    if not prompt or not prompt.strip():
        return ""
    out = pipe(
        prompt,
        max_new_tokens=120,
        do_sample=True,
        top_p=0.9,
        temperature=0.7,
        repetition_penalty=1.15,
        num_return_sequences=1,
    )
    return out[0]["generated_text"]


# Gradio already exposes /api/predict automatically.
demo = gr.Interface(
    fn=gen,
    inputs=gr.Textbox(label="Prompt"),
    outputs=gr.Textbox(label="Output"),
    title="T5 Mini Reply",
    description="Short reply generation (CPU).",
)

if __name__ == "__main__":
    demo.launch()
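
# Since the comment above notes that Gradio exposes an HTTP API automatically,
# here is a minimal client-side sketch, kept as comments so this file stays
# runnable. It assumes the demo is running locally on Gradio's default port
# (7860) and that the `gradio_client` package is installed; the prompt string
# is illustrative. For a single-function gr.Interface, the endpoint name
# defaults to "/predict".
#
#     from gradio_client import Client
#
#     client = Client("http://127.0.0.1:7860/")
#     result = client.predict("Write a short greeting", api_name="/predict")
#     print(result)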