# app.py (LoRA-only loading)
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import model_info

# ===== Settings =====
device = 0 if torch.cuda.is_available() else -1
lora_repo = "rahul7star/Qwen2.5-0.5B-Gita"  # ONLY LoRA fine-tuned repo

log_lines = []

def log(msg):
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_lines.append(line)

log(f"Loading LoRA-only model from {lora_repo}")
log(f"Device: {'GPU' if device == 0 else 'CPU'}")
# ====== Tokenizer ======
try:
    tokenizer = AutoTokenizer.from_pretrained(lora_repo, trust_remote_code=True)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token
    log(f"✅ Tokenizer loaded: vocab size {tokenizer.vocab_size}")
except Exception as e:
    log(f"❌ Tokenizer load failed: {e}")
    tokenizer = None
# ====== LoRA-only model ======
model = None
pipe = None

try:
    model = AutoModelForCausalLM.from_pretrained(
        lora_repo,
        trust_remote_code=True,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    model.eval()
    log("✅ LoRA-only model loaded successfully")
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        # When device_map="auto" already placed the model, don't pin the pipeline
        # to a device again; only set it explicitly on CPU-only setups.
        device=device if not torch.cuda.is_available() else None,
    )
    log("✅ Pipeline ready for inference")
except Exception as e:
    log(f"❌ LoRA model load failed: {e}")
# ====== Chat Function ======
def chat_with_model(message, history):
    # Reset the per-request log so the UI only shows entries for this message.
    log_lines.clear()
    log(f"User message: {message}")

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded."

    # Rebuild the running conversation as a plain-text prompt.
    context = "The following is a conversation between a user and an AI assistant trained on Bhagavad Gita excerpts.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"
    log("Built conversation context")
    log(context)

    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f}s")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # Clean reply: drop the prompt echo, stray markup, and any hallucinated turns.
    reply = output[len(context):].strip()
    reply = re.sub(r"(ContentLoaded|<\/?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()
    log(f"Model reply: {reply}")

    history.append((message, reply))
    return "", history, "\n".join(log_lines)
# ===== Gradio =====
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 💬 Qwen LoRA-only – Bhagavad Gita Assistant")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # Return one value per output component (chatbot, log_box).
    clear.click(lambda: ([], ""), None, [chatbot, log_box], queue=False)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
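# Local usage note (assumes gradio, transformers, torch, and accelerate are
# installed, e.g. from the Space's requirements.txt):
#   python app.py
# then open http://localhost:7860. On Hugging Face Spaces this same file is the
# entry point and is launched automatically.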