# app.py
import gradio as gr
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, pipeline
import torch
import os
import re
import json
import time
from datetime import datetime
from huggingface_hub import hf_hub_download, model_info

# ====== Load Model ======
device = 0 if torch.cuda.is_available() else -1  # pipeline device index: GPU 0 or CPU
model_name = "rahul7star/Qwen2.5-3B-Instruct"

log_lines = []


def log(msg):
    """Append a timestamped message to the debug log."""
    line = f"[{datetime.now().strftime('%H:%M:%S')}] {msg}"
    print(line)
    log_lines.append(line)


log("🔍 Initializing model load sequence...")
log(f"Using model: {model_name}")
log(f"Detected device: {'GPU' if device == 0 else 'CPU'}")

hf_cache = os.path.expanduser("~/.cache/huggingface/hub")
log(f"Model cache directory: {hf_cache}")

# ====== Inspect Hugging Face repo ======
try:
    info = model_info(model_name)
    log("📦 Hugging Face model card info loaded:")
    log(f"  - Model ID: {info.id}")
    log(f"  - Private: {info.private}")
    log(f"  - Last modified: {info.last_modified}")
    log(f"  - Files count: {len(info.siblings)}")
    for s in info.siblings[:5]:
        log(f"    · {s.rfilename}")
except Exception as e:
    log(f"⚠️ Could not fetch model card info: {e}")

# ====== Load Config ======
try:
    config = AutoConfig.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Loaded model configuration:")
    log(json.dumps(config.to_dict(), indent=2)[:800] + " ...")
except Exception as e:
    log(f"⚠️ Could not read model config: {e}")
    config = None

# ====== Load Tokenizer ======
try:
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    log("✅ Tokenizer loaded successfully.")
    log(f"Tokenizer vocab size: {tokenizer.vocab_size}")
except Exception as e:
    log(f"⚠️ Could not load tokenizer: {e}")
    tokenizer = None

# ====== Load Model ======
model = None
pipe = None
try:
    start_load = time.time()
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        trust_remote_code=True,
        torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
        device_map="auto" if torch.cuda.is_available() else None,
    )
    # When device_map="auto" has already placed the model via accelerate, the
    # pipeline must not be given an explicit device, so only pass one on CPU.
    pipe = pipeline(
        "text-generation",
        model=model,
        tokenizer=tokenizer,
        device=None if torch.cuda.is_available() else device,
    )
    log(f"✅ Model pipeline fully loaded in {time.time() - start_load:.2f} seconds.")
    log(f"📂 Actual model source: {model.name_or_path}")
    log(f"🧩 Architecture: {getattr(model.config, 'architectures', ['Unknown'])}")
except Exception as e:
    log(f"❌ Model failed to load: {e}")

# ====== Detect if custom fine-tune ======
try:
    repo_base = model_name.split("/")[0]
    if "rahul7star" in model.name_or_path:
        log("✅ Verified: Model files are correctly loaded from your custom repo.")
    elif "Qwen" in model.name_or_path:
        log("⚠️ Warning: The model resolved to the base model Qwen - your fine-tuned weights may be missing.")
        log("   → Check if 'pytorch_model.bin' or 'adapter_model.safetensors' exists in your repo.")
    else:
        log("ℹ️ Loaded from unknown source, verify repository structure manually.")
except Exception as e:
    log(f"⚠️ Source verification failed: {e}")

# ====== Try to extract dataset/training info ======
def extract_training_info(model_name):
    """Try to read training details (dataset, fine-tuning source) from model repo files."""
    data = {}
    try:
        # Try README
        readme_path = hf_hub_download(model_name, filename="README.md")
        with open(readme_path, "r", encoding="utf-8") as f:
            readme_text = f.read()
        log("📖 Found README.md - scanning for dataset references...")
        matches = re.findall(r"(rahul7star/\w+|dataset|fine[- ]?tune|trained on|data:)", readme_text, re.I)
        if matches:
            data["readme_mentions"] = matches[:5]
            log(f"✅ README mentions possible dataset: {matches[:5]}")
        else:
            log("ℹ️ No explicit dataset mention found in README.")
    except Exception as e:
        log(f"⚠️ No README.md found or could not read: {e}")

    # Try config.json or adapter_config.json
    for fname in ["config.json", "adapter_config.json"]:
        try:
            fpath = hf_hub_download(model_name, filename=fname)
            with open(fpath, "r", encoding="utf-8") as f:
                content = json.load(f)
            for k in ["dataset", "train_data", "base_model_name_or_path"]:
                if k in content:
                    data[k] = content[k]
                    log(f"✅ Found '{k}' in {fname}: {content[k]}")
        except Exception:
            pass

    if not data:
        log("⚠️ No training dataset info detected in model files.")
    return data


training_info = extract_training_info(model_name)


# ====== Chat Function ======
def chat_with_model(message, history):
    # Reset the log so the panel shows only the current turn's generation.
    log_lines.clear()
    log("💭 Starting chat generation process...")
    log(f"User message: {message}")

    if pipe is None:
        return "", history, "⚠️ Model pipeline not loaded. Please check initialization logs."

    # 1️⃣ Build conversation context
    context = "The following is a conversation between a user and an AI assistant inspired by the Bhagavad Gita.\n"
    for user, bot in history:
        context += f"User: {user}\nAssistant: {bot}\n"
    context += f"User: {message}\nAssistant:"
    log("📄 Built conversation context:")
    log(context)

    # 2️⃣ Generate response
    log("🧠 Encoding input and generating response...")
    start_time = time.time()
    try:
        output = pipe(
            context,
            max_new_tokens=200,
            do_sample=True,
            temperature=0.7,
            top_p=0.9,
            repetition_penalty=1.1,
            truncation=True,
        )[0]["generated_text"]
        log(f"⏱️ Inference took {time.time() - start_time:.2f} seconds")
    except Exception as e:
        log(f"❌ Generation failed: {e}")
        return "", history, "\n".join(log_lines)

    # 3️⃣ Clean model output
    reply = output[len(context):].strip()
    reply = re.sub(r"(ContentLoaded|</?[^>]+>|[\r\n]{2,})", " ", reply)
    reply = re.sub(r"\s{2,}", " ", reply).strip()
    reply = reply.split("User:")[0].split("Assistant:")[0].strip()
    log("🪄 Cleaned model output successfully.")
    log(f"Model reply: {reply}")

    history.append((message, reply))
    return "", history, "\n".join(log_lines)


# ====== Gradio Interface ======
with gr.Blocks(theme=gr.themes.Soft(primary_hue="blue")) as demo:
    gr.Markdown("## 💬 Qwen2.5-3B-Gita - Conversational Assistant with Detailed Debug Log")
    with gr.Row():
        with gr.Column(scale=2):
            chatbot = gr.Chatbot(height=500)
            msg = gr.Textbox(placeholder="Ask about the Gita, life, or philosophy...", label="Your Message")
            clear = gr.Button("Clear")
        with gr.Column(scale=1):
            log_box = gr.Textbox(label="Detailed Model Log", lines=25, interactive=False)

    msg.submit(chat_with_model, [msg, chatbot], [msg, chatbot, log_box])
    # Two output components, so the clear handler must return exactly two values.
    clear.click(lambda: (None, ""), None, [chatbot, log_box], queue=False)

# ====== Launch ======
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)
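

# ====== Appendix: alternative prompt builder (not wired into the app above) ======
# A minimal sketch, assuming the tokenizer ships a chat template (Qwen instruct
# checkpoints normally do). The helper name `build_chat_prompt` is hypothetical;
# it could replace the hand-rolled "User:/Assistant:" context in chat_with_model
# so the model sees the exact turn format it was instruction-tuned on.
def build_chat_prompt(tokenizer, history, message):
    """Build a generation prompt string via the tokenizer's chat template."""
    messages = [{"role": "system",
                 "content": "You are an AI assistant inspired by the Bhagavad Gita."}]
    for user, bot in history:
        messages.append({"role": "user", "content": user})
        messages.append({"role": "assistant", "content": bot})
    messages.append({"role": "user", "content": message})
    # add_generation_prompt=True appends the assistant turn marker so the model
    # continues as the assistant instead of echoing the user.
    return tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )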