Update app.py
app.py
CHANGED
@@ -13,8 +13,8 @@ import ast
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 
-# ✅ Model name - using
-model_name = "
+# ✅ Model name - using Microsoft Phi-4 multimodal model
+model_name = "microsoft/Phi-4-multimodal-instruct"
 
 def load_model():
     logger.info(f"🚀 Loading model: {model_name}")
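Note: the commit points model_name at microsoft/Phi-4-multimodal-instruct, but the diff never shows the body of load_model(). The following is only a sketch of a typical transformers loading path; the dtype, device_map, and trust_remote_code choices are assumptions, not the Space's actual code.

```python
# Hedged sketch: load_model()'s body is not shown in this diff, so this
# illustrates one common way to load the model, not the Space's real code.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "microsoft/Phi-4-multimodal-instruct"

def load_model():
    # Assumption: the Phi-4 repo ships custom code, hence trust_remote_code=True
    tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
    model = AutoModelForCausalLM.from_pretrained(
        model_name,
        torch_dtype=torch.bfloat16,  # assumption: half precision to fit GPU memory
        device_map="auto",           # assumption: requires the accelerate package
        trust_remote_code=True,
    )
    return tokenizer, model
```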
@@ -118,19 +118,12 @@ def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, re
     try:
         logger.info(f"📝 Prompt: {prompt[:80]}...")
 
-        #
-
-
-
-
-
-        # Apply chat template
-        inputs = tokenizer.apply_chat_template(
-            message,
-            return_tensors="pt",
-            add_generation_prompt=True,
-            return_dict=True,
-        ).to(model.device)
+        # For Phi-4 multimodal, we'll use a simpler approach
+        # Format the prompt for Phi-4
+        formatted_prompt = f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n"
+
+        # Tokenize
+        inputs = tokenizer(formatted_prompt, return_tensors="pt").to(model.device)
 
         with torch.no_grad():
             outputs = model.generate(
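This hunk swaps tokenizer.apply_chat_template(...) for a hand-written Phi-style prompt. A small sketch of the two approaches side by side; whether they yield identical token ids depends on the chat template shipped with the repo, and the example prompt is invented:

```python
from transformers import AutoTokenizer

# Assumption: same repo as the commit; chat-template details vary by repo.
tokenizer = AutoTokenizer.from_pretrained(
    "microsoft/Phi-4-multimodal-instruct", trust_remote_code=True
)

prompt = "What is the capital of France?"

# New approach from this commit: write the chat markers by hand.
formatted_prompt = f"<|user|>\n{prompt}<|end|>\n<|assistant|>\n"
manual = tokenizer(formatted_prompt, return_tensors="pt")

# Removed approach: let the tokenizer's chat template build the structure from
# role/content dicts (add_generation_prompt appends the assistant marker).
messages = [{"role": "user", "content": prompt}]
templated = tokenizer.apply_chat_template(
    messages,
    return_tensors="pt",
    add_generation_prompt=True,
    return_dict=True,
)

# If the repo's template uses the same markers, the two sequences match.
print(manual.input_ids.shape, templated["input_ids"].shape)
```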
@@ -149,7 +142,7 @@ def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9, re
         )
 
         # Decode the response
-        decoded = tokenizer.decode(outputs[0][inputs
+        decoded = tokenizer.decode(outputs[0][inputs.input_ids.shape[1]:], skip_special_tokens=True)
 
         # Check for function calls
         function_calls = []
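The fixed decode line works because model.generate() returns the prompt ids followed by the newly generated ids in one tensor, so slicing off the first inputs.input_ids.shape[1] positions leaves only the model's reply. A toy illustration with made-up ids:

```python
import torch

# Toy stand-in: pretend the prompt tokenized to 5 ids and generate() appended 3.
input_ids = torch.tensor([[101, 102, 103, 104, 105]])
outputs = torch.tensor([[101, 102, 103, 104, 105, 201, 202, 203]])

prompt_len = input_ids.shape[1]       # same role as inputs.input_ids.shape[1] in app.py
new_tokens = outputs[0][prompt_len:]  # completion only: tensor([201, 202, 203])
# In app.py this slice is what tokenizer.decode(..., skip_special_tokens=True) receives.
print(new_tokens.tolist())            # [201, 202, 203]
```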
@@ -185,12 +178,12 @@ iface = gr.Interface(
         gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty")
     ],
     outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
-    title="🤖
-    description="Ask questions in English or 中文 – Powered by
+    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
+    description="Ask questions in English or 中文 – Powered by microsoft/Phi-4-multimodal-instruct",
     theme=gr.themes.Soft()
 )
 
 # ✅ Run the app
 if __name__ == "__main__":
-    logger.info("🚀 Starting
+    logger.info("🚀 Starting Microsoft Phi-4 Multimodal Assistant...")
     iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
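For orientation, the gr.Interface being edited here follows the standard Gradio pattern. A runnable skeleton under stated assumptions: the first three sliders, their ranges, and the stub fn are invented, since the diff only shows the last slider, the title/description, and the launch line.

```python
# Minimal runnable skeleton of the Interface this hunk edits; everything
# marked "assumption" is illustrative and not visible in the diff.
import gradio as gr

def generate_response(prompt, max_new_tokens=512, temperature=0.4, top_p=0.9,
                      repetition_penalty=1.1):  # last parameter inferred from the slider
    return f"(stub) you said: {prompt}"  # stand-in for the real model call

iface = gr.Interface(
    fn=generate_response,
    inputs=[
        gr.Textbox(label="Prompt", lines=4),                                   # assumption
        gr.Slider(16, 2048, value=512, step=16, label="Max New Tokens"),       # assumption
        gr.Slider(0.1, 1.5, value=0.4, step=0.05, label="Temperature"),        # assumption
        gr.Slider(0.1, 1.0, value=0.9, step=0.05, label="Top-p"),              # assumption
        gr.Slider(1.0, 1.5, value=1.1, step=0.05, label="Repetition Penalty"),
    ],
    outputs=gr.Textbox(label="AI Response", lines=10, show_copy_button=True),
    title="🤖 Microsoft Phi-4 Multimodal AI Assistant",
    description="Ask questions in English or 中文 – Powered by microsoft/Phi-4-multimodal-instruct",
    theme=gr.themes.Soft(),
)

if __name__ == "__main__":
    iface.launch(server_name="0.0.0.0", server_port=7860, share=False)
```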