import os
import torch
import gradio as gr
import spaces
from PIL import Image
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM
import warnings

warnings.filterwarnings("ignore")

# =========================================================
# Model configuration
# =========================================================
MODEL_ID = "openbmb/MiniCPM-o-2_6"

# Lazy-loaded globals: the model is only loaded on first use
model = None
tokenizer = None


def load_model():
    """Load the model only when it is first needed."""
    global model, tokenizer
    if model is not None:
        return

    print(f"Loading {MODEL_ID}...")

    # Use float16 on GPU for ZeroGPU compatibility
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    try:
        # Load the tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            use_fast=False,
        )

        # Load the model; trust_remote_code=True is required because the
        # checkpoint ships custom model code
        model = AutoModel.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
            attn_implementation="eager",
        ).eval()

        if torch.cuda.is_available():
            model = model.cuda()
        print("Model loaded successfully!")
    except Exception as e:
        print(f"Error with AutoModel, trying AutoModelForCausalLM: {e}")
        # Fallback: try AutoModelForCausalLM instead
        try:
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                trust_remote_code=True,  # required for the custom model code
                torch_dtype=dtype,
                low_cpu_mem_usage=True,
                attn_implementation="eager",
            ).eval()
            if torch.cuda.is_available():
                model = model.cuda()
            print("Model loaded successfully with AutoModelForCausalLM!")
        except Exception as e2:
            print(f"Failed to load model: {e2}")
            raise RuntimeError(f"Could not load model: {e2}") from e2


# =========================================================
# Image preprocessing
# =========================================================
def process_image(image_input):
    """Convert the input into an RGB PIL image for the model."""
    if image_input is None:
        return None
    if isinstance(image_input, str):
        return Image.open(image_input).convert("RGB")
    return image_input.convert("RGB")


# =========================================================
# Inference (ZeroGPU)
# =========================================================
@spaces.GPU(duration=60)
def generate_response(text_input, image_input, temperature, top_p, max_new_tokens):
    """Run text and image inputs through MiniCPM-o-2_6."""
    if not text_input and not image_input:
        return "Please provide text or image input."

    try:
        load_model()

        # Prepare the inputs
        if image_input is not None:
            # Image + text
            image = process_image(image_input)
            if not text_input:
                text_input = "What is shown in this image? Please describe in detail."
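            # MiniCPM-o ships a custom chat() helper via its
            # trust_remote_code model class, which handles image encoding
            # internally; prefer it for multimodal input and fall back to
            # plain generate() only if it is unavailable or fails.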
            if hasattr(model, "chat"):
                try:
                    # Use the model's built-in chat() method
                    msgs = [{"role": "user", "content": [image, text_input]}]
                    with torch.no_grad():
                        response = model.chat(
                            image=image,
                            msgs=msgs,
                            tokenizer=tokenizer,
                            sampling=True,
                            temperature=temperature,
                            top_p=top_p,
                            max_new_tokens=max_new_tokens,
                        )
                    return response
                except Exception as e:
                    print(f"Chat method failed: {e}")
                    # Fall through to the plain text pipeline below

            # Fallback for images: the image cannot be passed through the plain
            # tokenizer, so only the text question is embedded in the prompt
            prompt = f"Image: [Image will be processed]\n\nQuestion: {text_input}\n\nAnswer:"
        else:
            # Text-only input
            prompt = text_input

        # Standard text pipeline
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=2048,
        )

        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items() if v is not None}

        # Generation settings; with temperature == 0, use near-greedy decoding
        gen_kwargs = {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature if temperature > 0 else 1e-7,
            "top_p": top_p,
            "do_sample": temperature > 0,
            "pad_token_id": tokenizer.pad_token_id
            if tokenizer.pad_token_id is not None
            else tokenizer.eos_token_id,
            "eos_token_id": tokenizer.eos_token_id,
        }

        # Generate
        with torch.no_grad():
            outputs = model.generate(**inputs, **gen_kwargs)

        # Decode only the newly generated tokens
        response = tokenizer.decode(
            outputs[0][inputs["input_ids"].shape[1]:],
            skip_special_tokens=True,
        )

        return response.strip()

    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}"


# =========================================================
# UI helpers
# =========================================================
def clear_all():
    """Clear all inputs and outputs."""
    return "", None, ""


def update_examples_visibility(show_examples):
    """Toggle the visibility of the examples section."""
    return gr.update(visible=show_examples)


# =========================================================
# Gradio interface
# =========================================================
def create_demo():
    """Build the Gradio interface."""
    with gr.Blocks(
        title="MiniCPM-o-2.6",
        css="""
        .gradio-container { max-width: 1200px; margin: auto; }
        h1 { text-align: center; }
        .contain { background: white; border-radius: 10px; padding: 20px; }
        """,
    ) as demo:
        gr.Markdown(
            """
            # 🤖 MiniCPM-o-2.6 - Multimodal AI Assistant
            An 8B-parameter model with GPT-4o-level multimodal performance
            Supports text generation, image understanding, OCR, and multilingual conversation