import torch
import gradio as gr
import spaces
import warnings
from PIL import Image
from transformers import AutoModel, AutoTokenizer, AutoModelForCausalLM

warnings.filterwarnings("ignore")

# =========================================================
# Model configuration
# =========================================================
MODEL_ID = "openbmb/MiniCPM-o-2_6"

# Lazily loaded model handles
model = None
tokenizer = None


def load_model():
    """Load the model only on first use."""
    global model, tokenizer
    if model is not None:
        return

    print(f"Loading {MODEL_ID}...")

    # Use float16 on GPU for ZeroGPU compatibility
    dtype = torch.float16 if torch.cuda.is_available() else torch.float32

    try:
        # Load the tokenizer first
        tokenizer = AutoTokenizer.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            use_fast=False
        )

        # Load the model; trust_remote_code=True pulls in MiniCPM's custom code
        model = AutoModel.from_pretrained(
            MODEL_ID,
            trust_remote_code=True,
            torch_dtype=dtype,
            low_cpu_mem_usage=True,
            attn_implementation="eager",
        ).eval()

        if torch.cuda.is_available():
            model = model.cuda()

        print("Model loaded successfully!")

    except Exception as e:
        print(f"Error with AutoModel, trying AutoModelForCausalLM: {e}")
        # Fallback attempt with AutoModelForCausalLM
        try:
            model = AutoModelForCausalLM.from_pretrained(
                MODEL_ID,
                trust_remote_code=True,  # essential for the custom model code
                torch_dtype=dtype,
                low_cpu_mem_usage=True,
                attn_implementation="eager"
            ).eval()

            if torch.cuda.is_available():
                model = model.cuda()

            print("Model loaded successfully with AutoModelForCausalLM!")
        except Exception as e2:
            print(f"Failed to load model: {e2}")
            raise RuntimeError(f"Could not load model: {e2}") from e2


# =========================================================
# Image processing
# =========================================================
def process_image(image_input):
    """Convert the input into an RGB PIL image for the model."""
    if image_input is None:
        return None
    if isinstance(image_input, str):
        return Image.open(image_input).convert('RGB')
    return image_input.convert('RGB')


# =========================================================
# Inference with ZeroGPU
# =========================================================
@spaces.GPU(duration=60)
def generate_response(
    text_input,
    image_input,
    temperature,
    top_p,
    max_new_tokens
):
    """Handle text and image inputs with MiniCPM-o-2_6."""
    if not text_input and not image_input:
        return "Please provide text or image input."

    try:
        global model, tokenizer
        load_model()

        # Prepare the inputs
        if image_input is not None:
            # Image + text
            image = process_image(image_input)
            if not text_input:
                text_input = "What is shown in this image? Please describe in detail."
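            # MiniCPM-o exposes a custom chat() helper through its remote code.
            # It takes msgs as a list of {"role", "content"} dicts whose content
            # list may mix PIL images and strings, e.g.:
            #   msgs = [{"role": "user", "content": [image, "Describe this."]}]
            # We try that path first and fall back to plain generate() if it fails.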
            # Check whether the model exposes a chat() method
            if hasattr(model, 'chat'):
                try:
                    # Use the model's custom chat method
                    msgs = [{"role": "user", "content": [image, text_input]}]
                    with torch.no_grad():
                        response = model.chat(
                            image=image,
                            msgs=msgs,
                            tokenizer=tokenizer,
                            sampling=True,
                            temperature=temperature,
                            top_p=top_p,
                            max_new_tokens=max_new_tokens
                        )
                    return response
                except Exception as e:
                    print(f"Chat method failed: {e}")
                    # Fall through to the plain generate() path below

            # Fallback for images: only a textual placeholder is sent;
            # the image itself is not passed to generate()
            prompt = f"Image: [Image will be processed]\n\nQuestion: {text_input}\n\nAnswer:"
        else:
            # Text only
            prompt = text_input

        # Standard text processing
        inputs = tokenizer(
            prompt,
            return_tensors="pt",
            padding=True,
            truncation=True,
            max_length=2048
        )

        if torch.cuda.is_available():
            inputs = {k: v.cuda() for k, v in inputs.items() if v is not None}

        # Generation settings
        pad_id = tokenizer.pad_token_id if tokenizer.pad_token_id is not None else tokenizer.eos_token_id
        gen_kwargs = {
            "max_new_tokens": max_new_tokens,
            "temperature": temperature if temperature > 0 else 1e-7,
            "top_p": top_p,
            "do_sample": temperature > 0,
            "pad_token_id": pad_id,
            "eos_token_id": tokenizer.eos_token_id,
        }

        # Generate
        with torch.no_grad():
            outputs = model.generate(**inputs, **gen_kwargs)

        # Decode only the newly generated tokens
        response = tokenizer.decode(
            outputs[0][inputs['input_ids'].shape[1]:],
            skip_special_tokens=True
        )

        return response.strip()

    except Exception as e:
        import traceback
        traceback.print_exc()
        return f"Error: {str(e)}"


# =========================================================
# UI helper functions
# =========================================================
def clear_all():
    """Clear all inputs and outputs."""
    return "", None, ""


def update_examples_visibility(show_examples):
    """Toggle the visibility of the examples block."""
    return gr.update(visible=show_examples)


# =========================================================
# Gradio interface
# =========================================================
def create_demo():
    """Build the Gradio interface."""
    with gr.Blocks(title="MiniCPM-o-2.6", css="""
        .gradio-container { max-width: 1200px; margin: auto; }
        h1 { text-align: center; }
        .contain { background: white; border-radius: 10px; padding: 20px; }
    """) as demo:
        gr.Markdown(
            """
# 🤖 MiniCPM-o-2.6 - Multimodal AI Assistant

An 8B-parameter model with GPT-4-level performance.
Supports text generation, image understanding, OCR, and multilingual conversations.

""" ) with gr.Row(): # العمود الرئيسي with gr.Column(scale=2): with gr.Group(): text_input = gr.Textbox( label="💭 Text Input", placeholder="Enter your question or prompt here...\nYou can ask about images, request text generation, or have a conversation.", lines=4, elem_id="text_input" ) image_input = gr.Image( label="📷 Image Input (Optional)", type="pil", elem_id="image_input" ) with gr.Row(): submit_btn = gr.Button( "🚀 Generate Response", variant="primary", scale=2 ) clear_btn = gr.Button( "🗑️ Clear All", variant="secondary", scale=1 ) output = gr.Textbox( label="🤖 AI Response", lines=10, interactive=False, elem_id="output" ) # عمود الإعدادات with gr.Column(scale=1): with gr.Group(): gr.Markdown("### ⚙️ Generation Settings") temperature = gr.Slider( label="Temperature", minimum=0.0, maximum=1.5, value=0.7, step=0.1, info="Controls randomness (0=deterministic, 1.5=very creative)" ) top_p = gr.Slider( label="Top-p (Nucleus Sampling)", minimum=0.1, maximum=1.0, value=0.9, step=0.05, info="Controls diversity of output" ) max_new_tokens = gr.Slider( label="Max New Tokens", minimum=50, maximum=2048, value=512, step=50, info="Maximum length of generated response" ) gr.Markdown( """ ### 📚 Quick Tips: **Text Generation:** - Ask questions - Request explanations - Generate creative content **Image Understanding:** - Upload an image - Ask about contents - Request OCR/text extraction - Get detailed descriptions **Languages:** - English, Chinese, Arabic - And many more! """ ) # أمثلة with gr.Group(): gr.Markdown("### 💡 Example Prompts") gr.Examples( examples=[ ["Explain quantum computing in simple terms for a beginner.", None], ["Write a short story about a robot learning to paint.", None], ["What are the main differences between Python and JavaScript?", None], ["Create a healthy meal plan for one week.", None], ["Translate 'Hello, how are you?' to French, Spanish, and Arabic.", None], ], inputs=[text_input, image_input], outputs=output, fn=lambda t, i: generate_response(t, i, 0.7, 0.9, 512), cache_examples=False, label="Click any example to try it" ) # ربط الأحداث submit_btn.click( fn=generate_response, inputs=[text_input, image_input, temperature, top_p, max_new_tokens], outputs=output, api_name="generate" ) text_input.submit( fn=generate_response, inputs=[text_input, image_input, temperature, top_p, max_new_tokens], outputs=output ) clear_btn.click( fn=clear_all, inputs=[], outputs=[text_input, image_input, output] ) # رسالة ترحيبية عند التحميل demo.load( lambda: gr.Info("Model is loading... This may take a moment on first use."), inputs=None, outputs=None ) return demo # ========================================================= # تشغيل التطبيق # ========================================================= if __name__ == "__main__": demo = create_demo() demo.launch( ssr_mode=False, show_error=True, share=False )