Spaces:

prithivMLmods
/

Multimodal-VLM-v1.0

Running on Zero

prithivMLmods committed 12 days ago

Commit

fbed3fa

verified ·

1 Parent(s): 2b589fc

update app

Files changed (1) hide show

app.py CHANGED Viewed

@@ -134,6 +134,7 @@ MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-062825"
 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -145,6 +146,7 @@ MODEL_ID_T = "zai-org/GLM-4.1V-9B-Thinking"
 processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
 model_t = Glm4vForConditionalGeneration.from_pretrained(
     MODEL_ID_T,
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
@@ -155,6 +157,7 @@ print("Loading moondream3-preview...")
 MODEL_ID_MD3 = "moondream/moondream3-preview"
 model_md3 = AutoModelForCausalLM.from_pretrained(
     MODEL_ID_MD3,
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
     device_map={"": "cuda"},

 processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
 model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
     MODEL_ID_M,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
 model_t = Glm4vForConditionalGeneration.from_pretrained(
     MODEL_ID_T,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.float16
 ).to(device).eval()
 MODEL_ID_MD3 = "moondream/moondream3-preview"
 model_md3 = AutoModelForCausalLM.from_pretrained(
     MODEL_ID_MD3,
+    attn_implementation="flash_attention_2",
     trust_remote_code=True,
     torch_dtype=torch.bfloat16,
     device_map={"": "cuda"},