prithivMLmods committed on
Commit
fbed3fa
·
verified ·
1 Parent(s): 2b589fc

update app

Browse files
Files changed (1) hide show
  1. app.py +3 -0
app.py CHANGED
@@ -134,6 +134,7 @@ MODEL_ID_M = "prithivMLmods/Camel-Doc-OCR-062825"
134
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
135
  model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
136
  MODEL_ID_M,
 
137
  trust_remote_code=True,
138
  torch_dtype=torch.float16
139
  ).to(device).eval()
@@ -145,6 +146,7 @@ MODEL_ID_T = "zai-org/GLM-4.1V-9B-Thinking"
145
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
146
  model_t = Glm4vForConditionalGeneration.from_pretrained(
147
  MODEL_ID_T,
 
148
  trust_remote_code=True,
149
  torch_dtype=torch.float16
150
  ).to(device).eval()
@@ -155,6 +157,7 @@ print("Loading moondream3-preview...")
155
  MODEL_ID_MD3 = "moondream/moondream3-preview"
156
  model_md3 = AutoModelForCausalLM.from_pretrained(
157
  MODEL_ID_MD3,
 
158
  trust_remote_code=True,
159
  torch_dtype=torch.bfloat16,
160
  device_map={"": "cuda"},
 
134
  processor_m = AutoProcessor.from_pretrained(MODEL_ID_M, trust_remote_code=True)
135
  model_m = Qwen2_5_VLForConditionalGeneration.from_pretrained(
136
  MODEL_ID_M,
137
+ attn_implementation="flash_attention_2",
138
  trust_remote_code=True,
139
  torch_dtype=torch.float16
140
  ).to(device).eval()
 
146
  processor_t = AutoProcessor.from_pretrained(MODEL_ID_T, trust_remote_code=True)
147
  model_t = Glm4vForConditionalGeneration.from_pretrained(
148
  MODEL_ID_T,
149
+ attn_implementation="flash_attention_2",
150
  trust_remote_code=True,
151
  torch_dtype=torch.float16
152
  ).to(device).eval()
 
157
  MODEL_ID_MD3 = "moondream/moondream3-preview"
158
  model_md3 = AutoModelForCausalLM.from_pretrained(
159
  MODEL_ID_MD3,
160
+ attn_implementation="flash_attention_2",
161
  trust_remote_code=True,
162
  torch_dtype=torch.bfloat16,
163
  device_map={"": "cuda"},