Update README.md (#3)
Browse files
- Update README.md (e0b7f7a7a63a5dd8ec988ef5ed2014cca7d4cfe4)
Co-authored-by: Miquel Farré <mfarre@users.noreply.huggingface.co>
README.md
CHANGED
|
@@ -171,7 +171,7 @@ import warnings
|
|
| 171 |
from decord import VideoReader, cpu
|
| 172 |
import numpy as np
|
| 173 |
warnings.filterwarnings("ignore")
|
| 174 |
-
def load_video(
|
| 175 |
if max_frames_num == 0:
|
| 176 |
return np.zeros((1, 336, 336, 3))
|
| 177 |
vr = VideoReader(video_path, ctx=cpu(0),num_threads=1)
|
|
@@ -196,9 +196,9 @@ device_map = "auto"
|
|
| 196 |
tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map=device_map) # Add any other thing you want to pass in llava_model_args
|
| 197 |
model.eval()
|
| 198 |
video_path = "XXXX"
|
| 199 |
-
max_frames_num =
|
| 200 |
video,frame_time,video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
|
| 201 |
-
video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda().
|
| 202 |
video = [video]
|
| 203 |
conv_template = "qwen_1_5" # Make sure you use correct chat template for different models
|
| 204 |
time_instruciton = f"The video lasts for {video_time:.2f} seconds, and {len(video[0])} frames are uniformly sampled from it. These frames are located at {frame_time}.Please answer the following questions related to this video."
|
|
|
|
| 171 |
from decord import VideoReader, cpu
|
| 172 |
import numpy as np
|
| 173 |
warnings.filterwarnings("ignore")
|
| 174 |
+
def load_video(video_path, max_frames_num,fps=1,force_sample=False):
|
| 175 |
if max_frames_num == 0:
|
| 176 |
return np.zeros((1, 336, 336, 3))
|
| 177 |
vr = VideoReader(video_path, ctx=cpu(0),num_threads=1)
|
|
|
|
| 196 |
tokenizer, model, image_processor, max_length = load_pretrained_model(pretrained, None, model_name, torch_dtype="bfloat16", device_map=device_map) # Add any other thing you want to pass in llava_model_args
|
| 197 |
model.eval()
|
| 198 |
video_path = "XXXX"
|
| 199 |
+
max_frames_num = 64
|
| 200 |
video,frame_time,video_time = load_video(video_path, max_frames_num, 1, force_sample=True)
|
| 201 |
+
video = image_processor.preprocess(video, return_tensors="pt")["pixel_values"].cuda().half()
|
| 202 |
video = [video]
|
| 203 |
conv_template = "qwen_1_5" # Make sure you use correct chat template for different models
|
| 204 |
time_instruciton = f"The video lasts for {video_time:.2f} seconds, and {len(video[0])} frames are uniformly sampled from it. These frames are located at {frame_time}.Please answer the following questions related to this video."
|