Instructions to use binhpham/molmoact2-leslider with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use binhpham/molmoact2-leslider with Transformers:
```python
# Load model directly
from transformers import AutoModelForImageTextToText
model = AutoModelForImageTextToText.from_pretrained("binhpham/molmoact2-leslider", trust_remote_code=True, dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
| { | |
| "auto_map": { | |
| "AutoProcessor": "processing_molmoact2.MolmoAct2Processor" | |
| }, | |
| "image_processor": { | |
| "auto_map": { | |
| "AutoImageProcessor": "image_processing_molmoact2.MolmoAct2ImageProcessor", | |
| "AutoProcessor": "processing_molmoact2.MolmoAct2Processor" | |
| }, | |
| "crop_mode": "resize", | |
| "do_convert_rgb": true, | |
| "image_mean": [ | |
| 0.5, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "image_processor_type": "MolmoAct2ImageProcessor", | |
| "image_std": [ | |
| 0.5, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "max_crops": 8, | |
| "overlap_margins": [ | |
| 4, | |
| 4 | |
| ], | |
| "patch_size": 14, | |
| "pooling_size": [ | |
| 2, | |
| 2 | |
| ], | |
| "resample": 2, | |
| "size": { | |
| "height": 378, | |
| "width": 378 | |
| } | |
| }, | |
| "image_use_col_tokens": true, | |
| "processor_class": "MolmoAct2Processor", | |
| "use_frame_special_tokens": true, | |
| "use_single_crop_col_tokens": false, | |
| "use_single_crop_start_token": true, | |
| "video_processor": { | |
| "auto_map": { | |
| "AutoProcessor": "processing_molmoact2.MolmoAct2Processor", | |
| "AutoVideoProcessor": "video_processing_molmoact2.MolmoAct2VideoProcessor" | |
| }, | |
| "data_format": "channels_first", | |
| "default_to_square": true, | |
| "do_convert_rgb": true, | |
| "do_normalize": true, | |
| "do_rescale": true, | |
| "do_resize": true, | |
| "do_sample_frames": true, | |
| "frame_sample_mode": "uniform_last_frame", | |
| "image_mean": [ | |
| 0.5, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "image_std": [ | |
| 0.5, | |
| 0.5, | |
| 0.5 | |
| ], | |
| "max_fps": 2.0, | |
| "num_frames": 8, | |
| "patch_size": 14, | |
| "pooling_size": [ | |
| 3, | |
| 3 | |
| ], | |
| "resample": 2, | |
| "rescale_factor": 0.00392156862745098, | |
| "return_metadata": false, | |
| "sampling_fps": 2, | |
| "size": { | |
| "height": 378, | |
| "width": 378 | |
| }, | |
| "video_processor_type": "MolmoAct2VideoProcessor" | |
| }, | |
| "video_use_col_tokens": false | |
| } |