Image-Text-to-Text
Safetensors
Transformers
English
Chinese
multilingual
dots_mocr
dots_ocr
text-generation
image-to-text
ocr
document-parse
layout
table
formula
custom_code
conversational
Eval Results
Instructions to use rednote-hilab/dots.mocr with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use rednote-hilab/dots.mocr with Transformers:
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("image-text-to-text", model="rednote-hilab/dots.mocr", trust_remote_code=True)
messages = [
    {
        "role": "user",
        "content": [
            {"type": "image", "url": "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/p-blog/candy.JPG"},
            {"type": "text", "text": "What animal is on the candy?"}
        ]
    },
]
pipe(text=messages)

# Load model directly
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained("rednote-hilab/dots.mocr", trust_remote_code=True, dtype="auto")
- Notebooks
- Google Colab
- Kaggle
- Local Apps
- vLLM
How to use rednote-hilab/dots.mocr with vLLM:
Install from pip and serve model
# Install vLLM from pip:
pip install vllm

# Start the vLLM server:
vllm serve "rednote-hilab/dots.mocr"

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:8000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "rednote-hilab/dots.mocr",
        "messages": [
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": "Describe this image in one sentence." },
                    { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
                ]
            }
        ]
    }'
Use Docker
docker model run hf.co/rednote-hilab/dots.mocr
- SGLang
How to use rednote-hilab/dots.mocr with SGLang:
Install from pip and serve model
# Install SGLang from pip:
pip install sglang

# Start the SGLang server:
python3 -m sglang.launch_server \
    --model-path "rednote-hilab/dots.mocr" \
    --host 0.0.0.0 \
    --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "rednote-hilab/dots.mocr",
        "messages": [
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": "Describe this image in one sentence." },
                    { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
                ]
            }
        ]
    }'
Use Docker images
docker run --gpus all \
    --shm-size 32g \
    -p 30000:30000 \
    -v ~/.cache/huggingface:/root/.cache/huggingface \
    --env "HF_TOKEN=<secret>" \
    --ipc=host \
    lmsysorg/sglang:latest \
    python3 -m sglang.launch_server \
        --model-path "rednote-hilab/dots.mocr" \
        --host 0.0.0.0 \
        --port 30000

# Call the server using curl (OpenAI-compatible API):
curl -X POST "http://localhost:30000/v1/chat/completions" \
    -H "Content-Type: application/json" \
    --data '{
        "model": "rednote-hilab/dots.mocr",
        "messages": [
            {
                "role": "user",
                "content": [
                    { "type": "text", "text": "Describe this image in one sentence." },
                    { "type": "image_url", "image_url": { "url": "https://cdn.britannica.com/61/93061-050-99147DCE/Statue-of-Liberty-Island-New-York-Bay.jpg" } }
                ]
            }
        ]
    }'
- Docker Model Runner
How to use rednote-hilab/dots.mocr with Docker Model Runner:
docker model run hf.co/rednote-hilab/dots.mocr
[DRAFT] fix: transformers 5.x compat (cache_position + kwargs naming)
#6
by emanuelevivoli - opened
- modeling_dots_ocr.py +11 -3
modeling_dots_ocr.py
CHANGED
|
@@ -80,7 +80,7 @@ class DotsOCRForCausalLM(Qwen2ForCausalLM):
|
|
| 80 |
return_dict: Optional[bool] = None,
|
| 81 |
use_cache: Optional[bool] = None,
|
| 82 |
logits_to_keep: int = 0,
|
| 83 |
-
**
|
| 84 |
) -> Union[Tuple, CausalLMOutputWithPast]:
|
| 85 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 86 |
assert len(input_ids) >= 1, f"empty input_ids {input_ids.shape=} will cause gradnorm nan"
|
|
@@ -99,7 +99,7 @@ class DotsOCRForCausalLM(Qwen2ForCausalLM):
|
|
| 99 |
output_hidden_states=output_hidden_states,
|
| 100 |
# return_dict=return_dict,
|
| 101 |
logits_to_keep=logits_to_keep,
|
| 102 |
-
**
|
| 103 |
)
|
| 104 |
|
| 105 |
return outputs
|
|
@@ -125,7 +125,15 @@ class DotsOCRForCausalLM(Qwen2ForCausalLM):
|
|
| 125 |
**kwargs,
|
| 126 |
)
|
| 127 |
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
model_inputs["pixel_values"] = pixel_values
|
| 130 |
|
| 131 |
return model_inputs
|
|
|
|
| 80 |
return_dict: Optional[bool] = None,
|
| 81 |
use_cache: Optional[bool] = None,
|
| 82 |
logits_to_keep: int = 0,
|
| 83 |
+
**kwargs,
|
| 84 |
) -> Union[Tuple, CausalLMOutputWithPast]:
|
| 85 |
return_dict = return_dict if return_dict is not None else self.config.use_return_dict
|
| 86 |
assert len(input_ids) >= 1, f"empty input_ids {input_ids.shape=} will cause gradnorm nan"
|
|
|
|
| 99 |
output_hidden_states=output_hidden_states,
|
| 100 |
# return_dict=return_dict,
|
| 101 |
logits_to_keep=logits_to_keep,
|
| 102 |
+
**kwargs,
|
| 103 |
)
|
| 104 |
|
| 105 |
return outputs
|
|
|
|
| 125 |
**kwargs,
|
| 126 |
)
|
| 127 |
|
| 128 |
+
# Pass pixel_values only on the first generation step (prefill).
|
| 129 |
+
# Compatible with both transformers 4.x (cache_position available)
|
| 130 |
+
# and 5.x (cache_position removed, use past_key_values instead).
|
| 131 |
+
is_prefill = (
|
| 132 |
+
(cache_position is not None and cache_position[0] == 0)
|
| 133 |
+
or past_key_values is None
|
| 134 |
+
or (hasattr(past_key_values, "get_seq_length") and past_key_values.get_seq_length() == 0)
|
| 135 |
+
)
|
| 136 |
+
if is_prefill:
|
| 137 |
model_inputs["pixel_values"] = pixel_values
|
| 138 |
|
| 139 |
return model_inputs
|