Update app.py
app.py CHANGED
@@ -125,10 +125,32 @@ class QwenRecapAgent:
         self.retry_delay = retry_delay
         self.device = device_map

-
-
+        # Force use of the fast tokenizer
+        try:
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                token=hf_token,
+                use_fast=True,  # force the fast tokenizer
+                trust_remote_code=True
+            )
+            logging.info("Successfully loaded fast tokenizer")
+        except Exception as e:
+            logging.warning(f"Fast tokenizer failed, falling back to slow: {e}")
+            self.tokenizer = AutoTokenizer.from_pretrained(
+                model_path,
+                token=hf_token,
+                use_fast=False,
+                trust_remote_code=True
+            )
+
+        model_kwargs = {
+            "torch_dtype": torch.bfloat16,
+            "device_map": device_map if device_map == "auto" else None,
+            "trust_remote_code": True
+        }
         if hf_token:
             model_kwargs["token"] = hf_token
+
         self.model = AutoModelForCausalLM.from_pretrained(model_path, **model_kwargs)
         if device_map != "auto":
             self.model.to(device_map)
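The new tokenizer block prefers the Rust-backed fast tokenizer and only falls back to the slow Python implementation if loading fails. A minimal standalone sketch of the same pattern, assuming transformers is installed; the load_tokenizer helper and the model id are illustrative stand-ins, not names from this Space:

import logging
from transformers import AutoTokenizer

def load_tokenizer(model_path, hf_token=None):
    """Prefer the fast (Rust-backed) tokenizer; fall back to the slow one."""
    try:
        tok = AutoTokenizer.from_pretrained(
            model_path, token=hf_token, use_fast=True, trust_remote_code=True
        )
        logging.info("Loaded fast tokenizer")
    except Exception as e:
        logging.warning(f"Fast tokenizer failed, falling back to slow: {e}")
        tok = AutoTokenizer.from_pretrained(
            model_path, token=hf_token, use_fast=False, trust_remote_code=True
        )
    return tok

# tok.is_fast reports which implementation was actually loaded.
tok = load_tokenizer("Qwen/Qwen2.5-7B-Instruct")  # placeholder model id
print(tok.is_fast)

Note also that model_kwargs in this hunk passes device_map to from_pretrained only when it is "auto" (letting accelerate place the weights); for an explicit device the model is loaded first and then moved with .to(device_map).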
@@ -180,7 +202,7 @@ Elaborate on each core requirement to create a rich description.
         model_inputs = self.tokenizer([text], return_tensors="pt").to(self.model.device)

         with torch.no_grad():
-            generated_ids = self.model.generate(**model_inputs, max_new_tokens=
+            generated_ids = self.model.generate(**model_inputs, max_new_tokens=4096, temperature=0.6)

         output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
         full_response = self.tokenizer.decode(output_ids, skip_special_tokens=True)
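One caveat about the new generate call: in transformers, temperature only takes effect when sampling is enabled, either via do_sample=True in the call or via a generation_config that already enables sampling; under plain greedy decoding the value is ignored and recent versions log a warning about it. A sketch of the explicit sampling variant, with generate_recap as a hypothetical helper mirroring the method in the diff:

import torch

def generate_recap(model, tokenizer, text):
    # Same flow as the diff, with sampling switched on so that
    # temperature=0.6 actually shapes the output distribution.
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=4096,
            do_sample=True,   # required for temperature to apply
            temperature=0.6,
        )
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):].tolist()
    return tokenizer.decode(output_ids, skip_special_tokens=True)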