import torch
from typing import Any, Dict
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline


class EndpointHandler:
    def __init__(self, path=""):
        # Load the fine-tuned model weights and pair them with the base Phi-3 tokenizer,
        # then build a text-generation pipeline that __call__ reuses for every request.
        model = AutoModelForCausalLM.from_pretrained(
            "hyperspaceai/hyperEngine_phi3_128k",
            device_map="auto",
            torch_dtype="auto",
            trust_remote_code=True,
        )
        tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-128k-instruct")
        self.pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
|
|
    def __call__(self, data: Dict[str, Any]):
        # The request payload is expected to carry chat-format "messages" and,
        # optionally, a "generation_args" dict that overrides the defaults below.
        messages = data.pop("messages", None)
        generation_args = data.pop("generation_args", None)

        if generation_args is None:
            generation_args = {
                "max_new_tokens": 500,
                "return_full_text": False,
                "temperature": 0.0,
                "do_sample": False,
            }

        output = self.pipe(messages, **generation_args)
        return output[0]["generated_text"]
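
# --- Local usage sketch (illustrative only, not part of the deployed handler) ---
# A minimal example of how a payload could be passed to this handler when testing
# locally. The conversation content and the overridden generation settings below
# are made-up assumptions; only the payload shape ("messages" plus optional
# "generation_args") follows what __call__ above expects.
if __name__ == "__main__":
    handler = EndpointHandler()
    sample_payload = {
        "messages": [
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": "Explain what a 128k context window means."},
        ],
        "generation_args": {
            "max_new_tokens": 128,
            "return_full_text": False,
            "do_sample": False,
        },
    }
    print(handler(sample_payload))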
|
|