Add apply_chat_template process
Files changed:
- app.py: +7 -5
- spanish_medica_llm.py: +38 -2
app.py
CHANGED

@@ -10,7 +10,7 @@ import sys
 import torch
 
 
-from spanish_medica_llm import run_training, run_training_process, run_finnetuning_process
+from spanish_medica_llm import run_training, run_training_process, run_finnetuning_process, generate_response
 
 import gradio as gr
 
@@ -31,13 +31,15 @@ def generate(prompt):
     image = pipe(prompt).images[0]
     return(image)
 
-def evaluate_model():
+def evaluate_model(input):
     #from diffusers import StableDiffusionPipeline
 
     #pipe = StableDiffusionPipeline.from_pretrained("./output_model", torch_dtype=torch.float16)
     #pipe = pipe.to("cuda")
     #image = pipe(prompt).images[0]
-
+    output = generate_response(input)
+    return output
+
 
 
 
@@ -66,7 +68,7 @@ with gr.Blocks() as demo:
     with gr.Row():
         inp = gr.Textbox(placeholder="What is your name?")
         out = gr.Textbox()
-
+
     btn_response = gr.Button("Generate Response")
     btn_response.click(fn=generate_model, inputs=inp, outputs=out)
     btn_train = gr.Button("Train Model")
@@ -74,7 +76,7 @@ with gr.Blocks() as demo:
     btn_finnetuning = gr.Button("Finnetuning Model")
     btn_finnetuning.click(fn=finnetuning_model, inputs=[], outputs=out)
     btn_evaluate = gr.Button("Evaluate Model")
-    btn_evaluate.click(fn=evaluate_model, inputs=[], outputs=out)
+    btn_evaluate.click(fn=evaluate_model, inputs=inp, outputs=out)
     btn_stop = gr.Button("Stop Model")
     btn_stop.click(fn=stop_model, inputs=[], outputs=out)
 
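Taken together, the app.py changes give the previously inert "Evaluate Model" button a real code path: the textbox value now flows into evaluate_model, which delegates to the new generate_response helper and returns its text to the output box. A minimal, self-contained sketch of that wiring follows; the echo-style generate_response stub is hypothetical, standing in for the real import from spanish_medica_llm:

import gradio as gr

# Hypothetical stand-in for spanish_medica_llm.generate_response,
# only here so the sketch runs on its own.
def generate_response(query):
    return f"echo: {query}"

def evaluate_model(input):
    # After this commit the handler forwards the textbox value
    # to the LLM helper instead of doing nothing.
    output = generate_response(input)
    return output

with gr.Blocks() as demo:
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your name?")
        out = gr.Textbox()
    btn_evaluate = gr.Button("Evaluate Model")
    # inputs=inp is what routes the textbox value into
    # evaluate_model; outputs=out displays the returned string.
    btn_evaluate.click(fn=evaluate_model, inputs=inp, outputs=out)

demo.launch()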
spanish_medica_llm.py
CHANGED

@@ -19,7 +19,8 @@ from transformers import (
     BitsAndBytesConfig,
     DataCollatorForLanguageModeling,
     TrainingArguments,
-    Trainer
+    Trainer,
+    GenerationConfig
 )
 
 from accelerate import FullyShardedDataParallelPlugin, Accelerator
@@ -711,6 +712,41 @@ def run_finnetuning_process():
     print('Dataset in One ')
     print (train_dataset[5])
     configAndRunFineTuning(base_model,train_dataset, eval_dataset, tokenizer)
-def generate_response(query):
 
+def generate_response(query):
+    max_new_tokens=256
+    temperature=0.1
+    top_p=0.75
+    top_k=40
+    num_beams=2
+
+    tokenizer = loadSpanishTokenizer()
+    model = loadBaseModel(HUB_MODEL_ID)
+
+    system = f"[INST]\nYou are a helpful coding assistant.[/INST]\n"
+    prompt = f"{system}\n{query}\n \n"
+    print(prompt)
+    inputs = tokenizer(prompt, return_tensors="pt")
+    input_ids = inputs["input_ids"].to("cuda")
+    attention_mask = inputs["attention_mask"].to("cuda")
+    generation_config = GenerationConfig(
+        temperature=temperature,
+        top_p=top_p,
+        top_k=top_k,
+        num_beams=num_beams,
+    )
+    with torch.no_grad():
+        generation_output = model.generate(
+            input_ids=input_ids,
+            attention_mask=attention_mask,
+            generation_config=generation_config,
+            return_dict_in_generate=True,
+            #output_scores=True,
+            max_new_tokens=max_new_tokens,
+            early_stopping=True
+        )
+    s = generation_output.sequences[0]
+    output = tokenizer.decode(s, skip_special_tokens=True)
+    return output
+    # return output.split("<|assistant|>")[1]
 
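Two observations on the new generate_response. First, in transformers, temperature, top_p and top_k only take effect when do_sample=True; since the GenerationConfig here leaves do_sample at its default of False and sets num_beams=2, model.generate runs deterministic beam search and those three sampling knobs are ignored (recent transformers versions emit a warning about this). A minimal sketch of the same pattern with sampling actually enabled, using gpt2 purely as a stand-in for the Space's own loadSpanishTokenizer()/loadBaseModel(HUB_MODEL_ID) pair:

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, GenerationConfig

# gpt2 is a stand-in checkpoint so the sketch runs anywhere;
# the Space loads its own fine-tuned model instead.
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

# The commit hard-codes .to("cuda"), which raises on CPU-only
# hardware; guarding on availability keeps the sketch portable.
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

prompt = "[INST]\nYou are a helpful coding assistant.[/INST]\nHola, como estas?\n"
inputs = tokenizer(prompt, return_tensors="pt").to(device)

generation_config = GenerationConfig(
    do_sample=True,   # without this, temperature/top_p/top_k are ignored
    temperature=0.1,
    top_p=0.75,
    top_k=40,
)
with torch.no_grad():
    generation_output = model.generate(
        **inputs,
        generation_config=generation_config,
        max_new_tokens=64,
    )
print(tokenizer.decode(generation_output[0], skip_special_tokens=True))

Second, the commit title mentions apply_chat_template, but the function still assembles the [INST] ... [/INST] prompt by hand. For a tokenizer that ships a chat template (Mistral/Llama-2-style checkpoints do), the equivalent prompt can be produced with the built-in helper. A sketch, assuming the loaded tokenizer has chat_template set and its template accepts a system role:

messages = [
    {"role": "system", "content": "You are a helpful coding assistant."},
    {"role": "user", "content": "Hola, como estas?"},
]
# tokenize=False returns the formatted string; add_generation_prompt=True
# appends the template's cue for the assistant's reply.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)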