Spaces:
Sleeping
Sleeping
| import time | |
| import gradio as gr | |
| import json | |
| import requests | |
| import os | |
# Model identifiers offered in the "LLM" dropdown; the selected string is
# forwarded unchanged to the inference endpoint as the "llm" field.
# NOTE(review): the trailing token of each name (2048, 4096, 196k) looks like
# the model's context window in tokens -- confirm against the backend.
models = [
    "TinyLLama 1b 4_K_M 2048",
    "TinyLLama 1b OpenOrca 4_K_M 2048",
    "OpenLLama 3b 4_K_M 196k",
    "Phi-2 2.7b 4_K_M 2048",
    "Stable Zephyr 3b 4_K_M 4096",
]
def make_request_to_llm(llm, prompt, max_new_tokens, nctx):
    """Send one generation request to the hosted CPU inference endpoint.

    Args:
        llm: Model identifier, forwarded as the ``llm`` field.
        prompt: Prompt text, forwarded as the ``prompt`` field.
        max_new_tokens: Forwarded as the ``max_new_tokens`` field.
        nctx: Forwarded as the ``nctx`` field.

    Returns:
        The raw response body as text. The HTTP status code is not
        inspected, so error pages are returned verbatim as well.
    """
    endpoint = 'https://daniellefranca96-cpu-inf.hf.space/llm_on_cpu'
    # The bearer token is read from the HF_TOKEN environment variable on
    # every call.
    token = os.getenv('HF_TOKEN')
    payload = {
        'prompt': prompt,
        'max_new_tokens': max_new_tokens,
        "llm": llm,
        "nctx": nctx,
    }
    reply = requests.post(
        endpoint,
        headers={
            "Authorization": f"Bearer {token}",
            "Content-Type": "application/json",
        },
        json=payload,
    )
    return reply.text
def change(llm):
    """Return the context-slider value (in thousands of tokens) for a model.

    The model names offered in the dropdown end with their context size,
    either as a plain token count (``"... 2048"``) or with a ``k`` suffix
    (``"... 196k"``). That trailing token is parsed here because ``models``
    is a flat list of names and carries no per-model metadata to look up.

    Args:
        llm: Model name as selected in the dropdown (may be ``None`` or
            empty when nothing is selected).

    Returns:
        The context size divided by 1000 and truncated to an int, clamped to
        the 1..100 range of the context slider. Returns 1 when the name
        carries no parseable context size.
    """
    slider_min, slider_max = 1, 100

    parts = llm.split() if isinstance(llm, str) else []
    if not parts:
        return slider_min

    token = parts[-1].lower()
    multiplier = 1
    if token.endswith("k"):
        # "196k" means 196 * 1000 tokens.
        token = token[:-1]
        multiplier = 1000

    try:
        context_tokens = int(token) * multiplier
    except ValueError:
        # The trailing word is not a number; fall back to the slider minimum.
        return slider_min

    return min(slider_max, max(slider_min, context_tokens // 1000))
def update(prompt, llm, nctx, max_tokens):
    """Run one inference request and report how long it took.

    Args:
        prompt: Prompt text to send.
        llm: Model identifier to send.
        nctx: Context size in thousands of tokens; converted with
            ``int(nctx) * 1000`` before being sent.
        max_tokens: Maximum number of tokens to generate.

    Returns:
        A JSON string with two keys: ``Duration`` (elapsed wall-clock
        seconds) and ``answer`` (the raw response text).
    """
    started = time.time()
    reply = make_request_to_llm(llm, prompt, max_tokens, int(nctx) * 1000)
    print(reply)
    # The clock is stopped after the reply is echoed, so the measured time
    # covers the request plus that print.
    elapsed = time.time() - started
    print(f"Duration: {elapsed}")
    return json.dumps({'Duration': elapsed, 'answer': reply})
# Gradio UI: pick a model, a context size and a generation budget, enter a
# prompt, and show the JSON produced by update() in the output box.
with gr.Blocks() as demo:
    gr.Markdown("Test LM inferences speeds on CPU.")
    with gr.Row():
        select = gr.Dropdown(models, label="LLM", value=models[0])
    with gr.Row():
        # Slider values are numeric, so the default is the number 1 rather
        # than a string.
        nctx = gr.Slider(
            minimum=1,
            maximum=100,
            label='Context (consider as 1000* the value chosen)',
            value=1,
        )
    with gr.Row():
        max_tokens = gr.Slider(minimum=512, maximum=4096, label='Max Tokens Generated')
    with gr.Row():
        inp = gr.Textbox(placeholder="What is your prompt?", label="Prompt")
    with gr.Row():
        out = gr.Textbox(label="Output", lines=20)
    btn = gr.Button("Run")
    # The input order must match update(prompt, llm, nctx, max_tokens).
    btn.click(fn=update, inputs=[inp, select, nctx, max_tokens], outputs=out)
    # Selecting a model writes the value returned by change() into the
    # context slider.
    select.change(fn=change, inputs=[select], outputs=nctx)

demo.launch()