import os
import gradio as gr
from ollama import Client

# Connection details for the remote Ollama server are read from the environment.
public_ip = os.environ['PUBLIC_IP']
port = os.environ['PORT']
model = 'llama3.1:8b'

client = Client(host=f'http://{public_ip}:{port}')
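# Optional connectivity check (my addition, not in the original script): the ollama
# client exposes a list() call that queries the server for its pulled models, which
# can be used to fail fast if the host is unreachable. Left commented out so a
# transient network error does not prevent the Space from launching.
# print(client.list())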
def format_history(msg: str, history: list[list[str]], system_prompt: str):
    """Convert Gradio's [[user, assistant], ...] history into Ollama's chat-message format."""
    chat_history = [{"role": "system", "content": system_prompt}]
    for query, response in history:
        chat_history.append({"role": "user", "content": query})
        chat_history.append({"role": "assistant", "content": response})
    chat_history.append({"role": "user", "content": msg})
    return chat_history
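# Example (illustrative values): format_history("How are you?", [["Hi", "Hello!"]], "Be brief")
# returns:
# [{'role': 'system', 'content': 'Be brief'},
#  {'role': 'user', 'content': 'Hi'},
#  {'role': 'assistant', 'content': 'Hello!'},
#  {'role': 'user', 'content': 'How are you?'}]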
def generate_response(msg: str, history: list[list[str]], system_prompt: str,
                      top_k: int, top_p: float, temperature: float):
    """Stream a reply from the Ollama server, yielding the growing message for Gradio."""
    chat_history = format_history(msg, history, system_prompt)
    response = client.chat(model=model,
                           stream=True,
                           messages=chat_history,
                           options={'top_k': top_k, 'top_p': top_p, 'temperature': temperature})
    # Yield the accumulated text (not individual tokens) so the chat bubble
    # re-renders with the full partial answer on every streamed chunk.
    message = ""
    for partial_resp in response:
        token = partial_resp["message"]["content"]
        message += token
        yield message
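# Quick smoke test outside Gradio (my addition; assumes the server above is
# reachable and the prompt/sampling values here are illustrative):
#   last = None
#   for last in generate_response("Why is the sky blue?", [], "Be concise.", 40, 0.9, 0.4):
#       pass
#   print(last)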
chatbot = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(
        avatar_images=["user.png", "chatbot.png"],
        height="64vh"
    ),
    additional_inputs=[
        gr.Textbox("You are a helpful assistant and always try to answer user queries to the best of your ability.", label="System Prompt"),
        gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
        gr.Slider(0.0, 1.0, label="top_p", value=0.9, info="Works together with top_k. A higher value (e.g. 0.95) will lead to more diverse text, while a lower value (e.g. 0.5) will generate more focused and conservative text. (Default: 0.9)"),
        gr.Slider(0.0, 2.0, label="temperature", value=0.4, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
    ],
    title="Trashcan AI",
    description="Llama 3.1 8B hosted on a 2013 \"Trashcan\" Mac Pro with Ollama",
    theme="finlaymacklon/smooth_slate",
    submit_btn="Send",
    retry_btn="🔄 Regenerate Response",
    undo_btn="↩️ Delete Previous",
    clear_btn="🗑️ Clear Chat"
)
chatbot.queue().launch()
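# To run locally (assumptions, not from the original listing: an Ollama server is
# serving on PUBLIC_IP:PORT and the model has been pulled there, e.g. with
# `ollama pull llama3.1:8b`; the IP below is a placeholder):
#   PUBLIC_IP=203.0.113.5 PORT=11434 python app.py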