import gradio as gr
import os
from typing import List, Tuple
import time

# Configure the model and provider
MODEL_ID = "openai/gpt-oss-120b"
DEFAULT_PROVIDER = "groq"  # Can be changed to fireworks, hyperbolic, etc.

# System prompts for different modes
SYSTEM_PROMPTS = {
    "default": "You are a helpful AI assistant.",
    "creative": "You are a creative and imaginative AI that thinks outside the box.",
    "technical": "You are a technical expert AI that provides detailed, accurate technical information.",
    "concise": "You are a concise AI that provides brief, to-the-point responses.",
    "teacher": "You are a patient teacher who explains concepts clearly with examples.",
    "coder": "You are an expert programmer who writes clean, efficient, well-commented code.",
}
# CSS for dark theme and custom styling
custom_css = """
#chatbot {
    height: 600px !important;
    background: #0a0a0a;
}
#chatbot .message {
    font-size: 14px;
    line-height: 1.6;
}
.dark {
    background: #0a0a0a;
}
.user-message {
    background: rgba(0, 255, 136, 0.1) !important;
    border-left: 3px solid #00ff88;
}
.assistant-message {
    background: rgba(0, 255, 255, 0.05) !important;
    border-left: 3px solid #00ffff;
}
.footer {
    text-align: center;
    padding: 20px;
    color: #666;
}
"""
def format_message_history(history: List[Tuple[str, str]], system_prompt: str) -> List[dict]:
    """Format chat history as a list of role/content messages for the model."""
    messages = []
    if system_prompt:
        messages.append({"role": "system", "content": system_prompt})
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    return messages
def stream_response(message: str, history: List[Tuple[str, str]],
                    system_prompt: str, temperature: float, max_tokens: int,
                    top_p: float, provider: str):
    """Generate a (simulated) streaming response from the model."""
    # Format messages for the model
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    # Simulate streaming for demo purposes. In production, you'd call the
    # actual provider API here -- see the stream_response_api sketch below.
    demo_response = f"""I'm GPT-OSS-120B running on {provider}!

I received your message: "{message}"

With these settings:
- Temperature: {temperature}
- Max tokens: {max_tokens}
- Top-p: {top_p}
- System prompt: {system_prompt[:50]}...

This is where the actual model response would appear. In production, this would connect to the {provider} API to generate real responses from the 120B parameter model.

The model would analyze your input and provide a detailed, thoughtful response based on its massive 120 billion parameters of knowledge."""

    # Simulate the streaming effect by yielding a few words at a time
    words = demo_response.split()
    response = ""
    for i in range(0, len(words), 3):
        chunk = " ".join(words[i:i + 3])
        response += chunk + " "
        time.sleep(0.05)  # Simulate streaming delay
        yield response.strip()
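
# A sketch of what the real call could look like, using huggingface_hub's
# InferenceClient with provider routing. This is an untested illustration and
# is not wired into the UI: it assumes huggingface_hub is installed
# (`pip install huggingface_hub`), that an HF_TOKEN environment variable holds
# a valid token, and that the selected provider serves this model.
def stream_response_api(message: str, history: List[Tuple[str, str]],
                        system_prompt: str, temperature: float, max_tokens: int,
                        top_p: float, provider: str):
    """Stream real completions for MODEL_ID via the chosen provider."""
    from huggingface_hub import InferenceClient  # local import: optional dependency

    client = InferenceClient(provider=provider, api_key=os.environ.get("HF_TOKEN"))
    messages = format_message_history(history, system_prompt)
    messages.append({"role": "user", "content": message})

    response = ""
    # chat_completion with stream=True yields incremental delta chunks
    for chunk in client.chat_completion(
        messages,
        model=MODEL_ID,
        temperature=temperature,
        max_tokens=max_tokens,
        top_p=top_p,
        stream=True,
    ):
        response += chunk.choices[0].delta.content or ""
        yield response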
def clear_chat():
    """Clear the chat history"""
    return None, []

def undo_last(history):
    """Remove the last exchange from history"""
    if history:
        return history[:-1]
    return history

def retry_last(message, history):
    """Re-queue the last user message so its response is regenerated"""
    if history and history[-1][0]:
        last_message = history[-1][0]
        # Drop the old response and re-add the message with no reply,
        # so the chained bot_respond call picks it up again
        return "", history[:-1] + [(last_message, None)]
    return message, history

def load_example(example):
    """Load an example prompt"""
    return example
# Create the Gradio interface
with gr.Blocks(theme=gr.themes.Soft(), css=custom_css, title="GPT-OSS-120B Chat") as demo:
    # Header
    gr.Markdown(
        """
        # 🧠 GPT-OSS-120B Mega Chat
        ### 120 Billion Parameters of Pure Intelligence 🚀

        Chat with OpenAI's massive GPT-OSS-120B model - one of the largest open-weight models available!
        """
    )
    # Main chat interface
    with gr.Row():
        # Chat column
        with gr.Column(scale=3):
            chatbot = gr.Chatbot(
                label="Chat",
                elem_id="chatbot",
                bubble_full_width=False,
                show_copy_button=True,
                height=500,
                type="tuples",
            )

            # Input area
            with gr.Row():
                msg = gr.Textbox(
                    label="Message",
                    placeholder="Ask anything... (Shift+Enter for new line, Enter to send)",
                    lines=3,
                    max_lines=10,
                    scale=5,
                    elem_classes="user-input",
                )
                with gr.Column(scale=1, min_width=80):
                    send_btn = gr.Button("Send 📤", variant="primary", size="lg")
                    stop_btn = gr.Button("Stop ⏹️", variant="stop", size="lg", visible=False)

            # Action buttons
            with gr.Row():
                clear_btn = gr.Button("🗑️ Clear", size="sm")
                undo_btn = gr.Button("↩️ Undo", size="sm")
                retry_btn = gr.Button("🔄 Retry", size="sm")
        # Settings column
        with gr.Column(scale=1):
            # Provider selection
            with gr.Accordion("🔌 Inference Provider", open=True):
                provider = gr.Dropdown(
                    label="Provider",
                    choices=["groq", "fireworks", "hyperbolic", "together", "anyscale"],
                    value=DEFAULT_PROVIDER,
                    info="Choose your inference provider",
                )
                login_btn = gr.Button("🔐 Sign in with HuggingFace", size="sm")

            # Model settings
            with gr.Accordion("⚙️ Model Settings", open=True):
                system_mode = gr.Dropdown(
                    label="System Mode",
                    choices=list(SYSTEM_PROMPTS.keys()),
                    value="default",
                    info="Preset system prompts",
                )
                system_prompt = gr.Textbox(
                    label="Custom System Prompt",
                    value=SYSTEM_PROMPTS["default"],
                    lines=3,
                    info="Override with custom instructions",
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.0,
                    maximum=2.0,
                    value=0.7,
                    step=0.05,
                    info="Higher = more creative, lower = more focused",
                )
                max_tokens = gr.Slider(
                    label="Max Tokens",
                    minimum=64,
                    maximum=8192,
                    value=2048,
                    step=64,
                    info="Maximum response length",
                )
                top_p = gr.Slider(
                    label="Top-p (Nucleus Sampling)",
                    minimum=0.1,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    info="Controls response diversity",
                )
                with gr.Row():
                    seed = gr.Number(
                        label="Seed",
                        value=-1,
                        info="Set for reproducible outputs (-1 for random)",
                    )
            # Advanced settings
            with gr.Accordion("🔬 Advanced", open=False):
                stream_output = gr.Checkbox(
                    label="Stream Output",
                    value=True,
                    info="Show response as it's generated",
                )
                show_reasoning = gr.Checkbox(
                    label="Show Reasoning Process",
                    value=False,
                    info="Display chain-of-thought if available",
                )
                reasoning_lang = gr.Dropdown(
                    label="Reasoning Language",
                    choices=["English", "Spanish", "French", "German", "Chinese", "Japanese"],
                    value="English",
                    info="Language for reasoning process",
                )
            # Model info
            with gr.Accordion("📊 Model Info", open=False):
                gr.Markdown(
                    """
                    **Model**: openai/gpt-oss-120b
                    - **Parameters**: 120 Billion
                    - **Architecture**: Transformer + MoE
                    - **Context**: 128K tokens
                    - **Training**: Multi-lingual, code, reasoning
                    - **License**: Apache 2.0 (open weight)

                    **Capabilities**:
                    - Complex reasoning
                    - Code generation
                    - Creative writing
                    - Technical analysis
                    - Multi-lingual support
                    - Function calling
                    """
                )
    # Examples section
    with gr.Accordion("💡 Example Prompts", open=True):
        examples = gr.Examples(
            examples=[
                "Explain quantum computing to a 10-year-old",
                "Write a Python function to detect palindromes with O(1) space complexity",
                "What are the implications of AGI for society?",
                "Create a detailed business plan for a sustainable energy startup",
                "Translate 'Hello, how are you?' to 10 different languages",
                "Debug this code: `def fib(n): return fib(n-1) + fib(n-2)`",
                "Write a haiku about machine learning",
                "Compare and contrast transformers vs RNNs for NLP tasks",
            ],
            inputs=msg,
            label="Click to load an example",
        )
    # Stats and info
    with gr.Row():
        with gr.Column():
            token_count = gr.Textbox(
                label="Token Count",
                value="0 tokens",
                interactive=False,
                scale=1,
            )
        with gr.Column():
            response_time = gr.Textbox(
                label="Response Time",
                value="0.0s",
                interactive=False,
                scale=1,
            )
        with gr.Column():
            model_status = gr.Textbox(
                label="Status",
                value="🟢 Ready",
                interactive=False,
                scale=1,
            )
    # Event handlers
    def update_system_prompt(mode):
        return SYSTEM_PROMPTS.get(mode, SYSTEM_PROMPTS["default"])

    def user_submit(message, history):
        if not message.strip():
            return "", history
        return "", history + [(message, None)]

    def bot_respond(history, system_prompt, temperature, max_tokens, top_p, provider):
        # Nothing to do if there is no pending (unanswered) user message
        if not history or history[-1][1] is not None:
            yield history
            return
        message = history[-1][0]

        # Generate response (streaming)
        bot_message = ""
        for chunk in stream_response(
            message,
            history[:-1],
            system_prompt,
            temperature,
            max_tokens,
            top_p,
            provider,
        ):
            bot_message = chunk
            history[-1] = (message, bot_message)
            yield history
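
    # The Token Count box above is currently static. A minimal sketch of how it
    # could be updated after each turn -- a rough word-based estimate, not a
    # real tokenizer (the ~0.75 words-per-token ratio is an assumption):
    def update_stats(history):
        text = " ".join((u or "") + " " + (a or "") for u, a in (history or []))
        approx_tokens = int(len(text.split()) / 0.75)
        return f"~{approx_tokens} tokens"
    # To wire it up, one could append `.then(update_stats, chatbot, token_count)`
    # to the submit chains below.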
    # Connect event handlers
    system_mode.change(
        update_system_prompt,
        inputs=[system_mode],
        outputs=[system_prompt],
    )

    # Message submission
    msg.submit(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot,
    )

    send_btn.click(
        user_submit,
        [msg, chatbot],
        [msg, chatbot],
        queue=False,
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot,
    )
    # Action buttons
    clear_btn.click(
        lambda: (None, ""),
        outputs=[chatbot, msg],
        queue=False,
    )

    undo_btn.click(
        undo_last,
        inputs=[chatbot],
        outputs=[chatbot],
        queue=False,
    )

    retry_btn.click(
        retry_last,
        inputs=[msg, chatbot],
        outputs=[msg, chatbot],
        queue=False,
    ).then(
        bot_respond,
        [chatbot, system_prompt, temperature, max_tokens, top_p, provider],
        chatbot,
    )
    # Login button (placeholder -- see the note below)
    login_btn.click(
        lambda: gr.Info("Please implement HuggingFace OAuth login"),
        queue=False,
    )
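
    # On Hugging Face Spaces, the placeholder button above could be replaced
    # with Gradio's built-in OAuth component (this assumes the Space enables
    # OAuth via `hf_oauth: true` in its README metadata):
    #
    #     login_btn = gr.LoginButton()
    #
    # Event handlers can then declare a `gr.OAuthToken`-typed parameter to
    # make authenticated calls on behalf of the signed-in user.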
    # Footer
    gr.Markdown(
        """
        <div class='footer'>
            <p>Built with 🔥 for the GPT-OSS-120B community | Model: openai/gpt-oss-120b</p>
            <p>Remember: This is a 120 billion parameter model - expect incredible responses!</p>
        </div>
        """
    )

# Launch configuration
if __name__ == "__main__":
    demo.launch()