Spaces:

openbmb
/

UltraData-Math-L3-Generator

Sleeping

App Files Files Community

UltraData-Math-L3-Generator / app.py

ww762744

Update app.py

14caad2 verified about 2 months ago

raw

history blame contribute delete

22.9 kB

	# -*- coding: utf-8 -*-
	"""
	UltraData-Math-L3-Generator - Hugging Face Space Demo
	"""

	import os
	import asyncio
	import gradio as gr

	from openai import AsyncOpenAI

	from qa_synthesis import QA_PROMPTS, get_qa_prompt
	from conversation_synthesis import CONVERSATION_PROMPTS, get_conversation_prompt
	from multistyle_rewrite import MULTISTYLE_PROMPTS, get_multistyle_prompt
	from knowledge_textbook import (
	get_knowledge_extraction_prompt,
	get_textbook_exercise_prompt,
	TEXTBOOK_EXERCISE_PROMPTS,
	)
	from run_synthesis import (
	parse_qa_output,
	parse_conversation_output,
	parse_rewrite_output,
	parse_knowledge_output,
	parse_textbook_output,
	)

	# API configuration: the key and base URL are read from environment variables
	# (set via HF Secrets); the base URL falls back to the default below when unset.
	API_KEY = os.getenv("OPENAI_API_KEY")
	BASE_URL = os.getenv("OPENAI_BASE_URL", "https://llm-center.ali.modelbest.cn/llm/openai/v1")
	# Model identifier sent with every chat-completion request made by call_api.
	DEFAULT_MODEL = "GEMINI_anxt74"

	# Example data loaded into the input boxes by the "Load Example" buttons.
	EXAMPLE_MATH_CONTENT = """The quadratic formula is a fundamental result in algebra that provides the solutions to any quadratic equation of the form ax² + bx + c = 0, where a ≠ 0.

	The formula states that the solutions are:
	x = (-b ± √(b² - 4ac)) / (2a)

	The term b² - 4ac is called the discriminant. It determines the nature of the roots:
	- If b² - 4ac > 0, there are two distinct real roots
	- If b² - 4ac = 0, there is exactly one real root (a repeated root)
	- If b² - 4ac < 0, there are two complex conjugate roots

	This formula was known to ancient mathematicians and remains one of the most important tools in solving polynomial equations."""

	# The absolute-value bars are written as plain "|": a backslash before "|" is an
	# invalid escape sequence in a normal string literal and would also leave a
	# literal backslash in the text shown to the user.
	EXAMPLE_KNOWLEDGE_POINT = """Definition: A continuous function is a function f: R → R such that for every point x₀ in its domain and every ε > 0, there exists a δ > 0 such that |f(x) - f(x₀)| < ε whenever |x - x₀| < δ.

	Key Properties:
	1. The sum, difference, and product of continuous functions are continuous
	2. The composition of continuous functions is continuous
	3. A continuous function on a closed interval attains its maximum and minimum values (Extreme Value Theorem)
	4. A continuous function on a closed interval takes on every value between its minimum and maximum (Intermediate Value Theorem)"""


	async def call_api(prompt: str, temperature: float = 0.7) -> str:
	    """Send one single-turn chat completion request and return the generated text.

	    Args:
	        prompt: Text sent as the only user message.
	        temperature: Sampling temperature forwarded to the API.

	    Returns:
	        The message content; if that is empty, the message's
	        ``reasoning_content`` when it exists and is non-empty; otherwise ``""``.
	        Request failures are not raised: a string starting with ``"Error: "``
	        is returned instead, and the same convention is used when no API key
	        is configured.
	    """
	    if not API_KEY:
	        return "Error: API Key not configured. Please contact administrator."

	    client = AsyncOpenAI(api_key=API_KEY, base_url=BASE_URL)
	    try:
	        # A new client is built for every call, so close it when the request
	        # is done instead of leaving its HTTP connections open.
	        async with client:
	            response = await client.chat.completions.create(
	                model=DEFAULT_MODEL,
	                messages=[{"role": "user", "content": prompt}],
	                temperature=temperature,
	                max_tokens=8192,
	            )
	        # Handle the response format of reasoning models: when the regular
	        # content is empty, fall back to reasoning_content if it is present.
	        message = response.choices[0].message
	        content = message.content or getattr(message, "reasoning_content", None)
	        return content or ""
	    except Exception as e:
	        # UI boundary: report the failure as text rather than raising.
	        return f"Error: {str(e)}"


	def run_async(coro):
	    """Run a coroutine to completion from synchronous code and return its result.

	    Each call executes the coroutine on a fresh event loop created and closed
	    by ``asyncio.run``, so no loop is left open afterwards.

	    Args:
	        coro: The coroutine object to execute.

	    Returns:
	        Whatever the coroutine returns. Exceptions raised by the coroutine
	        propagate to the caller.
	    """
	    try:
	        asyncio.get_running_loop()
	    except RuntimeError:
	        # No loop is running in this thread: run the coroutine directly.
	        return asyncio.run(coro)

	    # A loop is already running in this thread, and it cannot be re-entered.
	    # Run the coroutine on its own loop in a helper thread and wait for it.
	    # This blocks the calling thread until the coroutine finishes.
	    from concurrent.futures import ThreadPoolExecutor

	    with ThreadPoolExecutor(max_workers=1) as pool:
	        return pool.submit(asyncio.run, coro).result()


	# ============================================================================
	# Task Handlers
	# ============================================================================

	def qa_synthesis(text: str, level: str):
	    """Synthesize a question/answer pair from mathematical text.

	    Args:
	        text: Source content substituted into the prompt template.
	        level: Key handed to ``get_qa_prompt`` to pick the template.

	    Returns:
	        A ``(problem, solution, raw_response)`` tuple. All three are empty
	        strings when ``text`` is blank; missing parsed fields default to ``""``.
	    """
	    stripped = text.strip()
	    if not stripped:
	        return "", "", ""

	    filled_prompt = get_qa_prompt(level).format(text=text)
	    raw_output = run_async(call_api(filled_prompt))
	    fields = parse_qa_output(raw_output)

	    problem = fields.get("problem", "")
	    solution = fields.get("solution", "")
	    return problem, solution, raw_output


	def conversation_synthesis(text: str, style: str):
	    """Synthesize a multi-turn conversation from mathematical text.

	    Args:
	        text: Source content substituted into the prompt template.
	        style: Key handed to ``get_conversation_prompt`` to pick the template.

	    Returns:
	        A ``(conversation, raw_response)`` tuple. The first element is the
	        parsed ``"content"`` field, or the raw response when that field is
	        absent. Both are empty strings when ``text`` is blank.
	    """
	    stripped = text.strip()
	    if not stripped:
	        return "", ""

	    filled_prompt = get_conversation_prompt(style).format(text=text)
	    raw_output = run_async(call_api(filled_prompt))
	    fields = parse_conversation_output(raw_output)

	    conversation = fields.get("content", raw_output)
	    return conversation, raw_output


	def rewrite_synthesis(text: str, style: str):
	    """Rewrite mathematical text in the requested style.

	    Args:
	        text: Source content substituted into the prompt template.
	        style: Key handed to ``get_multistyle_prompt`` to pick the template.

	    Returns:
	        A ``(rewritten, raw_response)`` tuple. The first element is the parsed
	        ``"rewritten"`` field, or the raw response when that field is absent.
	        Both are empty strings when ``text`` is blank.
	    """
	    stripped = text.strip()
	    if not stripped:
	        return "", ""

	    filled_prompt = get_multistyle_prompt(style).format(text=text)
	    raw_output = run_async(call_api(filled_prompt))
	    fields = parse_rewrite_output(raw_output)

	    rewritten = fields.get("rewritten", raw_output)
	    return rewritten, raw_output


	def knowledge_extraction(text: str):
	    """Extract knowledge points from mathematical text.

	    Args:
	        text: Source content substituted into the extraction prompt template.

	    Returns:
	        A ``(formatted_points, raw_response)`` tuple. The parsed
	        ``"knowledge_points"`` entries are joined with a ``---`` separator
	        line; a fixed placeholder message is used when there are none.
	        Both elements are empty strings when ``text`` is blank.
	    """
	    stripped = text.strip()
	    if not stripped:
	        return "", ""

	    filled_prompt = get_knowledge_extraction_prompt().format(text=text)
	    raw_output = run_async(call_api(filled_prompt))
	    fields = parse_knowledge_output(raw_output)

	    points = fields.get("knowledge_points", [])
	    if points:
	        formatted = "\n\n---\n\n".join(points)
	    else:
	        formatted = "No knowledge points extracted."

	    return formatted, raw_output


	def textbook_exercise(knowledge_point: str, difficulty: str):
	    """Generate textbook exercise material for a knowledge point.

	    Args:
	        knowledge_point: Text substituted into the template's
	            ``mathematical_knowledge_point`` placeholder.
	        difficulty: Key handed to ``get_textbook_exercise_prompt`` to pick
	            the template.

	    Returns:
	        A ``(material, raw_response)`` tuple. The first element is the parsed
	        ``"material"`` field, or the raw response when that field is absent.
	        Both are empty strings when ``knowledge_point`` is blank.
	    """
	    stripped = knowledge_point.strip()
	    if not stripped:
	        return "", ""

	    template = get_textbook_exercise_prompt(difficulty)
	    filled_prompt = template.format(mathematical_knowledge_point=knowledge_point)
	    raw_output = run_async(call_api(filled_prompt))
	    fields = parse_textbook_output(raw_output)

	    material = fields.get("material", raw_output)
	    return material, raw_output


	# ============================================================================
	# Gradio UI
	# ============================================================================

	# Stylesheet passed to gr.Blocks(css=...). It defines a light colour palette as
	# CSS variables and overrides the look of the title, panels, labels, inputs,
	# dropdowns, markdown, output boxes, buttons, tabs, radio buttons and footer.
	custom_css = """
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600;700&family=JetBrains+Mono:wght@400;500&display=swap');

	:root {
	--bg: #f8fafc;
	--surface: #ffffff;
	--surface-2: #f1f5f9;
	--border: #e2e8f0;
	--text: #0f172a;
	--muted: #1f2937; /* darker for readability */
	--accent: #4f46e5;
	--accent-2: #6366f1;
	}

	body {
	background-color: var(--bg);
	color: var(--text);
	}

	.gradio-container {
	font-family: 'Inter', sans-serif !important;
	background: linear-gradient(180deg, #f8fafc 0%, #eef2ff 100%) !important;
	max-width: 1440px !important;
	width: 100% !important;
	margin-left: auto !important;
	margin-right: auto !important;
	--button-primary-text-color: #ffffff;
	--button-primary-text-color-hover: #ffffff;
	--button-primary-text-color-active: #ffffff;
	--button-primary-background-fill: #6366f1;
	--button-primary-background-fill-hover: #4f46e5;
	--button-primary-border-color: #6366f1;
	}

	/* Title & Header */
	.main-title {
	font-family: 'Inter', sans-serif !important;
	font-weight: 800 !important;
	font-size: 2.6rem !important;
	background: linear-gradient(90deg, #0f172a, #4f46e5, #7c3aed) !important;
	-webkit-background-clip: text !important;
	-webkit-text-fill-color: transparent !important;
	text-align: center !important;
	margin-bottom: 0.4rem !important;
	}

	.subtitle {
	text-align: center !important;
	color: var(--muted) !important;
	font-size: 1.05rem !important;
	margin-bottom: 2.5rem !important;
	font-weight: 400 !important;
	}

	/* Panels */
	.glass-panel {
	background: var(--surface) !important;
	border: 1px solid var(--border) !important;
	border-radius: 16px !important;
	padding: 24px !important;
	box-shadow: 0 10px 30px rgba(15, 23, 42, 0.08) !important;
	}

	/* Labels */
	.block > label > span,
	.form > label > span,
	.gr-form > label > span,
	.label-wrap > span {
	color: var(--text) !important;
	font-weight: 600 !important;
	font-size: 1rem !important;
	margin-bottom: 0.5rem !important;
	text-shadow: none !important;
	}

	/* Radio group title */
	fieldset legend, fieldset legend span,
	.gr-radio > label, .gr-radio > label span,
	.gradio-container .label-wrap, .gradio-container .label-wrap span {
	color: var(--text) !important;
	font-weight: 600 !important;
	text-shadow: none !important;
	}

	/* Info Text (Description) */
	span.description, .description {
	color: var(--muted) !important;
	font-weight: 500 !important;
	text-shadow: none !important;
	opacity: 1 !important;
	}

	/* Radio/Checkbox alignment */
	fieldset label span {
	margin-bottom: 0 !important;
	text-shadow: none !important;
	font-weight: 500 !important;
	color: var(--text) !important;
	display: flex !important;
	align-items: center !important;
	}

	fieldset label.selected span {
	color: var(--text) !important;
	}

	fieldset label.selected {
	background: transparent !important;
	border-color: transparent !important;
	box-shadow: none !important;
	}

	fieldset label {
	border: none !important;
	background: transparent !important;
	box-shadow: none !important;
	}

	/* Inputs & Textareas */
	.gr-input, textarea, input, .gr-box, .gr-check-radio, .gr-dropdown {
	font-family: 'JetBrains Mono', monospace !important;
	background-color: var(--surface) !important;
	border: 1px solid var(--border) !important;
	color: var(--text) !important;
	box-shadow: none !important;
	}

	.gr-input:focus, textarea:focus, input:focus {
	border-color: var(--accent) !important;
	box-shadow: 0 0 0 2px rgba(79, 70, 229, 0.15) !important;
	}

	/* Dropdown options */
	ul.options, .gr-dropdown-options {
	background-color: var(--surface) !important;
	color: var(--text) !important;
	border: 1px solid var(--border) !important;
	}

	/* Markdown prose */
	.prose, .prose p, .prose h1, .prose h2, .prose h3, .prose strong, .prose li {
	color: var(--text) !important;
	}

	/* Outputs */
	.output-textbox textarea {
	background-color: var(--surface-2) !important;
	border: 1px solid var(--border) !important;
	border-radius: 8px !important;
	color: var(--text) !important;
	}

	.markdown-box {
	background: var(--surface-2) !important;
	border: 1px solid var(--border) !important;
	border-radius: 8px !important;
	padding: 16px !important;
	color: var(--text) !important;
	}

	.markdown-box * {
	color: var(--text) !important;
	}

	.markdown-box code, .markdown-box pre {
	background: #e2e8f0 !important;
	}

	/* Buttons */
	.gr-button-primary {
	background: #6366f1 !important; /* purple */
	border: none !important;
	color: #ffffff !important;
	font-weight: 600 !important;
	box-shadow: 0 6px 14px rgba(79, 70, 229, 0.25) !important;
	}
	.gr-button-primary,
	.gr-button-primary span,
	.gr-button-primary p,
	.gr-button-primary .label,
	.gr-button-primary svg {
	color: #ffffff !important;
	-webkit-text-fill-color: #ffffff !important;
	fill: #ffffff !important;
	}
	.gr-button-primary,
	.gr-button-primary * {
	--button-primary-text-color: #ffffff !important;
	--button-primary-text-color-hover: #ffffff !important;
	--button-primary-text-color-active: #ffffff !important;
	}
	.gradio-container button.gr-button-primary,
	.gradio-container button.gr-button-primary span,
	.gradio-container button.primary,
	.gradio-container button.primary span {
	color: #ffffff !important;
	-webkit-text-fill-color: #ffffff !important;
	}

	.gr-button-secondary {
	background: #475569 !important;
	border: 1px solid #334155 !important;
	color: #ffffff !important;
	box-shadow: 0 4px 10px rgba(15, 23, 42, 0.15) !important;
	}
	.gr-button-secondary:hover {
	background: #334155 !important;
	border-color: #1f2937 !important;
	}
	.gr-button-secondary,
	.gr-button-secondary span,
	.gr-button-secondary p,
	.gr-button-secondary .label {
	color: #ffffff !important;
	-webkit-text-fill-color: #ffffff !important;
	}

	/* Tabs */
	/* Tabs */
	.tabs button {
	color: #0f172a !important; /* dark text for readability */
	font-weight: 600 !important;
	}
	.tabs button.selected {
	color: #ffffff !important;
	background: var(--accent) !important;
	border-radius: 0 !important;
	padding: 4px 10px !important;
	border-bottom: none !important;
	box-shadow: none !important;
	}
	.tabs button.selected::after {
	display: none !important;
	content: none !important;
	border-bottom: none !important;
	}

	/* Radio buttons */
	.gr-radio-label {
	color: var(--text) !important;
	}

	/* Radio: custom filled dot */
	.gr-check-radio input[type="radio"] {
	appearance: none !important;
	-webkit-appearance: none !important;
	-moz-appearance: none !important;
	width: 16px !important;
	height: 16px !important;
	border-radius: 999px !important;
	border: 2px solid #cbd5e1 !important;
	background: transparent !important;
	display: inline-block !important;
	position: relative !important;
	box-sizing: border-box !important;
	vertical-align: middle !important;
	}

	fieldset label.selected input[type="radio"] {
	border-color: var(--accent) !important;
	background: radial-gradient(circle at center, #4f46e5 0 5px, transparent 5px) !important;
	}

	/* Footer */
	.footer-text, .footer-text p {
	color: var(--muted) !important;
	}
	.footer-text a {
	color: var(--accent) !important;
	}
	"""

	# Additional <style> element injected into the page through gr.HTML inside the
	# Blocks layout; it sets white text on primary buttons and their span/p children.
	extra_css = """
	<style>
	.gradio-container button.gr-button-primary,
	.gradio-container button.gr-button-primary span,
	.gradio-container button.gr-button-primary p {
	color: #ffffff !important;
	-webkit-text-fill-color: #ffffff !important;
	}
	</style>
	"""

	# Build the Gradio interface: a title, a subtitle, the extra <style> element,
	# five task tabs and a footer. Every tab has an input column (text box,
	# optional radio selector, "Load Example" and "Generate" buttons) and an output
	# column whose "Raw Response" box is created hidden (visible=False).
	# NOTE(review): the nesting below was reconstructed from a copy of this file
	# whose indentation was lost; confirm the layout against the deployed app.
	with gr.Blocks(title="UltraData-Math-L3-Generator", css=custom_css, theme=gr.themes.Soft()) as demo:
	    gr.HTML('<h1 class="main-title">UltraData-Math-L3-Generator</h1>')
	    gr.HTML('<p class="subtitle">✨ Next-Gen Mathematical Data Synthesis Powered by LLM ✨</p>')
	    gr.HTML(extra_css)

	    with gr.Tabs():
	        # Q&A Synthesis Tab
	        with gr.TabItem("📝 Q&A Synthesis"):
	            with gr.Column(elem_classes=["glass-panel"]):
	                gr.Markdown("### 💡 Transform Text into Q&A Pairs\nGenerate high-quality question-answer pairs from mathematical content, tailored to different educational levels.")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        qa_input = gr.Textbox(
	                            label="Input Mathematical Content",
	                            placeholder="Paste your mathematical text here (e.g., definitions, theorems, proofs)...",
	                            lines=10,
	                        )
	                        # Choices come from the keys of QA_PROMPTS.
	                        qa_level = gr.Radio(
	                            choices=list(QA_PROMPTS.keys()),
	                            value="high_school",
	                            label="Difficulty Level",
	                            info="Select the target audience level"
	                        )
	                        with gr.Row():
	                            qa_example_btn = gr.Button("Load Example", variant="secondary")
	                            qa_btn = gr.Button("Generate", variant="primary")

	                    with gr.Column(scale=1):
	                        qa_problem = gr.Textbox(label="Generated Problem", lines=5)
	                        qa_solution = gr.Textbox(label="Generated Solution", lines=12)
	                        qa_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	                # "Load Example" fills the input box with the sample text.
	                qa_example_btn.click(
	                    lambda: EXAMPLE_MATH_CONTENT,
	                    outputs=[qa_input],
	                )
	                qa_btn.click(
	                    qa_synthesis,
	                    inputs=[qa_input, qa_level],
	                    outputs=[qa_problem, qa_solution, qa_raw],
	                )

	        # Conversation Synthesis Tab
	        with gr.TabItem("💬 Conversation Synthesis"):
	            with gr.Column(elem_classes=["glass-panel"]):
	                gr.Markdown("### 🗣️ Create Multi-turn Dialogues\nConvert static mathematical text into dynamic, engaging multi-turn conversations between students and teachers.")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        conv_input = gr.Textbox(
	                            label="Input Mathematical Content",
	                            placeholder="Paste your mathematical text here...",
	                            lines=10,
	                        )
	                        # Choices come from the keys of CONVERSATION_PROMPTS.
	                        conv_style = gr.Radio(
	                            choices=list(CONVERSATION_PROMPTS.keys()),
	                            value="teacher_student",
	                            label="Conversation Style",
	                            info="Choose the persona and tone of the conversation"
	                        )
	                        with gr.Row():
	                            conv_example_btn = gr.Button("Load Example", variant="secondary")
	                            conv_btn = gr.Button("Generate", variant="primary")

	                    with gr.Column(scale=1):
	                        conv_output = gr.Textbox(label="Generated Conversation", lines=20)
	                        conv_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	                conv_example_btn.click(
	                    lambda: EXAMPLE_MATH_CONTENT,
	                    outputs=[conv_input],
	                )
	                conv_btn.click(
	                    conversation_synthesis,
	                    inputs=[conv_input, conv_style],
	                    outputs=[conv_output, conv_raw],
	                )

	        # Rewrite Tab
	        with gr.TabItem("✨ Multi-style Rewrite"):
	            with gr.Column(elem_classes=["glass-panel"]):
	                gr.Markdown("### 🎨 Style Transfer\nRewrite mathematical content into various styles, from rigorous textbooks to engaging blog posts.")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        rewrite_input = gr.Textbox(
	                            label="Input Mathematical Content",
	                            placeholder="Paste your mathematical text here...",
	                            lines=10,
	                        )
	                        # Choices come from the keys of MULTISTYLE_PROMPTS.
	                        rewrite_style = gr.Radio(
	                            choices=list(MULTISTYLE_PROMPTS.keys()),
	                            value="textbook",
	                            label="Target Style",
	                            info="Select the desired output style"
	                        )
	                        with gr.Row():
	                            rewrite_example_btn = gr.Button("Load Example", variant="secondary")
	                            rewrite_btn = gr.Button("Generate", variant="primary")

	                    with gr.Column(scale=1):
	                        rewrite_output = gr.Textbox(label="Rewritten Content", lines=20)
	                        rewrite_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	                rewrite_example_btn.click(
	                    lambda: EXAMPLE_MATH_CONTENT,
	                    outputs=[rewrite_input],
	                )
	                rewrite_btn.click(
	                    rewrite_synthesis,
	                    inputs=[rewrite_input, rewrite_style],
	                    outputs=[rewrite_output, rewrite_raw],
	                )

	        # Knowledge Extraction Tab
	        with gr.TabItem("📚 Knowledge Extraction"):
	            with gr.Column(elem_classes=["glass-panel"]):
	                gr.Markdown("### 🧠 Extract Core Knowledge\nAutomatically identify and extract key definitions, theorems, and properties from unstructured text.")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        know_input = gr.Textbox(
	                            label="Input Mathematical Content",
	                            placeholder="Paste your mathematical text here...",
	                            lines=12,
	                        )
	                        with gr.Row():
	                            know_example_btn = gr.Button("Load Example", variant="secondary")
	                            know_btn = gr.Button("Generate", variant="primary")

	                    with gr.Column(scale=1):
	                        know_output = gr.Textbox(label="Extracted Knowledge Points", lines=20)
	                        know_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	                know_example_btn.click(
	                    lambda: EXAMPLE_MATH_CONTENT,
	                    outputs=[know_input],
	                )
	                know_btn.click(
	                    knowledge_extraction,
	                    inputs=[know_input],
	                    outputs=[know_output, know_raw],
	                )

	        # Textbook Exercise Tab
	        with gr.TabItem("📖 Textbook Exercise"):
	            with gr.Column(elem_classes=["glass-panel"]):
	                gr.Markdown("### 📝 Generate Exercises\nCreate comprehensive textbook-style exercises and problems based on specific knowledge points.")
	                with gr.Row():
	                    with gr.Column(scale=1):
	                        textbook_input = gr.Textbox(
	                            label="Input Knowledge Point",
	                            placeholder="Enter a specific mathematical concept or theorem...",
	                            lines=8,
	                        )
	                        # Choices come from the keys of TEXTBOOK_EXERCISE_PROMPTS.
	                        textbook_diff = gr.Radio(
	                            choices=list(TEXTBOOK_EXERCISE_PROMPTS.keys()),
	                            value="easy",
	                            label="Difficulty",
	                            info="Select the problem difficulty"
	                        )
	                        with gr.Row():
	                            textbook_example_btn = gr.Button("Load Example", variant="secondary")
	                            textbook_btn = gr.Button("Generate", variant="primary")

	                    with gr.Column(scale=1):
	                        textbook_output = gr.Textbox(label="Generated Exercise Material", lines=20)
	                        textbook_raw = gr.Textbox(label="Raw Response", lines=4, visible=False)

	                # This tab loads the knowledge-point sample rather than the math-content one.
	                textbook_example_btn.click(
	                    lambda: EXAMPLE_KNOWLEDGE_POINT,
	                    outputs=[textbook_input],
	                )
	                textbook_btn.click(
	                    textbook_exercise,
	                    inputs=[textbook_input, textbook_diff],
	                    outputs=[textbook_output, textbook_raw],
	                )

	    # Footer with project attribution and a link to the Space.
	    gr.HTML("""
	<div class="footer-text">
	<p>🔬 <strong>UltraData-Math-L3-Generator</strong> - Part of the UltraData-Math Project</p>
	<p>Powered by OpenBMB & ModelBest • <a href="https://huggingface.co/spaces/openbmb/UltraData-Math-L3-Generator" target="_blank" style="color: #818cf8; text-decoration: none;">View on Hugging Face</a></p>
	</div>
	""")


	# Start the app only when this file is executed as a script; ssr_mode is
	# explicitly set to False for the launch.
	if __name__ == "__main__":
	    demo.launch(ssr_mode=False)