Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import json | |
| from model_handler import ModelHandler | |
| from config import LING_1T | |
def _format_kb_for_prompt(df: pd.DataFrame) -> str:
    """Render the knowledge-base terms as a bullet list for the LLM prompt.

    Only the ``Term`` column is read. Rows whose term is missing (``None`` /
    NaN) or blank are skipped so the prompt never contains junk bullets such
    as ``- nan``.

    Args:
        df: Knowledge-base table with a ``Term`` column, or ``None``.

    Returns:
        One ``- <term>`` line per usable term, joined with newlines, or the
        placeholder ``"无。"`` when there is no table, no rows, or no usable
        term.

    Raises:
        KeyError: If a non-empty ``df`` has no ``Term`` column.
    """
    if df is None or df.empty:
        return "无。"

    # Read the column directly: iterrows() rebuilds every row as a Series
    # (which can upcast values) only to look up a single field.
    bullet_lines = [
        f"- {term}"
        for term in df["Term"]
        if not pd.isna(term) and str(term).strip()
    ]
    # A table that holds only blank terms is equivalent to having no terms.
    return "\n".join(bullet_lines) if bullet_lines else "无。"
def _strip_code_fence(text: str) -> str:
    """Return *text* without a surrounding Markdown code fence, if it has one."""
    cleaned = text.strip()
    if not cleaned.startswith("```"):
        return cleaned
    cleaned = cleaned[3:]
    # Optional language tag directly after the opening fence (e.g. ```json).
    if cleaned[:4].lower() == "json":
        cleaned = cleaned[4:]
    cleaned = cleaned.strip()
    # Drop the closing fence only when it is really there; slicing off the
    # last three characters unconditionally would truncate valid JSON.
    if cleaned.endswith("```"):
        cleaned = cleaned[:-3]
    return cleaned.strip()


def suggest_new_kb_terms_agent(kb_df: pd.DataFrame, editor_content: str):
    """Ask the LLM for new knowledge-base terms found in the editor text.

    The last 4000 characters of ``editor_content`` are sent to the model
    together with the terms already present in ``kb_df``. The reply is
    expected to be a JSON array of objects with ``Term`` and ``Description``
    keys; a Markdown code fence around the array is tolerated.

    Args:
        kb_df: Current knowledge-base table (may be ``None`` or empty).
        editor_content: Text currently in the editor. If it is ``None`` or
            shorter than 50 characters after stripping, no request is made.

    Returns:
        A pair of ``gr.update(...)`` results. On success the first carries
        the suggestions as ``[term, description]`` rows and both are marked
        visible. When the text is too short the first is reset to an empty
        value and both are hidden. On any error both are hidden and the
        failure is printed, never raised.
        NOTE(review): presumably the first update targets the suggestions
        Dataframe and the second its companion control; confirm against the
        UI wiring.
    """
    if editor_content is None or len(editor_content.strip()) < 50:
        print("[Agent] Editor content too short, skipping KB suggestion.")
        # Return empty data and keep components hidden
        return gr.update(value=[], visible=False), gr.update(visible=False)

    try:
        # 1. Prepare prompts
        system_prompt = (
            "你是一个实体提取机器人。你的任务是从给定文本中识别出新的、重要的、值得记录的专有名词(如人名、地名、组织、物品)或核心概念,并为它们提供一句简洁的描述。\n"
            "你的回答必须是一个遵循以下规则的 JSON 数组:\n"
            "1. 数组中的每个元素都是一个对象。\n"
            "2. 每个对象必须包含两个键:`Term` (词条名) 和 `Description` (描述)。\n"
            "3. 不要提取已经存在于'现有知识库'中的词条。\n"
            "4. 最多返回 5 个最重要的词条。\n"
            "5. 不要返回除了这个 JSON 数组之外的任何其他文本、解释或代码块标记。"
        )
        kb_str = _format_kb_for_prompt(kb_df)
        user_prompt = (
            f"### 现有知识库\n{kb_str}\n\n"
            f"### 当前文本\n{editor_content[-4000:]}\n\n"
            "### 指令\n请根据'当前文本',分析并提取出新的知识库词条,并返回 JSON 数组。"
        )

        # 2. Call the LLM; the result is consumed as an iterable of string chunks
        model_handler = ModelHandler()
        response_generator = model_handler.generate_code(
            system_prompt=system_prompt,
            user_prompt=user_prompt,
            model_choice=LING_1T
        )
        full_response = "".join(response_generator)

        # 3. Parse JSON and format for the Dataframe
        print("【收到的完整上下文】")
        print("full_response:", repr(full_response))
        # Models often wrap the array in a code fence despite the instructions.
        full_response = _strip_code_fence(full_response)
        suggested_terms = json.loads(full_response)
        if not isinstance(suggested_terms, list):
            # Valid JSON of the wrong shape; reported by the generic handler below.
            raise ValueError(
                f"expected a JSON array, got {type(suggested_terms).__name__}"
            )

        # Convert list of dicts to list of lists for Gradio Dataframe; elements
        # that are not objects are skipped instead of discarding the whole batch.
        df_data = [
            [item.get("Term", ""), item.get("Description", "")]
            for item in suggested_terms
            if isinstance(item, dict)
        ]
        print("【收到的完整上下文】")
        print("suggested_terms:", repr(suggested_terms))

        # Make components visible and return data
        return gr.update(value=df_data, visible=True), gr.update(visible=True)
    except json.JSONDecodeError:
        print(f"[Agent] Error: Failed to decode JSON from LLM response for KB: {full_response}")
        return gr.update(visible=False), gr.update(visible=False)
    except Exception as e:
        # Best-effort UI callback: report the failure and keep the panel hidden.
        print(f"[Agent] Error suggesting new KB terms: {e}")
        return gr.update(visible=False), gr.update(visible=False)