Upload app\prompts\graph_extraction.py with huggingface_hub
Browse files
app//prompts//graph_extraction.py
ADDED
|
@@ -0,0 +1,105 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Graph Extraction prompt
|
| 3 |
+
GraphRAG-based prompt for extracting entities (characters/locations) and relationships (events).
|
| 4 |
+
"""
|
| 5 |
+
|
| 6 |
+
from typing import Optional
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_graph_extraction_prompt(
    episode_content: str,
    episode_title: str,
    full_content: Optional[str] = None,
    parent_chunk_info: Optional[str] = None,
    max_length: int = 10000,
) -> str:
    """Build the prompt that asks a model for a graph extraction of one episode.

    The prompt embeds the episode text, optional reference material and a JSON
    template with ``entities`` (``characters`` / ``locations``),
    ``relationships`` and ``events`` sections, followed by a numbered list of
    instructions.

    Args:
        episode_content: Episode text to analyse. Only the first ``max_length``
            characters are embedded; when the text is longer, a truncation
            note is appended right after it.
        episode_title: Episode label; it is interpolated twice (in the opening
            sentence and in the heading above the episode text).
        full_content: Optional reference text. When it is longer than 30000
            characters only its first and last 15000 characters are kept,
            joined by an elision marker. When missing or empty, a placeholder
            literal is embedded instead.
        parent_chunk_info: Optional text inserted verbatim near the top of the
            prompt; an empty line is left in its place when missing or empty.
        max_length: Maximum number of characters of ``episode_content`` to
            embed. Negative values are treated as 0 so that a bad limit can
            never turn into "everything except the last N characters".

    Returns:
        The assembled prompt string.
    """
    # NOTE(review): the non-ASCII literals below look like mis-decoded UTF-8
    # (presumably Korean); some of them even carry stray line breaks inside the
    # JSON template. They are reproduced verbatim as found -- confirm against
    # the original file before shipping.

    # Clamp the limit: a negative slice bound would count from the end.
    limit = max(max_length, 0)
    content_preview = episode_content[:limit]
    truncation_note = (
        "\n(์ฐธ๊ณ : ํ์ฐจ ๋ด์ฉ์ด ๊ธธ์ด ์ผ๋ถ๋ง ์ฌ์ฉ๋์์ต๋๋ค.)"
        if len(episode_content) > limit
        else ""
    )

    # Reference text: keep only the head and the tail when it is too long.
    head_tail_chars = 15000
    if not full_content:
        reference_text = "์์"
    elif len(full_content) > 2 * head_tail_chars:
        reference_text = (
            full_content[:head_tail_chars]
            + "\n... (์ค๊ฐ ์๋ต) ...\n"
            + full_content[-head_tail_chars:]
        )
    else:
        reference_text = full_content

    parent_section = parent_chunk_info or ""

    # Doubled braces are literal braces of the JSON template in the f-string.
    prompt = f"""๋ค์ ์น์์ค์ {episode_title} ํ์ฐจ์์ ์ํฐํฐ(์ธ๋ฌผ/์ฅ์)์ ๊ด๊ณ(์ฌ๊ฑด)๋ฅผ ์ถ์ถํด์ฃผ์ธ์.

{parent_section}

์๋ณธ ์น์์ค ์ ์ฒด ๋ด์ฉ (์ฐธ๊ณ ์ฉ):
{reference_text}

๋ถ์ํ ํ์ฐจ ๋ด์ฉ ({episode_title}):
{content_preview}{truncation_note}

๋ค์ ํ์์ผ๋ก JSON ํ์์ผ๋ก๋ง ์๋ตํ์ธ์:

{{
"entities": {{
"characters": [
{{
"name": "์ธ๋ฌผ ์ด๋ฆ",
"type": "์ธ๋ฌผ",
"description": "์ธ๋ฌผ์ ๋ํ ๊ฐ๋จํ ์ค๋ช
",
"role": "์ด ํ์ฐจ์์์ ์ญํ (์: ์ฃผ์ธ๊ณต, ์กฐ์ฐ, ์
์ญ ๋ฑ)"
}}
],
"locations": [
{{
"name": "์ฅ์ ์ด๋ฆ",
"type": "์ฅ์",
"description": "์ฅ์์ ๋ํ ๊ฐ๋จํ ์ค๋ช
",
"category": "์ฅ์ ์ ํ (์: ๋์, ๊ฑด๋ฌผ, ์ฐจ์ ๋ฑ)"
}}
]
}},
"relationships": [
{{
"source": "๊ด๊ณ์ ์ฃผ์ฒด (์ธ๋ฌผ ์ด๋ฆ)",
"target": "๊ด๊ณ์ ๋์ (์ธ๋ฌผ ์ด๋ฆ ๋๋ ์ฅ์ ์ด๋ฆ)",
"type": "๊ด๊ณ ์ ํ (์: ์น๊ตฌ, ์ , ์ฐ์ธ, ๊ฑฐ์ฃผ์ง, ๋ฐฉ๋ฌธ์ง ๋ฑ)",
"description": "๊ด๊ณ์ ๋ํ ์์ธ ์ค๋ช
",
"event": "์ด ๊ด๊ณ๋ฅผ ํ์ฑํ๊ฑฐ๋ ๋ณํ์ํจ ์ฌ๊ฑด (์๋ ๊ฒฝ์ฐ)"
}}
],
"events": [
{{
"name": "์ฌ๊ฑด ์ด๋ฆ",
"description": "์ฌ๊ฑด์ ๋ํ ์์ธ ์ค๋ช
",
"participants": ["๊ด๋ จ ์ธ๋ฌผ1", "๊ด๋ จ ์ธ๋ฌผ2"],
"location": "์ฌ๊ฑด์ด ๋ฐ์ํ ์ฅ์",
"significance": "์ฌ๊ฑด์ ์ค์๋ (์: ์ฃผ์ ์ฌ๊ฑด, ๋ถ์ ์ฌ๊ฑด ๋ฑ)"
}}
]
}}

์ค์ ์ฌํญ:
1. ์ํฐํฐ๋ ์ด ํ์ฐจ์์ ์ค์ ๋ก ๋ฑ์ฅํ๊ฑฐ๋ ์ธ๊ธ๋ ์ธ๋ฌผ๊ณผ ์ฅ์๋ง ์ถ์ถํ์ธ์.
2. ๊ด๊ณ๋ ์ด ํ์ฐจ์์ ์๋ก ํ์ฑ๋๊ฑฐ๋ ๋ณํํ ๊ด๊ณ๋ฅผ ์ค์ฌ์ผ๋ก ์ถ์ถํ์ธ์.
3. ์ฌ๊ฑด์ ์ด ํ์ฐจ์์ ์ผ์ด๋ ๊ตฌ์ฒด์ ์ธ ์ฌ๊ฑด๋ค์ ์ถ์ถํ์ธ์.
4. ์๋ต์ ์ค์ง JSON ํ์๋ง ์ฌ์ฉํ๊ณ , ๋ค๋ฅธ ์ค๋ช
์ด๋ ๋งํฌ๋ค์ด์ ํฌํจํ์ง ๋ง์ธ์.
5. JSON ํ์์ด ์ฌ๋ฐ๋ฅธ์ง ๋ฐ๋์ ํ์ธํ์ธ์ (๋ฐ์ดํ ์ด์ค์ผ์ดํ ๋ฑ).
6. ๋ฐฐ์ด์ด ๋น์ด์์ ๊ฒฝ์ฐ ๋น ๋ฐฐ์ด []๋ก ํ์ํ์ธ์.
7. ํ๋ ๊ฐ์ด ์๋ ๊ฒฝ์ฐ null ๋์ ๋น ๋ฌธ์์ด "" ๋๋ ๋น ๋ฐฐ์ด []์ ์ฌ์ฉํ์ธ์."""

    return prompt
|
| 105 |
+
|