wiizm committed on
Commit
0adcd58
·
verified ·
1 Parent(s): 2a6758b

Upload app\prompts\graph_extraction.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app//prompts//graph_extraction.py +105 -0
app//prompts//graph_extraction.py ADDED
@@ -0,0 +1,105 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ Graph Extraction prompt
3
+ GraphRAG-based prompt that extracts entities (characters/locations) and relationships (events)
4
+ """
5
+
6
+ from typing import Optional
7
+
8
+
9
def get_graph_extraction_prompt(
    episode_content: str,
    episode_title: str,
    full_content: Optional[str] = None,
    parent_chunk_info: Optional[str] = None,
    max_length: int = 10000
) -> str:
    """Build the graph-extraction prompt for one web-novel episode.

    The prompt (written in Korean) asks the model to extract entities
    (characters and locations), relationships and events from the episode
    and to answer with JSON only, following the template embedded in the
    prompt text.

    Args:
        episode_content: Text of the episode to analyse. Only the first
            ``max_length`` characters are placed in the prompt; when the
            text is longer, a truncation note is appended after it.
        episode_title: Episode title (e.g. '1화', '2화'); interpolated into
            the prompt in two places.
        full_content: Optional full text of the original web novel, included
            for reference. When longer than 30,000 characters only the first
            and last 15,000 characters are kept, joined by an omission
            marker. When missing or empty the prompt says "없음" instead.
        parent_chunk_info: Optional parent-chunk information, inserted
            verbatim near the top of the prompt (an empty line otherwise).
        max_length: Maximum number of characters of ``episode_content`` to
            include. Negative values are treated as 0.

    Returns:
        The complete prompt string.
    """
    # A negative bound would be read by the slice as "drop the last N
    # characters" rather than "keep nothing", so clamp it to zero.
    limit = max(max_length, 0)
    content_preview = episode_content[:limit]
    if len(episode_content) > limit:
        truncation_note = "\n(참고: 회차 내용이 길어 일부만 사용되었습니다.)"
    else:
        truncation_note = ""

    # Reference text: keep the head and tail of an over-long novel so the
    # prompt stays bounded. The result can never exceed the limit plus the
    # omission marker, so no further slicing is needed afterwards.
    full_content_limit = 30000
    excerpt_length = full_content_limit // 2
    if not full_content:
        reference_text = "없음"
    elif len(full_content) > full_content_limit:
        reference_text = (
            full_content[:excerpt_length]
            + "\n... (중간 생략) ...\n"
            + full_content[-excerpt_length:]
        )
    else:
        reference_text = full_content

    parent_section = parent_chunk_info or ""

    # Doubled braces below are literal braces of the JSON template.
    prompt = f"""다음 웹소설의 {episode_title} 회차에서 엔티티(인물/장소)와 관계(사건)를 추출해주세요.

{parent_section}

원본 웹소설 전체 내용 (참고용):
{reference_text}

분석할 회차 내용 ({episode_title}):
{content_preview}{truncation_note}

다음 형식으로 JSON 형식으로만 응답하세요:

{{
"entities": {{
"characters": [
{{
"name": "인물 이름",
"type": "인물",
"description": "인물에 대한 간단한 설명",
"role": "이 회차에서의 역할 (예: 주인공, 조연, 악역 등)"
}}
],
"locations": [
{{
"name": "장소 이름",
"type": "장소",
"description": "장소에 대한 간단한 설명",
"category": "장소 유형 (예: 도시, 건물, 차원 등)"
}}
]
}},
"relationships": [
{{
"source": "관계의 주체 (인물 이름)",
"target": "관계의 대상 (인물 이름 또는 장소 이름)",
"type": "관계 유형 (예: 친구, 적, 연인, 거주지, 방문지 등)",
"description": "관계에 대한 상세 설명",
"event": "이 관계를 형성하거나 변화시킨 사건 (있는 경우)"
}}
],
"events": [
{{
"name": "사건 이름",
"description": "사건에 대한 상세 설명",
"participants": ["관련 인물1", "관련 인물2"],
"location": "사건이 발생한 장소",
"significance": "사건의 중요도 (예: 주요 사건, 부수 사건 등)"
}}
]
}}

중요 사항:
1. 엔티티는 이 회차에서 실제로 등장하거나 언급된 인물과 장소만 추출하세요.
2. 관계는 이 회차에서 새로 형성되거나 변화한 관계를 중심으로 추출하세요.
3. 사건은 이 회차에서 일어난 구체적인 사건들을 추출하세요.
4. 응답은 오직 JSON 형식만 사용하고, 다른 설명이나 마크다운은 포함하지 마세요.
5. JSON 형식이 올바른지 반드시 확인하세요 (따옴표 이스케이프 등).
6. 배열이 비어있을 경우 빈 배열 []로 표시하세요.
7. 필드 값이 없는 경우 null 대신 빈 문자열 "" 또는 빈 배열 []을 사용하세요."""

    return prompt
105
+