| from dataclasses import dataclass | |
| import os | |
| from langchain.chat_models import init_chat_model | |
| from langchain_core.prompts import PromptTemplate | |
| from pydantic import BaseModel, Field | |
| from mocked_script import mocked_raw_script | |
@dataclass
class ScriptConfig:
    """Settings for article-to-podcast script generation.

    Declared as a dataclass so individual settings can be overridden at
    construction time, e.g. ``ScriptConfig(mocked=True)``.

    Attributes:
        articles_path: Directory that article files are read from.
        model: Chat model identifier passed to ``init_chat_model``.
        model_provider: Provider name passed to ``init_chat_model``.
        api_key: API key passed to ``init_chat_model``. The default is the
            value of the ``OPENAI_API_KEY`` environment variable at the
            moment this module is imported ("" when it is unset).
        mocked: When true, ``MarkdownToScrip.run`` builds the script from
            the canned ``mocked_raw_script`` instead of invoking the LLM.
    """

    articles_path: str = "./src/public/articles"
    model: str = "gpt-4o"
    model_provider: str = "openai"
    # Evaluated once, when the class body runs; later changes to the
    # environment are not picked up by the default.
    api_key: str = os.getenv("OPENAI_API_KEY", "")
    mocked: bool = False
class PodLine(BaseModel):
    """Podcast line"""

    # The docstring above and the descriptions below end up in the JSON
    # schema generated for this model, so they are part of what a
    # structured-output LLM call sees; edit them with that in mind.

    # Who is talking.
    speaker: str = Field(
        description="The name of the speaker",
    )

    # What that speaker says on this turn.
    text: str = Field(
        description="The text spoken by the speaker",
    )
# Structured-output schema for the LLM: the model is asked to answer with an
# object of this shape (see ``with_structured_output(PodScript)``).
class PodScript(BaseModel):
    """Podcast script"""

    # The description is sent to the model as part of the schema, so it must
    # describe this field (it previously carried text about a joke setup).
    conversation: list[PodLine] = Field(
        description="The ordered lines of dialogue that make up the podcast",
    )
class MarkdownToScrip:
    """Turn a stored article into a two-host podcast script via an LLM.

    The article is read from ``config.articles_path``, rendered into a prompt
    and sent to a chat model constrained to answer with a ``PodScript``.
    When ``config.mocked`` is set, ``run`` parses the canned
    ``mocked_raw_script`` instead and the LLM is not invoked.
    """

    # Prompt sent to the model. Placeholders filled in by _generate_script:
    # speaker1_name, speaker2_name, language_instruction, article.
    _PROMPT_TEMPLATE = """You are a creative podcast scriptwriter specializing in tech content. Your task is to turn the following technical article into a spoken podcast script designed for two speakers
The goal is to create a clear, engaging, natural-sounding conversation that feels spontaneous but informative, as if recorded for a professional podcast. The tone should be friendly, curious, and energetic.
1. The podcast must feature two fictional hosts, **{speaker1_name}** and **{speaker2_name}**, who take turns discussing the content.
2. Add informal elements like light humor, reactions, rhetorical questions, and natural interjections (\"Wait, what?\", \"Exactly!\", \"That's wild\", etc.)
3. Emphasize key points or surprising facts by marking them with [pause], [emphasis], or *italicized phrases* to guide expressive TTS rendering.
4. Begin with a short intro to set the tone of the episode and end with a friendly closing.
5. Break the discussion into logical sections (e.g., introduction, main points, implications, etc.)
6. Keep the language conversational and oral (short sentences, contractions, and natural rhythm).
7. Keep the duration equivalent to approximately 3–4 minutes when read aloud.
8. {language_instruction}
Now write the full podcast script with style markers where relevant.
Here is the article text:
{article}"""

    def __init__(self, config: "ScriptConfig"):
        """Build the structured-output chat model and the prompt template.

        Args:
            config: Supplies the model name, provider and API key for the
                chat model, plus the article directory and mock flag used
                by the other methods.
        """
        self._config = config
        chat_model = init_chat_model(
            model=config.model,
            model_provider=config.model_provider,
            api_key=config.api_key,
        )
        # Constrain the model to answer with a PodScript-shaped object.
        self._llm = chat_model.with_structured_output(PodScript)
        self._prompt = PromptTemplate.from_template(self._PROMPT_TEMPLATE)

    def _fetch_article(self, article: str) -> str:
        """Read an article from the configured articles directory.

        Args:
            article: File name of the article, relative to
                ``config.articles_path``.

        Returns:
            The article text, decoded as UTF-8.

        Raises:
            ValueError: If ``article`` is empty, or the file contains no
                text (a whitespace-only file counts as empty).
            FileNotFoundError: If no regular file exists at the resolved
                path.
        """
        if not article:
            raise ValueError("Article cannot be empty")

        # NOTE(review): ``article`` is appended verbatim, so a name holding
        # "../" can point outside articles_path. Validate it upstream if it
        # can come from untrusted input.
        full_path = f"{self._config.articles_path}/{article}"

        # isfile rather than exists: a directory at this path is reported as
        # "not found" instead of surfacing as an unrelated error from open().
        if not os.path.isfile(full_path):
            raise FileNotFoundError(f"Article not found: {full_path}")

        with open(full_path, "r", encoding="utf-8") as file:
            text = file.read()

        if not text.strip():
            raise ValueError("Article content is empty")
        return text

    async def _generate_script(
        self,
        article: str,
        target_language: str,
        speaker1_name: str,
        speaker2_name: str,
    ) -> "PodScript":
        """Ask the LLM to turn article text into a podcast script.

        Args:
            article: Full text of the article (not a file name).
            target_language: Language the podcast must be written in, or
                ``"Auto Detect"`` to keep the article's own language.
            speaker1_name: Name of the first host.
            speaker2_name: Name of the second host.

        Returns:
            The script produced by the model.

        Raises:
            RuntimeError: If the LLM call or the validation of its answer
                fails, or if the answer is neither a ``PodScript`` nor a
                dict.
        """
        if target_language == "Auto Detect":
            language_instruction = "The podcast MUST be in the same language as the article."
        else:
            language_instruction = f"The podcast MUST be in {target_language} language"

        payload = {
            "speaker1_name": speaker1_name,
            "speaker2_name": speaker2_name,
            "language_instruction": language_instruction,
            "article": article,
        }

        try:
            response = await self._prompt.pipe(self._llm).ainvoke(payload)
            # A plain dict answer is validated into a PodScript here so that
            # validation errors are reported like any other failure.
            if isinstance(response, dict):
                response = PodScript(**response)
        except Exception as e:
            # Chain the cause so the original traceback is preserved.
            raise RuntimeError(f"Failed to generate podcast script: {e}") from e

        # Fail loudly instead of implicitly returning None to the caller.
        if not isinstance(response, PodScript):
            raise RuntimeError(
                "Failed to generate podcast script: unexpected response type "
                f"{type(response).__name__}"
            )
        return response

    def _generate_mock_podcast_script(self) -> "PodScript":
        """Build a script from the canned ``mocked_raw_script`` text.

        Each line containing a colon is split at its first colon into a
        speaker name and the spoken text, both stripped of surrounding
        whitespace. Lines without a colon are skipped.

        Returns:
            A ``PodScript`` holding the parsed lines in their original
            order.
        """
        lines = []
        for raw_line in mocked_raw_script.strip().splitlines():
            speaker, separator, text = raw_line.partition(":")
            if separator:
                lines.append(PodLine(speaker=speaker.strip(), text=text.strip()))
        return PodScript(conversation=lines)

    async def run(
        self,
        article: str,
        target_language: str,
        speaker1_name: str,
        speaker2_name: str,
    ) -> "PodScript":
        """Convert an article into a podcast script.

        Args:
            article: File name of the article inside
                ``config.articles_path``. Ignored in mocked mode.
            target_language: Language for the podcast, or ``"Auto Detect"``.
            speaker1_name: Name of the first speaker.
            speaker2_name: Name of the second speaker.

        Returns:
            The generated (or mocked) podcast script.

        Raises:
            ValueError: If the article name or its content is empty.
            FileNotFoundError: If the article file does not exist.
            RuntimeError: If script generation fails.
        """
        print("Running script generation")
        if self._config.mocked:
            return self._generate_mock_podcast_script()

        text = self._fetch_article(article)
        return await self._generate_script(
            text, target_language, speaker1_name, speaker2_name
        )