# Spaces: Younup / UpVoice (Sleeping) - File size: 6,022 Bytes

import os
from dataclasses import dataclass, field

from langchain.chat_models import init_chat_model
from langchain_core.prompts import PromptTemplate
from pydantic import BaseModel, Field

from mocked_script import mocked_raw_script


@dataclass
class ScriptConfig:
    """Settings for turning an article into a podcast script.

    Attributes:
        articles_path: Directory that article files are read from.
        model: Name of the chat model to request.
        model_provider: Provider identifier passed along with ``model``.
        api_key: API key for the provider. When not given explicitly it is
            read from the ``OPENAI_API_KEY`` environment variable at the
            moment the instance is created, falling back to ``""``.
        mocked: When true, the canned mock script is used instead of the LLM.
    """

    articles_path: str = "./src/public/articles"
    model: str = "gpt-4o"
    model_provider: str = "openai"
    # Resolved per instance rather than once at import time, so a key that is
    # placed in the environment after this module is imported is still used.
    api_key: str = field(default_factory=lambda: os.getenv("OPENAI_API_KEY", ""))
    mocked: bool = False


# One spoken turn of the podcast: who speaks and what they say.
# NOTE(review): this model is nested inside PodScript, which is handed to
# `with_structured_output`; the class docstring and the Field descriptions
# below presumably end up in the schema shown to the LLM, so treat them as
# prompt text rather than as plain documentation - confirm before rewording.
class PodLine(BaseModel):
    """Podcast line"""
    # Speaker label for this line, as a plain string.
    speaker: str = Field(description="The name of the speaker")
    # The words spoken by that speaker in this turn.
    text: str = Field(description="The text spoken by the speaker")

# Pydantic
class PodScript(BaseModel):
    """Podcast script"""
    # The whole episode as an ordered list of speaker turns. The description
    # is part of the structured-output schema, so it must describe this field
    # (it previously carried unrelated text about a joke setup).
    conversation: list[PodLine] = Field(
        description="The ordered list of lines spoken in the podcast, one entry per speaker turn"
    )

class MarkdownToScrip:
    """Convert a stored article into a two-speaker podcast script.

    An instance keeps the configuration, a chat model wrapped so that it
    returns ``PodScript`` structured output, and the prompt template that is
    filled in for every generation request.
    """

    def __init__(self, config: ScriptConfig):
        """Create the structured-output chat model and the prompt template.

        Args:
            config: Model name, provider, API key, articles directory and the
                ``mocked`` switch consulted by :meth:`run`.
        """
        self._config = config
        self._llm = init_chat_model(
            model=config.model,
            model_provider=config.model_provider,
            api_key=config.api_key,
        ).with_structured_output(PodScript)
        # Template variables: speaker1_name, speaker2_name,
        # language_instruction and article. The text below is sent to the
        # model as-is, including its leading indentation.
        self._prompt = PromptTemplate.from_template("""You are a creative podcast scriptwriter specializing in tech content. Your task is to turn the following technical article into a spoken podcast script designed for two speakers
            The goal is to create a clear, engaging, natural-sounding conversation that feels spontaneous but informative, as if recorded for a professional podcast. The tone should be friendly, curious, and energetic.
            1. The podcast must feature two fictional hosts, **{speaker1_name}** and **{speaker2_name}**, who take turns discussing the content.
            2. Add informal elements like light humor, reactions, rhetorical questions, and natural interjections (\"Wait, what?\", \"Exactly!\", \"That's wild\", etc.)
            3. Emphasize key points or surprising facts by marking them with [pause], [emphasis], or *italicized phrases* to guide expressive TTS rendering.
            4. Begin with a short intro to set the tone of the episode and end with a friendly closing.
            5. Break the discussion into logical sections (e.g., introduction, main points, implications, etc.)
            6. Keep the language conversational and oral (short sentences, contractions, and natural rhythm).
            7. Keep the duration equivalent to approximately 3–4 minutes when read aloud.
            8. {language_instruction}
            Now write the full podcast script with style markers where relevant.
                                               
            Here is the article text:
            {article}""")

    def _fetch_article(self, article: str) -> str:
        """Read an article file from the configured articles directory.

        Args:
            article: File name of the article, appended to
                ``config.articles_path``.

        Returns:
            The article text, decoded as UTF-8.

        Raises:
            ValueError: If ``article`` is empty, or the file contains nothing
                but whitespace.
            FileNotFoundError: If the article file does not exist.
        """
        if not article:
            raise ValueError("Article cannot be empty")

        # NOTE(review): `article` is appended to the directory unchecked; if
        # it can come from user input, a name containing ".." would escape
        # articles_path - confirm against the callers.
        full_path = f"{self._config.articles_path}/{article}"
        if not os.path.exists(full_path):
            raise FileNotFoundError(f"Article not found: {full_path}")
        with open(full_path, "r", encoding="utf-8") as file:
            text = file.read()
        # A whitespace-only file gives the model nothing to work with, so it
        # is rejected the same way as a zero-length one.
        if not text.strip():
            raise ValueError("Article content is empty")
        return text

    async def _generate_script(
        self,
        article: str,
        target_language: str,
        speaker1_name: str,
        speaker2_name: str,
    ) -> PodScript:
        """Ask the LLM to turn article text into a podcast script.

        Args:
            article: The full article text (not a file name).
            target_language: Language for the podcast; the special value
                ``"Auto Detect"`` asks for the article's own language.
            speaker1_name: Name of the first host.
            speaker2_name: Name of the second host.

        Returns:
            The generated script.

        Raises:
            RuntimeError: If the LLM call or result validation fails, or the
                model returns something that is neither a ``PodScript`` nor
                a ``dict``.
        """
        if target_language == "Auto Detect":
            language_instruction = "The podcast MUST be in the same language as the article."
        else:
            language_instruction = f"The podcast MUST be in {target_language} language"

        try:
            response = await self._prompt.pipe(self._llm).ainvoke(
                {
                    "speaker1_name": speaker1_name,
                    "speaker2_name": speaker2_name,
                    "language_instruction": language_instruction,
                    "article": article,
                }
            )
            if isinstance(response, PodScript):
                return response
            if isinstance(response, dict):
                return PodScript(**response)
        except Exception as e:
            # Keep the original failure attached as the explicit cause.
            raise RuntimeError(f"Failed to generate podcast script: {e}") from e

        # Any other response type used to fall through and return None,
        # which only surfaced later in the caller; fail here instead.
        raise RuntimeError(
            "Failed to generate podcast script: unexpected response type "
            f"{type(response).__name__}"
        )

    def _generate_mock_podcast_script(self) -> PodScript:
        """Build a script from the canned ``mocked_raw_script`` text.

        Each line containing a colon is split at the first colon into a
        speaker and that speaker's text; lines without a colon are skipped.

        Returns:
            A ``PodScript`` holding the parsed lines in their original order.
        """
        lines = []
        for raw_line in mocked_raw_script.strip().splitlines():
            speaker, separator, text = raw_line.partition(":")
            if separator:
                lines.append(PodLine(speaker=speaker.strip(), text=text.strip()))
        return PodScript(conversation=lines)

    async def run(
        self,
        article: str,
        target_language: str,
        speaker1_name: str,
        speaker2_name: str,
    ) -> PodScript:
        """Convert an article into a podcast script.

        When ``config.mocked`` is true the canned mock script is returned and
        the other arguments are ignored; otherwise the article file is read
        and sent to the LLM.

        Args:
            article: The name of the article file.
            target_language: The target language for the podcast.
            speaker1_name: The name of the first speaker.
            speaker2_name: The name of the second speaker.

        Returns:
            The generated podcast script.

        Raises:
            ValueError: If the article name or the article content is empty.
            FileNotFoundError: If the article file does not exist.
            RuntimeError: If script generation fails.
        """
        print("Running script generation")
        if self._config.mocked:
            return self._generate_mock_podcast_script()
        text = self._fetch_article(article)
        return await self._generate_script(text, target_language, speaker1_name, speaker2_name)