Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| import base64 | |
| import os | |
| from moviepy.editor import VideoFileClip | |
| from pytube import YouTube | |
| from youtube_transcript_api import YouTubeTranscriptApi | |
| from youtube_transcript_api._errors import CouldNotRetrieveTranscript | |
| import whisper | |
| import ffmpeg | |
| import re | |
| import tempfile | |
| from huggingface_hub import InferenceClient | |
# Configure the Streamlit page at import time: wide layout, sidebar collapsed.
st.set_page_config(layout="wide", initial_sidebar_state="collapsed")

# Instruction template sent to the LLM. The single "{}" placeholder is filled
# with the transcript text by summarise() through str.format().
PROMPT = """Act as the author and provide a comprehensive detailed article in the same language as the transcript
in markdown format that has a H1 main title(example "# <this is a title> ") and broken down into H2 subtitles (example "## <this is a title> ") for the following transcript
You must follow the rules:
- Write the article in markdown format
- Create a main title for the article as markdown H1 and break the article into subtitles where each subtitle is markdown H2
- Article must be in the same language as the transcript
- summary should be informative and act as a replacement for the original transcript to the point that the user doesn't have to go back to read the transcript
- Summary should not mention the author or speaker at all should act as your independent writing without referencing the original transcript or speaker.
- You can use bullet points within the article
Transcript:
{} \n\n Article:"""

# Registry of selectable LLMs, keyed by the name offered in the "Select LLM"
# box. Each entry holds the chat-formatted prompt template ('prompt') and the
# model id handed to InferenceClient ('endpoint').
# The f-string inlines PROMPT as a value, so PROMPT's own "{}" placeholder is
# still present in the result for the later .format() call in summarise().
LLM = {
    "llama3-8b": {'prompt': f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>
{PROMPT}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
""", 'endpoint': "meta-llama/Meta-Llama-3-8B-Instruct"}
}
@st.cache_resource
def load_whisper(model):
    """Load a Whisper speech-to-text model, reusing it across Streamlit reruns.

    main() calls this on every script run. Without caching, the model would
    be reloaded on each widget interaction; ``st.cache_resource`` keeps one
    loaded instance per distinct *model* name.

    Args:
        model: Whisper model name passed to ``whisper.load_model``
            (main() uses ``"base"``).

    Returns:
        The object returned by ``whisper.load_model(model)``.
    """
    return whisper.load_model(model)
def download_video(url):
    """Download the video behind *url* into the current working directory.

    URLs containing "youtube" or "youtu.be" are fetched with pytube, using
    the stream returned by ``get_highest_resolution()``. Any other URL is
    streamed over HTTP with ``requests``.

    Args:
        url: Address of the video to download.

    Returns:
        Path of the file that was written.

    Raises:
        requests.HTTPError: The server answered a direct download with an
            error status (nothing is written in that case).
        requests.RequestException: Connection problems or timeout.
        Exception: Whatever pytube raises for YouTube links.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from urllib.parse import unquote, urlsplit

    if "youtube" in url or "youtu.be" in url:
        stream = YouTube(url).streams.get_highest_resolution()
        return stream.download()

    # Build the local name from the URL *path* only, so query strings and
    # fragments ("clip.mp4?token=...") do not end up in the filename, and
    # fall back to a fixed name when the path has no usable last segment.
    filename = os.path.basename(unquote(urlsplit(url).path))
    if filename in ("", ".", ".."):
        filename = "video.mp4"

    # The context manager releases the connection even if writing fails;
    # the timeout prevents a stalled server from hanging the app forever.
    with requests.get(url, stream=True, timeout=30) as response:
        # Fail before creating the file so an HTML error page is never
        # saved and later mistaken for a video.
        response.raise_for_status()
        with open(filename, "wb") as file:
            for chunk in response.iter_content(chunk_size=1024 * 1024):
                if chunk:
                    file.write(chunk)
    return filename
def convert_to_audio(video_filename):
    """Extract the audio track of a video file into an MP3 next to it.

    Args:
        video_filename: Path of the video file to read.

    Returns:
        Path of the written MP3: *video_filename* with its extension
        replaced by ``.mp3``.

    Raises:
        ValueError: The video has no audio track.
        Exception: Whatever moviepy raises while reading or encoding.
    """
    # splitext swaps only the real extension. The previous
    # str.replace(".mp4", ".mp3") left non-mp4 names (.webm, .mkv, ...)
    # unchanged, making the output path identical to the input video.
    stem, _ = os.path.splitext(video_filename)
    audio_filename = f"{stem}.mp3"

    video = VideoFileClip(video_filename)
    try:
        audio = video.audio
        if audio is None:
            raise ValueError(f"'{video_filename}' has no audio track")
        audio.write_audiofile(audio_filename, codec="mp3")
    finally:
        # Always release the clip's readers, even when encoding fails.
        video.close()
    return audio_filename
def summarise(prompt, llm):
    """Generate an article-style summary of *prompt* with the chosen LLM.

    Args:
        prompt: Transcript text substituted into the model's prompt template.
        llm: Key into the module-level ``LLM`` registry.

    Returns:
        The result of ``InferenceClient.text_generation`` for the filled
        template, limited to 1024 new tokens.
    """
    settings = LLM[llm]
    client = InferenceClient(settings["endpoint"])
    filled_template = settings["prompt"].format(prompt)
    return client.text_generation(filled_template, max_new_tokens=1024)
def delete_files(video_filename, audio_filename):
    """Delete the video file and then the audio file via delete_file()."""
    for path in (video_filename, audio_filename):
        delete_file(path)
def delete_file(filename):
    """Delete *filename* from disk and report the deletion in the UI.

    A missing file, or a ``None`` filename, is silently ignored.
    process_video() sets filenames to ``None`` when download or conversion
    fails, and ``os.path.exists(None)`` would raise ``TypeError``.

    Args:
        filename: Path of the file to delete, or ``None``.
    """
    if filename is None:
        return
    try:
        # Attempt the removal directly instead of checking existence first,
        # so a file that disappears in between cannot cause a crash.
        os.remove(filename)
    except FileNotFoundError:
        return
    st.info(f"File '{os.path.basename(filename)}' deleted from the server.")
def transcribe_whisper(_model, audio_filepath):
    """Transcribe an audio file and return only the transcript text.

    Args:
        _model: Object exposing ``transcribe(path)`` that returns a mapping
            with a ``"text"`` entry (main() passes the loaded Whisper model).
        audio_filepath: Path of the audio file to transcribe.

    Returns:
        The ``"text"`` value of the transcription result.
    """
    result = _model.transcribe(audio_filepath)
    return result["text"]
def get_media_download_link(media_type, file_path):
    """Render a download link that embeds the file as a base64 data URI.

    The whole file is read into memory and inlined into the page.

    Args:
        media_type: Subtype placed in the data URI (callers pass
            ``"video"`` or ``"audio"``).
        file_path: Path of the file to offer for download.
    """
    # Local import keeps this block self-contained without touching the
    # file-level import list.
    from html import escape

    with open(file_path, "rb") as file:
        encoded = base64.b64encode(file.read()).decode("utf-8")
    media_href = f"data:file/{media_type};base64,{encoded}"

    # The filename can originate from a user-supplied URL and the markup is
    # rendered with unsafe_allow_html=True, so escape it before it is placed
    # in an attribute value and in the link text.
    safe_name = escape(os.path.basename(file_path), quote=True)
    st.markdown(
        f'<a href="{media_href}" download="{safe_name}">Download {safe_name}</a>',
        unsafe_allow_html=True,
    )
def generate_summaries(_summarizer, text, min_length=50, max_length=500):
    """Summarise *text* one paragraph at a time.

    Paragraphs are the pieces of *text* separated by a blank line
    (``"\\n\\n"``). Each one is passed to ``_summarizer`` and the stripped
    results are joined again with blank lines.

    Args:
        _summarizer: Callable taking a paragraph plus ``max_length``,
            ``min_length`` and ``do_sample`` keywords, returning a sequence
            whose first item has a ``"summary_text"`` entry.
        text: Text to summarise.
        min_length: Forwarded to ``_summarizer``.
        max_length: Forwarded to ``_summarizer``.

    Returns:
        The per-paragraph summaries joined by ``"\\n\\n"``.
    """
    return "\n\n".join(
        _summarizer(
            chunk, max_length=max_length, min_length=min_length, do_sample=False
        )[0]["summary_text"].strip()
        for chunk in text.split("\n\n")
    )
def main():
    """Render the VidScripter landing page and start processing on request.

    The left column shows usage notes; the right column collects the video
    URL and LLM choice. Once "Fetch" has been clicked, the ``load_state``
    session flag stays set, so later runs keep processing the entered URL.
    """
    st.title("VidScripter")
    st.write("#### A One Stop Solution to Video Transcription")

    help_col, input_col = st.columns(2)
    usage_notes = """
- Enter the video URL in the text input box.
- Click the **Fetch** button to fetch the video.
- Once the video is fetched, you can perform the following actions:
- Fetch transcript from YouTube API (if available) by clicking the **Fetch Transcript** button.
- Transcribe the video using the Whisper model by clicking the **Transcribe (Whisper)** button.
- The transcript will be displayed in a text area below.
- A summary of the transcript will also be generated by the selected LLM.
- The summary will be displayed in a text area below.
- You can download the video, audio, transcript or summary by clicking the respective download buttons.
"""
    help_col.write(usage_notes)

    whisper_model = load_whisper("base")
    url = input_col.text_input("Enter the video URL")
    llm = input_col.selectbox("Select LLM", list(LLM), index=0)
    fetch_clicked = input_col.button("Fetch")

    st.session_state.setdefault("load_state", False)
    if not (fetch_clicked or st.session_state.load_state):
        return

    # Remember the click so the fetched view is kept on subsequent runs.
    st.session_state.load_state = True
    if url:
        process_video(url, whisper_model, llm)
def process_video(url, whisper_model, llm):
    """Fetch a video, extract its audio and render the transcript tools.

    Steps: download the video, convert it to MP3, then lay out four columns
    holding "Download Video", "Download Audio", the YouTube transcript
    button (YouTube URLs only) and the Whisper transcription button.
    Failures in download or conversion are reported in the UI and the
    corresponding features are skipped.

    Args:
        url: Video URL entered by the user.
        whisper_model: Loaded Whisper model used for transcription.
        llm: Key into ``LLM`` selecting the summarisation model.
    """
    is_youtube = "youtube" in url or "youtu.be" in url

    try:
        video_filename = download_video(url)
        st.success("Video fetched successfully")
    except Exception:
        video_filename = None
        st.warning("Video could not be fetched")

    if video_filename is None:
        audio_filename = None
        st.info("No Video to convert into Audio")
    else:
        try:
            audio_filename = convert_to_audio(video_filename)
            st.success("Audio converted successfully")
        except Exception:
            audio_filename = None
            st.warning("Audio coud not be converted")

    if video_filename is not None:
        # splitext swaps the real extension, so non-mp4 downloads also get
        # a ".txt" transcript name instead of keeping the video extension.
        stem, _ = os.path.splitext(os.path.basename(video_filename))
        text_filename = f"{stem}.txt"
    else:
        text_filename = "transcript.txt"

    emp = st.empty()
    col1, col2, col3, col4 = st.columns(4)

    if is_youtube:
        # Resolve the video id only for YouTube links. Doing it up front for
        # every URL made pytube reject plain file URLs and abort the run
        # before anything was downloaded.
        try:
            video_id = YouTube(url).video_id
        except Exception as e:
            emp.warning(f"Error Fetching API Transcripts for this video. {e}")
        else:
            process_youtube_video(video_id, col3, emp, text_filename, llm)

    process_whisper_transcript(whisper_model, audio_filename, col4, text_filename)

    with col1:
        if video_filename is not None and st.button("Download Video"):
            with st.spinner("Encoding Video"):
                get_media_download_link("video", video_filename)
    with col2:
        if audio_filename is not None and st.button("Download Audio"):
            with st.spinner("Encoding Audio"):
                get_media_download_link("audio", audio_filename)
def process_youtube_video(video_id, col, emp, text_filename, llm):
    """Offer YouTube's own transcripts and an LLM summary of the chosen one.

    Lists the transcripts available for *video_id*, lets the user pick one
    in *emp*, and, once "Fetch Transcript" has been clicked, shows the
    editable transcript beside its generated summary, each with a download
    button. Any failure is reported as a warning in *emp*.

    Args:
        video_id: YouTube video id.
        col: Column that hosts the "Fetch Transcript" button.
        emp: Placeholder used for the transcript selector and for warnings.
        text_filename: Download name for the transcript; the summary is
            saved under the same name prefixed with ``summary_``.
        llm: Key into ``LLM`` selecting the summarisation model.
    """
    try:
        transcripts = list(YouTubeTranscriptApi.list_transcripts(video_id))
        if not transcripts:
            return

        transcript_options = {
            f"{transcript.language} ({transcript.language_code})": transcript
            for transcript in transcripts
        }
        transcript_option = emp.selectbox(
            "Select a transcript", list(transcript_options.keys())
        )
        selected_transcript = transcript_options[transcript_option]

        st.session_state.setdefault("api_transcript", False)
        if not (col.button("Fetch Transcript") or st.session_state.api_transcript):
            return
        # Remember the click so the transcript stays visible on later runs.
        st.session_state.api_transcript = True

        # Raw string: "\s" in a normal literal is an invalid escape sequence.
        transcript_text = "\n".join(
            re.sub(r"\s+", " ", chunk["text"])
            for chunk in selected_transcript.fetch()
        )

        c1, c2 = st.columns(2)
        with c1:
            modified_text = st.text_area("Transcript", transcript_text, height=500)
            st.download_button("Download Transcript", modified_text, text_filename)
        with c2:
            summary = summarise(modified_text, llm)
            summarized_text = st.text_area(
                "Summarized Transcript", summary, height=500
            )
            # Give the summary its own file name so it is not saved under
            # the same name as the transcript download.
            st.download_button(
                "Download Summary", summarized_text, f"summary_{text_filename}"
            )
    except CouldNotRetrieveTranscript:
        emp.warning("Could Not Retrieve API Transcripts for this video.")
    except Exception as e:
        emp.warning(f"Error Fetching API Transcripts for this video. {e}")
def process_whisper_transcript(whisper_model, audio_filename, col, text_filename):
    """Offer Whisper transcription of the extracted audio.

    Does nothing when there is no audio file. Otherwise a
    "Transcribe (Whisper)" button is placed in *col*; once it has been
    clicked, the transcript is shown in an editable text area with a
    download button.

    Args:
        whisper_model: Loaded Whisper model.
        audio_filename: Path of the audio file, or ``None``.
        col: Column that hosts the transcription button.
        text_filename: Download name for the transcript.
    """
    if audio_filename is None:
        return

    st.session_state.setdefault("whisper_transcript", False)
    clicked = col.button("Transcribe (Whisper)")
    if not (clicked or st.session_state.whisper_transcript):
        return

    # Keep the transcript view active on later runs after the first click.
    st.session_state.whisper_transcript = True
    transcript = transcribe_whisper(whisper_model, audio_filename)
    edited_text = st.text_area("Transcript", transcript, height=500)
    st.download_button("Download", edited_text, text_filename)
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()