| | |
| | |
| |
|
| | |
| |
|
| |
|
| | |
| |
|
| |
|
| | |
| |
|
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| |
|
| | from urllib.parse import urlparse, parse_qs |
| | import gradio as gr |
| | import requests |
| | from bs4 import BeautifulSoup |
| | import openai |
| | from openai import OpenAI |
| | import speech_recognition as sr |
| | from transformers import pipeline |
| |
|
| | from transformers.pipelines.audio_utils import ffmpeg_read |
| |
|
| | from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled |
| | from youtube_transcript_api.formatters import TextFormatter |
| |
|
| | from urllib.parse import urlparse, parse_qs |
| | import json |
| |
|
| | import os |
| | import yaml |
| | import pandas as pd |
| | import numpy as np |
| |
|
| | from datetime import datetime, timedelta |
| |
|
| |
|
| | |
| |
|
# OpenAI API key read from the environment at import time.
# NOTE(review): os.environ[...] raises KeyError immediately if the variable
# is unset — the app cannot start without it, which is likely intentional.
openai_api_key = os.environ["OPENAI_API_KEY"]
| |
|
| | |
| |
|
| |
|
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | |
| |
|
| |
|
def is_youtube_url(url):
    """Return True when *url* is a YouTube video link this app can process.

    Accepts the standard hosts (www/m/bare youtube.com and youtu.be).
    A youtube.com URL must carry a ``v`` query parameter; a youtu.be URL
    must have a non-empty path (the video id). Anything else — including
    unparseable input — yields False.
    """
    try:
        parts = urlparse(url)
        host = parts.netloc
        # Reject anything that is not a known YouTube host outright.
        if host not in ("www.youtube.com", "youtube.com", "m.youtube.com", "youtu.be"):
            return False
        if "youtube.com" in host:
            # Long form: video id lives in the ?v= query parameter.
            return "v" in parse_qs(parts.query)
        # Short form (youtu.be): the path itself is the video id.
        return len(parts.path.strip("/")) > 0
    except Exception:
        return False
| |
|
def get_youtube_transcript(youtube_url, proxies=None):
    """Fetch the plain-text transcript of a YouTube video.

    Args:
        youtube_url (str): Full video URL, either the long form
            (``youtube.com/watch?v=<id>``) or the short form
            (``youtu.be/<id>``).
        proxies (dict | None): Optional requests-style proxy mapping passed
            through to the transcript API. Defaults to None (direct
            connection).

    Returns:
        str: The formatted transcript text, or a human-readable error
        message on failure (this function never raises).
    """
    try:
        parsed_url = urlparse(youtube_url)
        # BUG FIX: the original only read the ?v= parameter, so youtu.be
        # links (accepted by is_youtube_url) were rejected as invalid.
        if "youtu.be" in parsed_url.netloc:
            video_id = parsed_url.path.strip("/") or None
        else:
            ids = parse_qs(parsed_url.query).get("v")
            video_id = ids[0] if ids else None

        if not video_id:
            return "Invalid YouTube URL. Please provide a valid URL."

        # BUG FIX: the original hard-coded proxies={"https": "http://localhost:8080"},
        # which fails for anyone not running a local proxy. Proxies are now
        # an optional argument, defaulting to a direct connection.
        transcript = YouTubeTranscriptApi.get_transcript(video_id, proxies=proxies)

        # Flatten the timed transcript entries into plain text.
        formatter = TextFormatter()
        return formatter.format_transcript(transcript)

    except Exception as e:
        return f"An error occurred: {str(e)}"
| |
|
| |
|
| | |
| |
|
| |
|
def check_subtitles(video_id):
    """Return True if any subtitle/transcript track is listed for *video_id*.

    Prints a diagnostic line in every case; returns False both when
    subtitles are explicitly disabled and on any other lookup failure.
    """
    try:
        available = YouTubeTranscriptApi.list_transcripts(video_id)
    except TranscriptsDisabled:
        print("Subtitles are disabled for this video.")
        return False
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
        return False
    print(f"Available transcripts: {available}")
    return True
| |
|
| | |
# Smoke-test the transcript listing for a known video id.
# NOTE(review): this runs at import/startup and performs network I/O —
# consider moving it under an `if __name__ == "__main__":` guard.
video_id = "Um017R5Kr3A"
check_subtitles(video_id)
| |
|
| |
|
| | |
| |
|
| |
|
| | |
# Shared OpenAI client used by all chat-completion calls in this module.
client = OpenAI(api_key=openai_api_key)
| |
|
| | |
| |
|
| | |
def process_webpage(url):
    """Fetch a URL (webpage or YouTube video) and produce AI-generated digests.

    Args:
        url (str): A regular webpage URL or a YouTube video URL.

    Returns:
        tuple[str, str, str, str]: (hotlink_html, summary, perspectives,
        full_text). On failure the first element carries the error message
        and the remaining elements are empty strings.
    """
    try:
        if is_youtube_url(url):
            rendered_content = get_youtube_transcript(url)
            # BUG FIX: the original only built `hotlink` on the non-YouTube
            # branch, so this path crashed with UnboundLocalError at the
            # return below and the handler returned an error tuple instead
            # of the transcript. There is no page <title> here, so link
            # text falls back to the URL itself.
            hotlink = f'<a href="{url}" target="_blank" style="color:blue;text-decoration:underline;">{url}</a>'
        else:
            # Timeout added so an unresponsive server cannot hang the UI.
            response = requests.get(url, timeout=15)
            soup = BeautifulSoup(response.text, "html.parser")

            # BUG FIX: soup.title.string is None for an empty <title>, which
            # made the original .strip() raise AttributeError — guard both.
            raw_title = soup.title.string if soup.title else None
            title = raw_title.strip() if raw_title else "No Title Found"

            hotlink = f'<a href="{url}" target="_blank" style="color:blue;text-decoration:underline;">{title}</a>'

            # Drop non-visible nodes before extracting readable text.
            # (The original also built an unused prettified copy; removed.)
            for tag in soup(["script", "style"]):
                tag.decompose()
            rendered_content = soup.get_text(separator="\n").strip().replace("\n\n", "")

        # Keep prompts small: only the first 2000 characters go to the model;
        # the full text is still returned for the chat feature.
        text_content = rendered_content[:2000]

        summary_prompt = f"Summarize the following content:\n{text_content}\n Please use the language of the originial content"
        perspectives_prompt = f"Generate a reflective review for the following content:\n{text_content}\n Please output the perspectives in no more than 5 very concise bullet points. Please use the language of the originial content"

        summary_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": summary_prompt}],
            max_tokens=500,
        )
        perspectives_response = client.chat.completions.create(
            model="gpt-4o",
            messages=[{"role": "user", "content": perspectives_prompt}],
            max_tokens=500,
        )

        summary = summary_response.choices[0].message.content.strip()
        perspectives = perspectives_response.choices[0].message.content.strip()

        return hotlink, summary, perspectives, rendered_content
    except Exception as e:
        return f"Error fetching or processing content: {str(e)}", "", "", ""
| |
|
| |
|
| | |
| |
|
| |
|
| | |
def chat_with_ai(chat_history, user_input, content):
    """Answer a question about *content*, extending the running chat history.

    Args:
        chat_history: list of (user_message, assistant_reply) tuples.
        user_input: the user's new question.
        content: page/transcript text the answer should be grounded in.

    Returns:
        The history with the new (question, reply) pair appended in place;
        on failure, a new list with an error entry appended instead.
    """
    try:
        # Replay prior turns so the model retains conversational context.
        replayed = [
            {"role": role, "content": text}
            for question, reply in chat_history
            for role, text in (("user", question), ("assistant", reply))
        ]
        messages = (
            [{"role": "system", "content": "You are a helpful assistant."}]
            + replayed
            + [{"role": "user", "content": f"Based on this content: {content}\n\n{user_input}"}]
        )

        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=300,
        )
        reply_text = completion.choices[0].message.content.strip()
        chat_history.append((user_input, reply_text))
        return chat_history
    except Exception as e:
        return chat_history + [(user_input, f"Error: {str(e)}")]
| |
|
| |
|
| | |
| |
|
| |
|
def generate_reflection(chat_history):
    """Generate a reflective summary of the conversation so far.

    Args:
        chat_history (list of tuples): List of (user_input, ai_reply) pairs.

    Returns:
        str: A reflective summary generated by AI, or an error message.
    """
    try:
        # Replay the full conversation, then ask the model to reflect on it.
        replayed = [
            {"role": role, "content": text}
            for question, reply in chat_history
            for role, text in (("user", question), ("assistant", reply))
        ]
        messages = (
            [{"role": "system", "content": "You are a professional content summarizer. Generate thoughtful reflections."}]
            + replayed
            + [{"role": "user", "content": "Please provide a concise, reflective summary of this conversation."}]
        )

        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            max_tokens=200,
        )
        return completion.choices[0].message.content.strip()
    except Exception as e:
        return f"Error generating reflection: {str(e)}"
| |
|
| |
|
| | |
| |
|
| |
|
| | import requests |
| |
|
def post_to_linkedin(access_token, reflection, visibility="PUBLIC", person_id="jay"):
    """Post a reflection to LinkedIn via the UGC Posts API.

    Args:
        access_token (str): LinkedIn API access token.
        reflection (str): The content to post.
        visibility (str): Visibility setting ("PUBLIC" or "CONNECTIONS").
            Defaults to "PUBLIC".
        person_id (str): LinkedIn member id used to build the author URN.
            Defaults to "jay" for backward compatibility — this placeholder
            was hard-coded in the original body and will be rejected by the
            real API; callers should pass their own member id.

    Returns:
        str: Confirmation or error message (never raises).
    """
    try:
        url = "https://api.linkedin.com/v2/ugcPosts"
        headers = {
            "Authorization": f"Bearer {access_token}",
            "Content-Type": "application/json",
        }
        payload = {
            "author": f"urn:li:person:{person_id}",
            "lifecycleState": "PUBLISHED",
            "visibility": {"com.linkedin.ugc.MemberNetworkVisibility": visibility},
            "specificContent": {
                "com.linkedin.ugc.ShareContent": {
                    "shareCommentary": {
                        "text": reflection
                    },
                    "shareMediaCategory": "NONE"
                }
            }
        }

        # Timeout so a network stall cannot block the caller indefinitely.
        response = requests.post(url, headers=headers, json=payload, timeout=15)
        # LinkedIn returns 201 Created on a successful post.
        if response.status_code == 201:
            return "Reflection successfully posted to LinkedIn!"
        else:
            return f"Failed to post to LinkedIn. Error: {response.json()}"
    except Exception as e:
        return f"Error posting to LinkedIn: {str(e)}"
| |
|
| | |
# Stand-alone clipboard helper (JavaScript source kept as a string).
# NOTE(review): appears unreferenced in this file — the Gradio UI below
# embeds its own copy-to-clipboard script (`custom_js`); confirm before
# removing.
copy_to_clipboard_js = """
function copyToClipboard(text) {
    navigator.clipboard.writeText(text).then(() => {
        alert("Text copied to clipboard!");
    }).catch(err => {
        alert("Failed to copy text: " + err);
    });
}
"""
| |
|
| | |
| |
|
| |
|
| | |
# Gradio UI: layout (two rows of columns) plus the event wiring that connects
# the buttons to process_webpage / chat_with_ai / generate_reflection.
with gr.Blocks() as demo:
    gr.Markdown("## Curify Digest: Consume and interact with content")

    # Row 1: URL entry on the left, fetch button + rendered hotlink on the right.
    with gr.Row():
        with gr.Column():
            gr.Markdown("## Render Webpage")
            url_input = gr.Textbox(label="Enter URL")
        with gr.Column():

            fetch_btn = gr.Button("Fetch and Process Webpage")
            title_output = gr.HTML(label="Webpage Content")

    # Row 2: AI digests on the left, chatbot in the middle, reflections on the right.
    with gr.Row():

        with gr.Column():
            gr.Markdown("## Summary and perspectives")
            summary_output = gr.Textbox(label="Summary", lines=5)
            perspectives_output = gr.Textbox(label="Perspectives", lines=5)
            # Hidden textbox: carries the full page text from process_webpage
            # into chat_with_ai without displaying it.
            fulltext_output = gr.Textbox(label="Fulltext", visible=False)

        with gr.Column():
            gr.Markdown("## Interactive Chatbot and reflections")
            chatbot_history_gr = gr.Chatbot(label="Chat History")
            user_input = gr.Textbox(label="Ask a Question", placeholder="Type your question here...")
            chatbot_btn = gr.Button("Chat")

        with gr.Column():
            reflection_btn = gr.Button("Generate reflection")
            reflection_output = gr.Textbox(label="Reflections",interactive=True, lines=5)

    # Inline JS + button that copies the Reflections textbox to the clipboard.
    # NOTE(review): selects by aria-label, so it breaks if the label changes.
    custom_js = """
    <script>
    function copyToClipboard() {
        const textbox = document.querySelector("textarea[aria-label='Reflections']");
        if (textbox) {
            navigator.clipboard.writeText(textbox.value).then(() => {
                alert("Text copied to clipboard!");
            }).catch(err => {
                alert("Failed to copy text: " + err);
            });
        }
    }
    </script>
    <button onclick="copyToClipboard()">Copy to clipboard</button>
    """
    gr.HTML(custom_js)

    # Fetch → fills hotlink, summary, perspectives and the hidden full text.
    fetch_btn.click(
        process_webpage,
        inputs=url_input,
        outputs=[title_output, summary_output, perspectives_output, fulltext_output],
    )

    # Chat → appends a (question, reply) turn to the chatbot history.
    chatbot_btn.click(
        chat_with_ai,
        inputs=[chatbot_history_gr, user_input, fulltext_output],
        outputs=chatbot_history_gr,
    )

    # Reflection → summarizes the whole conversation into the Reflections box.
    reflection_btn.click(
        generate_reflection,
        inputs=chatbot_history_gr,
        outputs=reflection_output,
    )

# share=True exposes a public Gradio link in addition to the local server.
demo.launch(share=True)
| |
|
| |
|
| | |