Spaces:

ProfessorLeVesseur
/

Translator

Sleeping

App Files Files Community

Translator / app.py

ProfessorLeVesseur

Update app.py

4447788 verified about 1 year ago

raw

history blame contribute delete

8.76 kB

	import streamlit as st
	import requests
	import os
	import PyPDF2
	import docx
	import time

	#------------------------------------------------------------------------
	# Configurations
	#------------------------------------------------------------------------
	# Streamlit page setup. The menu entries are named first so the
	# configuration call itself stays short.
	page_menu_items = {
	    'Get Help': 'mailto:info@mtss.ai',
	    'About': "This app is built to support translation tasks",
	}
	st.set_page_config(
	    page_title="Text Translator",
	    page_icon=":speech_balloon:",
	    layout="centered",
	    initial_sidebar_state="auto",
	    menu_items=page_menu_items,
	)

	#------------------------------------------------------------------------
	# Title
	#------------------------------------------------------------------------

	# Introductory text rendered directly under the page title.
	app_description = """
	Choose a target language, enter your text or upload a document, and click Translate to get the translated text.
	"""

	# Page heading followed by the description.
	st.title("Text Translator")
	st.write(app_description)

	#------------------------------------------------------------------------
	# Sidebar
	#------------------------------------------------------------------------
	with st.sidebar:
	    # Logo width in pixels. The logo display itself is currently disabled:
	    #   st.image("MTSSai_logo.png", width=image_width)
	    image_width = 300

	    # Sidebar heading
	    st.title("MTSS.ai")

	    # Collapsible panel with contact details for help and bug reports
	    help_panel = st.expander("Need help and report a bug")
	    with help_panel:
	        st.write("""
	Contact: Cheyne LeVesseur, PhD
	Email: info@mtss.ai
	""")

	    st.divider()
	    st.subheader('User Instructions')

	    # Step-by-step usage notes, rendered as a Markdown bullet list
	    User_Instructions = """

	- Step 1: Provide either text input or upload a document for translation.
	- Step 2: Click Translate.
	- Step 3: Sit back, relax, and let the magic happen!

	"""
	    st.markdown(User_Instructions)

	#------------------------------------------------------------------------
	# Functions
	#------------------------------------------------------------------------

	# Language to model mapping.
	# Each display name is paired with the suffix that completes a model id of
	# the form "Helsinki-NLP/opus-mt-en-<suffix>". Order here is the order of
	# the resulting mapping.
	_TARGET_LANGUAGE_SUFFIXES = (
	    ("Spanish", "es"),
	    ("Arabic", "ar"),
	    ("Chinese", "zh"),
	    ("Albanian", "sq"),
	    ("French", "fr"),
	    ("German", "de"),
	    ("Japanese", "jap"),
	    ("Italian", "it"),
	    ("Dutch", "nl"),
	    ("Hindi", "hi"),
	    ("Russian", "ru"),
	    ("Indonesian", "id"),
	    ("Greek", "el"),
	    ("Danish", "da"),
	    ("Swedish", "sv"),
	    ("Czech", "cs"),
	    ("Catalan", "ca"),
	    ("Bulgarian", "bg"),
	    ("Estonian", "et"),
	    ("Basque", "eu"),
	    ("Vietnamese", "vi"),
	    ("Finnish", "fi"),
	    ("Hebrew", "he"),
	    ("Azerbaijani", "az"),
	    ("Afrikaans", "af"),
	    ("Armenian", "hy"),
	    ("Hungarian", "hu"),
	)
	language_model_mapping = {
	    display_name: f"Helsinki-NLP/opus-mt-en-{suffix}"
	    for display_name, suffix in _TARGET_LANGUAGE_SUFFIXES
	}

	# Target-language dropdown, listing the mapping's display names in order.
	target_language_names = list(language_model_mapping)
	language = st.selectbox("Select target language", target_language_names)

	# Radio buttons for how the source text is supplied.
	input_methods = ("Text Input", "Upload Document")
	input_option = st.radio("Select input method:", input_methods)

	# Stays empty unless one of the input methods provides text.
	input_text = ""

	# Functions to extract text from files
	def extract_text_from_pdf(pdf_file):
	    """Return the text of a PDF, with a newline after each page's text.

	    The file is opened with ``PyPDF2.PdfReader``. Pages whose extracted
	    text is empty or ``None`` contribute nothing to the result.

	    Args:
	        pdf_file: The PDF to read, passed straight to ``PyPDF2.PdfReader``.

	    Returns:
	        The concatenated page text. If any exception is raised while
	        reading, the error is shown with ``st.error`` and ``""`` is
	        returned instead.
	    """
	    try:
	        reader = PyPDF2.PdfReader(pdf_file)
	        page_texts = (page.extract_text() for page in reader.pages)
	        return "".join(page_text + "\n" for page_text in page_texts if page_text)
	    except Exception as e:
	        st.error(f"Error extracting text from PDF: {e}")
	        return ""

	def extract_text_from_docx(docx_file):
	    """Return the text of a Word document, one paragraph per line.

	    The file is opened with ``docx.Document`` and the ``text`` of every
	    entry in ``paragraphs`` is emitted followed by a newline.

	    Args:
	        docx_file: The document to read, passed straight to
	            ``docx.Document``.

	    Returns:
	        The concatenated paragraph text. If any exception is raised while
	        reading, the error is shown with ``st.error`` and ``""`` is
	        returned instead.
	    """
	    try:
	        document = docx.Document(docx_file)
	        return "".join(paragraph.text + "\n" for paragraph in document.paragraphs)
	    except Exception as e:
	        st.error(f"Error extracting text from Word document: {e}")
	        return ""

	# Collect the source text from whichever input method was selected.
	if input_option == "Upload Document":
	    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
	    if uploaded_file is not None:
	        file_extension = os.path.splitext(uploaded_file.name)[1].lower()

	        # File extension -> (spinner caption, extractor function)
	        extraction_routes = {
	            ".pdf": ("Extracting text from PDF...", extract_text_from_pdf),
	            ".docx": ("Extracting text from Word document...", extract_text_from_docx),
	        }
	        extraction_route = extraction_routes.get(file_extension)

	        if extraction_route is None:
	            st.error("Unsupported file type.")
	            input_text = ""
	        else:
	            spinner_caption, extract_document_text = extraction_route
	            with st.spinner(spinner_caption):
	                input_text = extract_document_text(uploaded_file)
	elif input_option == "Text Input":
	    input_text = st.text_area("Enter text to translate", height=200)

	# Function to split text into chunks
	def split_text_into_chunks(text, max_chunk_size):
	    """Split ``text`` into consecutive chunks of at most ``max_chunk_size`` characters.

	    A chunk that would otherwise end in the middle of a word is shortened
	    so that it ends just after the last whitespace character inside its
	    window. When the window contains no whitespace at all (a single token
	    longer than ``max_chunk_size``), the chunk is cut at the size limit.

	    No characters are added or removed: ``"".join(chunks) == text``.

	    Args:
	        text: The string to split.
	        max_chunk_size: Maximum length of each chunk; must be positive.

	    Returns:
	        A list of non-empty chunks in original order; ``[]`` for empty text.

	    Raises:
	        ValueError: If ``max_chunk_size`` is not positive.
	    """
	    if max_chunk_size <= 0:
	        raise ValueError("max_chunk_size must be a positive integer")

	    chunks = []
	    text_length = len(text)
	    start = 0
	    while start < text_length:
	        end = min(start + max_chunk_size, text_length)
	        if end < text_length:
	            # More text follows, so try to end this chunk on a word boundary:
	            # scan backwards for the last whitespace character in the window.
	            for cut in range(end, start, -1):
	                if text[cut - 1].isspace():
	                    end = cut
	                    break
	        chunks.append(text[start:end])
	        start = end
	    return chunks

	# Function to perform the translation with retry mechanism
	def translate_text(text, target_lang, max_retries=5, backoff_factor=2, request_timeout=30):
	    """Translate ``text`` into ``target_lang`` through the Hugging Face inference endpoint.

	    The text is split into chunks of at most 500 characters with
	    ``split_text_into_chunks``. Each chunk is posted separately to the
	    model that ``language_model_mapping`` lists for ``target_lang``, and the
	    translated chunks are joined with single spaces. Whitespace-only chunks
	    are skipped. Every failure is reported to the user with ``st.error``.

	    Retry policy per chunk: HTTP 429, HTTP 5xx, network errors, timeouts
	    and unparseable response bodies are retried with exponential backoff.
	    Any other HTTP error status stops the translation immediately, because
	    repeating the same request cannot succeed.

	    Args:
	        text: Source text to translate.
	        target_lang: A key of ``language_model_mapping``.
	        max_retries: Maximum number of attempts per chunk.
	        backoff_factor: Base of the backoff; after failed attempt ``k`` the
	            function sleeps ``backoff_factor ** k`` seconds before retrying.
	        request_timeout: Seconds to wait for each HTTP request before
	            treating it as a failed attempt.

	    Returns:
	        The translated text, or ``None`` when the language is unknown, the
	        ``HF_API_KEY`` environment variable is unset, the API reports an
	        error, or a chunk still fails after ``max_retries`` attempts.
	    """
	    model = language_model_mapping.get(target_lang)
	    if not model:
	        st.error("Unsupported language selected.")
	        return None

	    # Retrieve Hugging Face API key from environment variables
	    hf_api_key = os.getenv('HF_API_KEY')
	    if not hf_api_key:
	        st.error("Hugging Face API key not set in environment variables.")
	        return None

	    api_url = f"https://api-inference.huggingface.co/models/{model}"
	    headers = {"Authorization": f"Bearer {hf_api_key}"}

	    # Split the text into manageable chunks
	    max_chunk_size = 500  # Adjust based on API limitations
	    translated_chunks = []

	    for chunk_index, chunk in enumerate(split_text_into_chunks(text, max_chunk_size)):
	        if not chunk.strip():
	            # Nothing to translate in a whitespace-only chunk; skip the API call.
	            continue

	        for attempt in range(1, max_retries + 1):
	            result = None
	            try:
	                response = requests.post(
	                    api_url,
	                    headers=headers,
	                    json={"inputs": chunk},
	                    timeout=request_timeout,  # never block indefinitely on a stalled connection
	                )
	                status = response.status_code
	                transient_failure = status == 429 or status >= 500
	                if not transient_failure:
	                    response.raise_for_status()  # remaining 4xx statuses raise HTTPError
	                    result = response.json()
	            except requests.exceptions.HTTPError as e:
	                # Non-transient HTTP error: waiting and retrying cannot help.
	                st.error(f"Error from translation API: {e}")
	                return None
	            except (requests.exceptions.RequestException, ValueError):
	                # Network error, timeout, or a body that is not valid JSON.
	                transient_failure = True

	            if not transient_failure:
	                # Handle possible errors from the API
	                if isinstance(result, dict) and result.get("error"):
	                    st.error(f"Error from translation API: {result['error']}")
	                    return None

	                # The API might return a list of translations
	                if isinstance(result, list) and result and isinstance(result[0], dict):
	                    translated_text = result[0].get("translation_text", "No translation found.")
	                elif isinstance(result, dict) and "translation_text" in result:
	                    translated_text = result["translation_text"]
	                else:
	                    translated_text = "Unexpected response format from the API."

	                translated_chunks.append(translated_text)
	                break  # This chunk is done; leave the retry loop

	            # Back off before the next attempt, but do not make the user
	            # wait when no attempt is left.
	            if attempt < max_retries:
	                time.sleep(backoff_factor ** attempt)
	        else:
	            # All retry attempts failed for this chunk
	            st.error(f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts.")
	            return None

	    return " ".join(translated_chunks)

	# Run the translation when the Translate button is pressed.
	translate_clicked = st.button("Translate")
	if translate_clicked and not input_text.strip():
	    # Nothing but whitespace was provided.
	    st.warning("Please enter some text to translate.")
	elif translate_clicked:
	    with st.spinner("Translation service loading..."):
	        translated = translate_text(input_text, language)

	    # A falsy result (None or an empty string) counts as a failure.
	    if translated:
	        st.subheader("Translated Text:")
	        st.write(translated)
	    else:
	        st.error("Translation failed. Please try again later.")