Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import requests | |
| import os | |
| import PyPDF2 | |
| import docx | |
| import time | |
| #------------------------------------------------------------------------ | |
| # Configurations | |
| #------------------------------------------------------------------------ | |
# Page-level settings: browser-tab title and icon, layout, sidebar state,
# and the entries shown in the app menu.
page_settings = {
    "page_title": "Text Translator",
    "page_icon": ":speech_balloon:",
    "layout": "centered",
    "initial_sidebar_state": "auto",
    "menu_items": {
        'Get Help': 'mailto:info@mtss.ai',
        'About': "This app is built to support translation tasks",
    },
}
st.set_page_config(**page_settings)
| #------------------------------------------------------------------------ | |
| # Title | |
| #------------------------------------------------------------------------ | |
# Short usage blurb rendered directly under the main heading.
APP_DESCRIPTION = """
Choose a target language, enter your text or upload a document, and click **Translate** to get the translated text.
"""

# Main heading of the app, followed by the blurb.
st.title("Text Translator")
st.write(APP_DESCRIPTION)
| #------------------------------------------------------------------------ | |
| # Sidebar | |
| #------------------------------------------------------------------------ | |
# Markdown shown inside the collapsible help panel.
CONTACT_DETAILS = """
**Contact**: Cheyne LeVesseur, PhD
**Email**: info@mtss.ai
"""

# Step-by-step usage instructions, rendered as a Markdown bullet list.
User_Instructions = """
- **Step 1**: Provide either text input or upload a document for translation.
- **Step 2**: Click Translate.
- **Step 3**: Sit back, relax, and let the magic happen!
"""

with st.sidebar:
    # The password prompt and logo image are currently disabled:
    # password = st.text_input("Enter Password:", type="password")
    # image_path = "MTSSai_logo.png"
    # st.image(image_path, width=image_width)

    # Logo width in pixels; only referenced by the disabled st.image call.
    image_width = 300

    # Sidebar heading.
    st.title("MTSS.ai")

    # Collapsible panel with contact details for help and bug reports.
    help_panel = st.expander("Need help and report a bug")
    with help_panel:
        st.write(CONTACT_DETAILS)

    st.divider()

    # Usage instructions section.
    st.subheader('User Instructions')
    st.markdown(User_Instructions)
| #------------------------------------------------------------------------ | |
| # Functions | |
| #------------------------------------------------------------------------ | |
# (display name, model suffix) pairs. Every supported target language uses a
# model named "Helsinki-NLP/opus-mt-en-<suffix>"; order here is the order the
# languages appear in the mapping.
_TARGET_LANGUAGE_CODES = (
    ("Spanish", "es"),
    ("Arabic", "ar"),
    ("Chinese", "zh"),
    ("Albanian", "sq"),
    ("French", "fr"),
    ("German", "de"),
    ("Japanese", "jap"),
    ("Italian", "it"),
    ("Dutch", "nl"),
    ("Hindi", "hi"),
    ("Russian", "ru"),
    ("Indonesian", "id"),
    ("Greek", "el"),
    ("Danish", "da"),
    ("Swedish", "sv"),
    ("Czech", "cs"),
    ("Catalan", "ca"),
    ("Bulgarian", "bg"),
    ("Estonian", "et"),
    ("Basque", "eu"),
    ("Vietnamese", "vi"),
    ("Finnish", "fi"),
    ("Hebrew", "he"),
    ("Azerbaijani", "az"),
    ("Afrikaans", "af"),
    ("Armenian", "hy"),
    ("Hungarian", "hu"),
)

# Maps each target-language display name to its translation model identifier.
language_model_mapping = {
    language_name: f"Helsinki-NLP/opus-mt-en-{code}"
    for language_name, code in _TARGET_LANGUAGE_CODES
}
# Target-language picker; options follow the mapping's insertion order.
available_languages = list(language_model_mapping)
language = st.selectbox("Select target language", available_languages)

# Let the user either type text or upload a document.
input_option = st.radio(
    "Select input method:",
    ("Text Input", "Upload Document"),
)

# Default to an empty string so later code always has a str to inspect.
input_text = ""
| # Functions to extract text from files | |
def extract_text_from_pdf(pdf_file):
    """Return the text of every page in *pdf_file*, one page per line block.

    Pages whose extraction yields nothing are skipped; each remaining page's
    text is followed by a newline. On any failure an error is shown in the
    Streamlit UI and an empty string is returned.
    """
    try:
        reader = PyPDF2.PdfReader(pdf_file)
        page_texts = (page.extract_text() for page in reader.pages)
        return "".join(content + "\n" for content in page_texts if content)
    except Exception as e:
        st.error(f"Error extracting text from PDF: {e}")
        return ""
def extract_text_from_docx(docx_file):
    """Return the text of all paragraphs in *docx_file*, newline-terminated.

    On any failure an error is shown in the Streamlit UI and an empty string
    is returned.
    """
    try:
        document = docx.Document(docx_file)
        return "".join(
            paragraph.text + "\n" for paragraph in document.paragraphs
        )
    except Exception as e:
        st.error(f"Error extracting text from Word document: {e}")
        return ""
# Collect the text to translate, either typed directly or pulled out of an
# uploaded PDF / Word document.
if input_option == "Text Input":
    input_text = st.text_area("Enter text to translate", height=200)
elif input_option == "Upload Document":
    uploaded_file = st.file_uploader("Choose a file", type=["pdf", "docx"])
    if uploaded_file is not None:
        _, file_extension = os.path.splitext(uploaded_file.name)
        file_extension = file_extension.lower()

        # Lowercased extension -> (spinner message, extractor function).
        document_handlers = {
            ".pdf": ("Extracting text from PDF...", extract_text_from_pdf),
            ".docx": (
                "Extracting text from Word document...",
                extract_text_from_docx,
            ),
        }
        handler = document_handlers.get(file_extension)
        if handler is None:
            st.error("Unsupported file type.")
            input_text = ""
        else:
            spinner_message, extract_text = handler
            with st.spinner(spinner_message):
                input_text = extract_text(uploaded_file)
def split_text_into_chunks(text, max_chunk_size):
    """Split *text* into consecutive chunks of at most *max_chunk_size* chars.

    Chunks are cut just after a whitespace character whenever one exists
    inside the current window, so words are not broken in half. A single
    word longer than *max_chunk_size* is still hard-split at the size limit.
    Concatenating the returned chunks reproduces *text* exactly.

    Args:
        text: The string to split.
        max_chunk_size: Maximum length of each chunk; must be positive.

    Returns:
        A list of non-empty strings (empty list for empty *text*).

    Raises:
        ValueError: If *max_chunk_size* is not positive.
    """
    if max_chunk_size <= 0:
        raise ValueError("max_chunk_size must be a positive integer")

    chunks = []
    text_length = len(text)
    start = 0
    while start < text_length:
        end = min(start + max_chunk_size, text_length)
        # Only look for a nicer break point when the window would otherwise
        # end in the middle of a word.
        if end < text_length and not text[end].isspace():
            cut = end
            while cut > start and not text[cut - 1].isspace():
                cut -= 1
            # cut == start means the window holds no whitespace at all;
            # fall back to the hard split at `end`.
            if cut > start:
                end = cut
        chunks.append(text[start:end])
        start = end
    return chunks
def _extract_translation(result):
    """Return the ``translation_text`` string from an API payload, or None.

    Accepts either a non-empty list (its first item is inspected) or a dict;
    any other shape, or a missing/non-string ``translation_text``, gives None.
    """
    candidate = result[0] if isinstance(result, list) and result else result
    if isinstance(candidate, dict):
        translation = candidate.get("translation_text")
        if isinstance(translation, str):
            return translation
    return None


def translate_text(text, target_lang, max_retries=5, backoff_factor=2, timeout=30):
    """Translate *text* into *target_lang* through the hosted model endpoint.

    The text is split into chunks, each chunk is POSTed to the model mapped
    to *target_lang*, and the translated chunks are joined with single
    spaces. Failures are reported through ``st.error``.

    Args:
        text: Source text to translate.
        target_lang: A key of ``language_model_mapping``.
        max_retries: Maximum number of attempts per chunk.
        backoff_factor: Base of the exponential wait (``backoff_factor **
            attempt`` seconds) between attempts.
        timeout: Seconds to wait for each HTTP request before treating it as
            a failed attempt.

    Returns:
        The translated text, or None if the language is unsupported, the
        ``HF_API_KEY`` environment variable is missing, the API reports an
        error or an unrecognized payload, or a chunk exhausts its retries.
    """
    model = language_model_mapping.get(target_lang)
    if not model:
        st.error("Unsupported language selected.")
        return None

    # The API key is read from the environment so it never lives in the code.
    hf_api_key = os.getenv('HF_API_KEY')
    if not hf_api_key:
        st.error("Hugging Face API key not set in environment variables.")
        return None

    api_url = f"https://api-inference.huggingface.co/models/{model}"
    headers = {"Authorization": f"Bearer {hf_api_key}"}

    max_chunk_size = 500  # Adjust based on API limitations
    text_chunks = split_text_into_chunks(text, max_chunk_size)
    translated_chunks = []

    for chunk_index, chunk in enumerate(text_chunks):
        result = None
        succeeded = False
        for attempt in range(1, max_retries + 1):
            try:
                response = requests.post(
                    api_url,
                    headers=headers,
                    json={"inputs": chunk},
                    timeout=timeout,  # never block the app on a stalled request
                )
                # 503 (Service Unavailable) is retried after a delay; any
                # other bad status is raised and handled below.
                if response.status_code != 503:
                    response.raise_for_status()
                    result = response.json()
                    succeeded = True
            except requests.exceptions.RequestException:
                # Network/HTTP/decoding problem: count it as a failed attempt.
                succeeded = False

            if succeeded:
                break
            # Back off only when another attempt will actually follow.
            if attempt < max_retries:
                time.sleep(backoff_factor ** attempt)

        if not succeeded:
            st.error(f"Failed to translate chunk {chunk_index + 1} after {max_retries} attempts.")
            return None

        # The API can answer 200 with an {"error": ...} body.
        if isinstance(result, dict) and result.get("error"):
            st.error(f"Error from translation API: {result['error']}")
            return None

        translated_text = _extract_translation(result)
        if translated_text is None:
            # Surface the problem instead of passing a placeholder message
            # off as translated text.
            st.error("Unexpected response format from the API.")
            return None
        translated_chunks.append(translated_text)

    return " ".join(translated_chunks)
# Run the translation when the button is pressed and render the outcome.
if st.button("Translate"):
    if input_text.strip():
        with st.spinner("Translation service loading..."):
            translated = translate_text(input_text, language)
        if not translated:
            st.error("Translation failed. Please try again later.")
        else:
            st.subheader("Translated Text:")
            st.write(translated)
    else:
        # Nothing but whitespace (or nothing at all) was provided.
        st.warning("Please enter some text to translate.")