Rogaton
Claude
committed on
Commit
·
409b3a2
1
Parent(s):
e0d73ae
Fix translation flow and rename megalaa references
Browse files
Critical fixes:
- Fix translation logic: don't use COPTIC_PROMPTS template for translation mode
- Use raw text input for translation (lines 641-642)
- Display raw prompt to user instead of full_prompt (line 659)
Rename "megalaa" references to generic terms:
- "megalaa models" → "local Coptic translator"
- "megalaa specialized Coptic models" → "specialized Coptic translator models"
- Update all comments, docstrings, and UI messages
- Keep technical model names in config: Norelad/coptic-megalaa-finetuned
This ensures Coptic→English translation works without requiring an Apertus-8B API token.
🤖 Generated with [Claude Code](https://claude.com/claude-code)
Co-Authored-By: Claude <noreply@anthropic.com>
- apertus_ui.py +28 -24
apertus_ui.py
CHANGED
|
@@ -8,10 +8,10 @@ from huggingface_hub import InferenceClient
|
|
| 8 |
from coptic_parser_core import CopticParserCore
|
| 9 |
|
| 10 |
# ========================================
|
| 11 |
-
#
|
| 12 |
# ========================================
|
| 13 |
# These functions convert between Coptic Unicode and Greek transcription
|
| 14 |
-
# Required for
|
| 15 |
|
| 16 |
COPTIC_TO_GREEK = {
|
| 17 |
"ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
|
|
@@ -46,7 +46,7 @@ GREEK_TO_COPTIC = {
|
|
| 46 |
}
|
| 47 |
|
| 48 |
def greekify(coptic_text):
|
| 49 |
-
"""Convert Coptic Unicode to Greek transcription for
|
| 50 |
chars = []
|
| 51 |
for c in coptic_text:
|
| 52 |
l_c = c.lower()
|
|
@@ -171,13 +171,13 @@ def load_coptic_lexicon(file_path=None):
|
|
| 171 |
return lexicon
|
| 172 |
|
| 173 |
# ========================================
|
| 174 |
-
#
|
| 175 |
# ========================================
|
| 176 |
-
# Load and cache
|
| 177 |
|
| 178 |
@st.cache_resource
|
| 179 |
def load_coptic_to_english_model():
|
| 180 |
-
"""Load Coptic → English translation model (Norelad
|
| 181 |
try:
|
| 182 |
with st.spinner("📥 Loading Coptic→English model (first time only, ~600MB)..."):
|
| 183 |
model_name = "Norelad/coptic-megalaa-finetuned"
|
|
@@ -196,7 +196,7 @@ def load_coptic_to_english_model():
|
|
| 196 |
|
| 197 |
@st.cache_resource
|
| 198 |
def load_english_to_coptic_model():
|
| 199 |
-
"""Load
|
| 200 |
try:
|
| 201 |
with st.spinner("📥 Loading English→Coptic model (first time only, ~600MB)..."):
|
| 202 |
model_name = "megalaa/english-coptic-translator"
|
|
@@ -214,7 +214,7 @@ def load_english_to_coptic_model():
|
|
| 214 |
return None, None, None
|
| 215 |
|
| 216 |
def translate_coptic_to_english(text, dialect='cop-sa'):
|
| 217 |
-
"""Translate Coptic text to English using
|
| 218 |
|
| 219 |
Args:
|
| 220 |
text: Coptic text to translate
|
|
@@ -256,7 +256,7 @@ def translate_coptic_to_english(text, dialect='cop-sa'):
|
|
| 256 |
return f"Translation error: {e}"
|
| 257 |
|
| 258 |
def translate_english_to_coptic(text):
|
| 259 |
-
"""Translate English text to Coptic using
|
| 260 |
tokenizer, model, device = load_english_to_coptic_model()
|
| 261 |
|
| 262 |
if tokenizer is None or model is None:
|
|
@@ -300,8 +300,8 @@ with st.sidebar:
|
|
| 300 |
|
| 301 |
# Translation Model Selection
|
| 302 |
st.subheader("🤖 Translation Model")
|
| 303 |
-
st.info("✨ **NEW:** Using
|
| 304 |
-
st.markdown("Models: `
|
| 305 |
|
| 306 |
# Optional: HuggingFace API Token for advanced features
|
| 307 |
with st.expander("⚙️ Advanced: Use Apertus-8B (optional)"):
|
|
@@ -311,7 +311,7 @@ with st.sidebar:
|
|
| 311 |
type="password",
|
| 312 |
help="Optional: For Apertus-8B multi-language support"
|
| 313 |
)
|
| 314 |
-
use_apertus = st.checkbox("Use Apertus-8B instead of
|
| 315 |
if hf_token_input and use_apertus:
|
| 316 |
st.success("✅ Apertus-8B enabled")
|
| 317 |
elif not use_apertus:
|
|
@@ -557,7 +557,7 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 557 |
|
| 558 |
st.stop() # Don't continue to translation
|
| 559 |
|
| 560 |
-
# Initialize inference client if API token is provided (optional for
|
| 561 |
inference_client = None
|
| 562 |
if hf_token_input:
|
| 563 |
inference_client = get_inference_client(hf_token_input)
|
|
@@ -584,9 +584,9 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 584 |
st.divider()
|
| 585 |
st.subheader(f"🌍 Translation to {LANGUAGES[target_lang]}")
|
| 586 |
|
| 587 |
-
with st.spinner("🤖 Translating with
|
| 588 |
try:
|
| 589 |
-
# Use
|
| 590 |
if target_lang == 'en':
|
| 591 |
translation = translate_coptic_to_english(prompt, dialect=selected_lang)
|
| 592 |
st.markdown(translation)
|
|
@@ -628,7 +628,7 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 628 |
st.session_state.messages.append({"role": "assistant", "content": combined_response})
|
| 629 |
else:
|
| 630 |
st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B. Please enable it in the sidebar.")
|
| 631 |
-
st.info("💡
|
| 632 |
|
| 633 |
except Exception as e:
|
| 634 |
st.error(f"❌ Translation error: {e}")
|
|
@@ -637,7 +637,11 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 637 |
|
| 638 |
# Standard translation/analysis handling
|
| 639 |
if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals():
|
| 640 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 641 |
|
| 642 |
# Add lexicon context for lexicon lookup
|
| 643 |
if analysis_type == 'lexicon_lookup' and coptic_lexicon:
|
|
@@ -652,19 +656,19 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 652 |
else:
|
| 653 |
full_prompt = prompt
|
| 654 |
|
| 655 |
-
st.session_state.messages.append({"role": "user", "content":
|
| 656 |
|
| 657 |
with st.chat_message("user"):
|
| 658 |
-
st.markdown(
|
| 659 |
|
| 660 |
-
# Generate response using
|
| 661 |
with st.chat_message("assistant"):
|
| 662 |
try:
|
| 663 |
# Check if this is a Coptic→English translation task
|
| 664 |
if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
|
| 665 |
-
# Use
|
| 666 |
if 'target_lang' in locals() and target_lang == 'en':
|
| 667 |
-
with st.spinner("🤖 Translating with
|
| 668 |
translation = translate_coptic_to_english(prompt, dialect=selected_lang)
|
| 669 |
st.markdown(translation)
|
| 670 |
st.session_state.messages.append({"role": "assistant", "content": translation})
|
|
@@ -699,7 +703,7 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 699 |
else:
|
| 700 |
st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B.")
|
| 701 |
st.info("💡 Enable Apertus-8B in the sidebar for multi-language support.")
|
| 702 |
-
st.info("💡
|
| 703 |
|
| 704 |
# For non-translation tasks or other languages
|
| 705 |
else:
|
|
@@ -728,7 +732,7 @@ if prompt := st.chat_input("Type your message..."):
|
|
| 728 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 729 |
else:
|
| 730 |
st.warning("⚠️ This feature requires Apertus-8B. Please enable it in the sidebar.")
|
| 731 |
-
st.info("💡 Coptic→English translation works without API token using
|
| 732 |
|
| 733 |
except Exception as e:
|
| 734 |
st.error(f"❌ Error: {str(e)}")
|
|
|
|
| 8 |
from coptic_parser_core import CopticParserCore
|
| 9 |
|
| 10 |
# ========================================
|
| 11 |
+
# COPTIC TRANSLATOR PREPROCESSING FUNCTIONS
|
| 12 |
# ========================================
|
| 13 |
# These functions convert between Coptic Unicode and Greek transcription
|
| 14 |
+
# Required for Coptic translator models (MarianMT-based)
|
| 15 |
|
| 16 |
COPTIC_TO_GREEK = {
|
| 17 |
"ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
def greekify(coptic_text):
|
| 49 |
+
"""Convert Coptic Unicode to Greek transcription for Coptic translator models."""
|
| 50 |
chars = []
|
| 51 |
for c in coptic_text:
|
| 52 |
l_c = c.lower()
|
|
|
|
| 171 |
return lexicon
|
| 172 |
|
| 173 |
# ========================================
|
| 174 |
+
# COPTIC TRANSLATOR MODEL LOADING
|
| 175 |
# ========================================
|
| 176 |
+
# Load and cache Coptic translation models
|
| 177 |
|
| 178 |
@st.cache_resource
|
| 179 |
def load_coptic_to_english_model():
|
| 180 |
+
"""Load Coptic → English translation model (Norelad/coptic-megalaa-finetuned)."""
|
| 181 |
try:
|
| 182 |
with st.spinner("📥 Loading Coptic→English model (first time only, ~600MB)..."):
|
| 183 |
model_name = "Norelad/coptic-megalaa-finetuned"
|
|
|
|
| 196 |
|
| 197 |
@st.cache_resource
|
| 198 |
def load_english_to_coptic_model():
|
| 199 |
+
"""Load English → Coptic translation model (megalaa/english-coptic-translator)."""
|
| 200 |
try:
|
| 201 |
with st.spinner("📥 Loading English→Coptic model (first time only, ~600MB)..."):
|
| 202 |
model_name = "megalaa/english-coptic-translator"
|
|
|
|
| 214 |
return None, None, None
|
| 215 |
|
| 216 |
def translate_coptic_to_english(text, dialect='cop-sa'):
|
| 217 |
+
"""Translate Coptic text to English using local Coptic translator.
|
| 218 |
|
| 219 |
Args:
|
| 220 |
text: Coptic text to translate
|
|
|
|
| 256 |
return f"Translation error: {e}"
|
| 257 |
|
| 258 |
def translate_english_to_coptic(text):
|
| 259 |
+
"""Translate English text to Coptic using local Coptic translator."""
|
| 260 |
tokenizer, model, device = load_english_to_coptic_model()
|
| 261 |
|
| 262 |
if tokenizer is None or model is None:
|
|
|
|
| 300 |
|
| 301 |
# Translation Model Selection
|
| 302 |
st.subheader("🤖 Translation Model")
|
| 303 |
+
st.info("✨ **NEW:** Using specialized Coptic translator models (free, no API token needed!)")
|
| 304 |
+
st.markdown("Models: `Norelad/coptic-megalaa-finetuned` & `megalaa/english-coptic-translator`")
|
| 305 |
|
| 306 |
# Optional: HuggingFace API Token for advanced features
|
| 307 |
with st.expander("⚙️ Advanced: Use Apertus-8B (optional)"):
|
|
|
|
| 311 |
type="password",
|
| 312 |
help="Optional: For Apertus-8B multi-language support"
|
| 313 |
)
|
| 314 |
+
use_apertus = st.checkbox("Use Apertus-8B instead of local Coptic translator", value=False)
|
| 315 |
if hf_token_input and use_apertus:
|
| 316 |
st.success("✅ Apertus-8B enabled")
|
| 317 |
elif not use_apertus:
|
|
|
|
| 557 |
|
| 558 |
st.stop() # Don't continue to translation
|
| 559 |
|
| 560 |
+
# Initialize inference client if API token is provided (optional for local translator)
|
| 561 |
inference_client = None
|
| 562 |
if hf_token_input:
|
| 563 |
inference_client = get_inference_client(hf_token_input)
|
|
|
|
| 584 |
st.divider()
|
| 585 |
st.subheader(f"🌍 Translation to {LANGUAGES[target_lang]}")
|
| 586 |
|
| 587 |
+
with st.spinner("🤖 Translating with local Coptic translator..."):
|
| 588 |
try:
|
| 589 |
+
# Use local Coptic translator for Coptic→English translation
|
| 590 |
if target_lang == 'en':
|
| 591 |
translation = translate_coptic_to_english(prompt, dialect=selected_lang)
|
| 592 |
st.markdown(translation)
|
|
|
|
| 628 |
st.session_state.messages.append({"role": "assistant", "content": combined_response})
|
| 629 |
else:
|
| 630 |
st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B. Please enable it in the sidebar.")
|
| 631 |
+
st.info("💡 Local Coptic translator currently supports English↔Coptic only.")
|
| 632 |
|
| 633 |
except Exception as e:
|
| 634 |
st.error(f"❌ Translation error: {e}")
|
|
|
|
| 637 |
|
| 638 |
# Standard translation/analysis handling
|
| 639 |
if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals():
|
| 640 |
+
# For translation, use raw text without prompt template
|
| 641 |
+
if analysis_type == 'translation':
|
| 642 |
+
full_prompt = prompt
|
| 643 |
+
else:
|
| 644 |
+
full_prompt = f"{COPTIC_PROMPTS[analysis_type]} {prompt}"
|
| 645 |
|
| 646 |
# Add lexicon context for lexicon lookup
|
| 647 |
if analysis_type == 'lexicon_lookup' and coptic_lexicon:
|
|
|
|
| 656 |
else:
|
| 657 |
full_prompt = prompt
|
| 658 |
|
| 659 |
+
st.session_state.messages.append({"role": "user", "content": prompt})
|
| 660 |
|
| 661 |
with st.chat_message("user"):
|
| 662 |
+
st.markdown(prompt)
|
| 663 |
|
| 664 |
+
# Generate response using local Coptic translator or Apertus API
|
| 665 |
with st.chat_message("assistant"):
|
| 666 |
try:
|
| 667 |
# Check if this is a Coptic→English translation task
|
| 668 |
if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
|
| 669 |
+
# Use local Coptic translator (Norelad/coptic-megalaa-finetuned)
|
| 670 |
if 'target_lang' in locals() and target_lang == 'en':
|
| 671 |
+
with st.spinner("🤖 Translating with local Coptic translator..."):
|
| 672 |
translation = translate_coptic_to_english(prompt, dialect=selected_lang)
|
| 673 |
st.markdown(translation)
|
| 674 |
st.session_state.messages.append({"role": "assistant", "content": translation})
|
|
|
|
| 703 |
else:
|
| 704 |
st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B.")
|
| 705 |
st.info("💡 Enable Apertus-8B in the sidebar for multi-language support.")
|
| 706 |
+
st.info("💡 Local Coptic translator currently supports English↔Coptic only.")
|
| 707 |
|
| 708 |
# For non-translation tasks or other languages
|
| 709 |
else:
|
|
|
|
| 732 |
st.session_state.messages.append({"role": "assistant", "content": full_response})
|
| 733 |
else:
|
| 734 |
st.warning("⚠️ This feature requires Apertus-8B. Please enable it in the sidebar.")
|
| 735 |
+
st.info("💡 Coptic→English translation works without API token using local Coptic translator.")
|
| 736 |
|
| 737 |
except Exception as e:
|
| 738 |
st.error(f"❌ Error: {str(e)}")
|