Rogaton Claude committed on
Commit
409b3a2
·
1 Parent(s): e0d73ae

Fix translation flow and rename megalaa references

Browse files

Critical fixes:
- Fix translation logic: don't use COPTIC_PROMPTS template for translation mode
- Use raw text input for translation (lines 641-642)
- Display the raw prompt to the user instead of full_prompt (lines 659 and 662)

Rename "megalaa" references to generic terms:
- "megalaa models" → "local Coptic translator"
- "megalaa specialized Coptic models" → "specialized Coptic translator models"
- Update all comments, docstrings, and UI messages
- Keep technical model names in config: Norelad/coptic-megalaa-finetuned

This ensures Coptic→English translation works without requiring an Apertus-8B API token.

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude <noreply@anthropic.com>

Files changed (1) hide show
  1. apertus_ui.py +28 -24
apertus_ui.py CHANGED
@@ -8,10 +8,10 @@ from huggingface_hub import InferenceClient
8
  from coptic_parser_core import CopticParserCore
9
 
10
  # ========================================
11
- # MEGALAA MODEL PREPROCESSING FUNCTIONS
12
  # ========================================
13
  # These functions convert between Coptic Unicode and Greek transcription
14
- # Required for megalaa/coptic-english-translator and megalaa/english-coptic-translator
15
 
16
  COPTIC_TO_GREEK = {
17
  "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
@@ -46,7 +46,7 @@ GREEK_TO_COPTIC = {
46
  }
47
 
48
  def greekify(coptic_text):
49
- """Convert Coptic Unicode to Greek transcription for megalaa models."""
50
  chars = []
51
  for c in coptic_text:
52
  l_c = c.lower()
@@ -171,13 +171,13 @@ def load_coptic_lexicon(file_path=None):
171
  return lexicon
172
 
173
  # ========================================
174
- # MEGALAA MODEL LOADING
175
  # ========================================
176
- # Load and cache megalaa translation models
177
 
178
  @st.cache_resource
179
  def load_coptic_to_english_model():
180
- """Load Coptic → English translation model (Norelad's fine-tuned megalaa)."""
181
  try:
182
  with st.spinner("📥 Loading Coptic→English model (first time only, ~600MB)..."):
183
  model_name = "Norelad/coptic-megalaa-finetuned"
@@ -196,7 +196,7 @@ def load_coptic_to_english_model():
196
 
197
  @st.cache_resource
198
  def load_english_to_coptic_model():
199
- """Load megalaa English → Coptic translation model."""
200
  try:
201
  with st.spinner("📥 Loading English→Coptic model (first time only, ~600MB)..."):
202
  model_name = "megalaa/english-coptic-translator"
@@ -214,7 +214,7 @@ def load_english_to_coptic_model():
214
  return None, None, None
215
 
216
  def translate_coptic_to_english(text, dialect='cop-sa'):
217
- """Translate Coptic text to English using megalaa model.
218
 
219
  Args:
220
  text: Coptic text to translate
@@ -256,7 +256,7 @@ def translate_coptic_to_english(text, dialect='cop-sa'):
256
  return f"Translation error: {e}"
257
 
258
  def translate_english_to_coptic(text):
259
- """Translate English text to Coptic using megalaa model."""
260
  tokenizer, model, device = load_english_to_coptic_model()
261
 
262
  if tokenizer is None or model is None:
@@ -300,8 +300,8 @@ with st.sidebar:
300
 
301
  # Translation Model Selection
302
  st.subheader("🤖 Translation Model")
303
- st.info("✨ **NEW:** Using megalaa specialized Coptic models (free, no API token needed!)")
304
- st.markdown("Models: `megalaa/coptic-english-translator` & `megalaa/english-coptic-translator`")
305
 
306
  # Optional: HuggingFace API Token for advanced features
307
  with st.expander("⚙️ Advanced: Use Apertus-8B (optional)"):
@@ -311,7 +311,7 @@ with st.sidebar:
311
  type="password",
312
  help="Optional: For Apertus-8B multi-language support"
313
  )
314
- use_apertus = st.checkbox("Use Apertus-8B instead of megalaa", value=False)
315
  if hf_token_input and use_apertus:
316
  st.success("✅ Apertus-8B enabled")
317
  elif not use_apertus:
@@ -557,7 +557,7 @@ if prompt := st.chat_input("Type your message..."):
557
 
558
  st.stop() # Don't continue to translation
559
 
560
- # Initialize inference client if API token is provided (optional for megalaa)
561
  inference_client = None
562
  if hf_token_input:
563
  inference_client = get_inference_client(hf_token_input)
@@ -584,9 +584,9 @@ if prompt := st.chat_input("Type your message..."):
584
  st.divider()
585
  st.subheader(f"🌍 Translation to {LANGUAGES[target_lang]}")
586
 
587
- with st.spinner("🤖 Translating with megalaa model..."):
588
  try:
589
- # Use megalaa for Coptic→English translation
590
  if target_lang == 'en':
591
  translation = translate_coptic_to_english(prompt, dialect=selected_lang)
592
  st.markdown(translation)
@@ -628,7 +628,7 @@ if prompt := st.chat_input("Type your message..."):
628
  st.session_state.messages.append({"role": "assistant", "content": combined_response})
629
  else:
630
  st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B. Please enable it in the sidebar.")
631
- st.info("💡 Megalaa models currently support English↔Coptic only.")
632
 
633
  except Exception as e:
634
  st.error(f"❌ Translation error: {e}")
@@ -637,7 +637,11 @@ if prompt := st.chat_input("Type your message..."):
637
 
638
  # Standard translation/analysis handling
639
  if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals():
640
- full_prompt = f"{COPTIC_PROMPTS[analysis_type]} {prompt}"
 
 
 
 
641
 
642
  # Add lexicon context for lexicon lookup
643
  if analysis_type == 'lexicon_lookup' and coptic_lexicon:
@@ -652,19 +656,19 @@ if prompt := st.chat_input("Type your message..."):
652
  else:
653
  full_prompt = prompt
654
 
655
- st.session_state.messages.append({"role": "user", "content": full_prompt})
656
 
657
  with st.chat_message("user"):
658
- st.markdown(full_prompt)
659
 
660
- # Generate response using megalaa models or Apertus API
661
  with st.chat_message("assistant"):
662
  try:
663
  # Check if this is a Coptic→English translation task
664
  if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
665
- # Use megalaa models for Coptic translation
666
  if 'target_lang' in locals() and target_lang == 'en':
667
- with st.spinner("🤖 Translating with megalaa model..."):
668
  translation = translate_coptic_to_english(prompt, dialect=selected_lang)
669
  st.markdown(translation)
670
  st.session_state.messages.append({"role": "assistant", "content": translation})
@@ -699,7 +703,7 @@ if prompt := st.chat_input("Type your message..."):
699
  else:
700
  st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B.")
701
  st.info("💡 Enable Apertus-8B in the sidebar for multi-language support.")
702
- st.info("💡 Megalaa models currently support English↔Coptic only.")
703
 
704
  # For non-translation tasks or other languages
705
  else:
@@ -728,7 +732,7 @@ if prompt := st.chat_input("Type your message..."):
728
  st.session_state.messages.append({"role": "assistant", "content": full_response})
729
  else:
730
  st.warning("⚠️ This feature requires Apertus-8B. Please enable it in the sidebar.")
731
- st.info("💡 Coptic→English translation works without API token using megalaa models.")
732
 
733
  except Exception as e:
734
  st.error(f"❌ Error: {str(e)}")
 
8
  from coptic_parser_core import CopticParserCore
9
 
10
  # ========================================
11
+ # COPTIC TRANSLATOR PREPROCESSING FUNCTIONS
12
  # ========================================
13
  # These functions convert between Coptic Unicode and Greek transcription
14
+ # Required for Coptic translator models (MarianMT-based)
15
 
16
  COPTIC_TO_GREEK = {
17
  "ⲁ": "α", "ⲃ": "β", "ⲅ": "γ", "ⲇ": "δ", "ⲉ": "ε", "ⲋ": "ϛ",
 
46
  }
47
 
48
  def greekify(coptic_text):
49
+ """Convert Coptic Unicode to Greek transcription for Coptic translator models."""
50
  chars = []
51
  for c in coptic_text:
52
  l_c = c.lower()
 
171
  return lexicon
172
 
173
  # ========================================
174
+ # COPTIC TRANSLATOR MODEL LOADING
175
  # ========================================
176
+ # Load and cache Coptic translation models
177
 
178
  @st.cache_resource
179
  def load_coptic_to_english_model():
180
+ """Load Coptic → English translation model (Norelad/coptic-megalaa-finetuned)."""
181
  try:
182
  with st.spinner("📥 Loading Coptic→English model (first time only, ~600MB)..."):
183
  model_name = "Norelad/coptic-megalaa-finetuned"
 
196
 
197
  @st.cache_resource
198
  def load_english_to_coptic_model():
199
+ """Load English → Coptic translation model (megalaa/english-coptic-translator)."""
200
  try:
201
  with st.spinner("📥 Loading English→Coptic model (first time only, ~600MB)..."):
202
  model_name = "megalaa/english-coptic-translator"
 
214
  return None, None, None
215
 
216
  def translate_coptic_to_english(text, dialect='cop-sa'):
217
+ """Translate Coptic text to English using local Coptic translator.
218
 
219
  Args:
220
  text: Coptic text to translate
 
256
  return f"Translation error: {e}"
257
 
258
  def translate_english_to_coptic(text):
259
+ """Translate English text to Coptic using local Coptic translator."""
260
  tokenizer, model, device = load_english_to_coptic_model()
261
 
262
  if tokenizer is None or model is None:
 
300
 
301
  # Translation Model Selection
302
  st.subheader("🤖 Translation Model")
303
+ st.info("✨ **NEW:** Using specialized Coptic translator models (free, no API token needed!)")
304
+ st.markdown("Models: `Norelad/coptic-megalaa-finetuned` & `megalaa/english-coptic-translator`")
305
 
306
  # Optional: HuggingFace API Token for advanced features
307
  with st.expander("⚙️ Advanced: Use Apertus-8B (optional)"):
 
311
  type="password",
312
  help="Optional: For Apertus-8B multi-language support"
313
  )
314
+ use_apertus = st.checkbox("Use Apertus-8B instead of local Coptic translator", value=False)
315
  if hf_token_input and use_apertus:
316
  st.success("✅ Apertus-8B enabled")
317
  elif not use_apertus:
 
557
 
558
  st.stop() # Don't continue to translation
559
 
560
+ # Initialize inference client if API token is provided (optional for local translator)
561
  inference_client = None
562
  if hf_token_input:
563
  inference_client = get_inference_client(hf_token_input)
 
584
  st.divider()
585
  st.subheader(f"🌍 Translation to {LANGUAGES[target_lang]}")
586
 
587
+ with st.spinner("🤖 Translating with local Coptic translator..."):
588
  try:
589
+ # Use local Coptic translator for Coptic→English translation
590
  if target_lang == 'en':
591
  translation = translate_coptic_to_english(prompt, dialect=selected_lang)
592
  st.markdown(translation)
 
628
  st.session_state.messages.append({"role": "assistant", "content": combined_response})
629
  else:
630
  st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B. Please enable it in the sidebar.")
631
+ st.info("💡 Local Coptic translator currently supports English↔Coptic only.")
632
 
633
  except Exception as e:
634
  st.error(f"❌ Translation error: {e}")
 
637
 
638
  # Standard translation/analysis handling
639
  if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals():
640
+ # For translation, use raw text without prompt template
641
+ if analysis_type == 'translation':
642
+ full_prompt = prompt
643
+ else:
644
+ full_prompt = f"{COPTIC_PROMPTS[analysis_type]} {prompt}"
645
 
646
  # Add lexicon context for lexicon lookup
647
  if analysis_type == 'lexicon_lookup' and coptic_lexicon:
 
656
  else:
657
  full_prompt = prompt
658
 
659
+ st.session_state.messages.append({"role": "user", "content": prompt})
660
 
661
  with st.chat_message("user"):
662
+ st.markdown(prompt)
663
 
664
+ # Generate response using local Coptic translator or Apertus API
665
  with st.chat_message("assistant"):
666
  try:
667
  # Check if this is a Coptic→English translation task
668
  if selected_lang in ['cop', 'cop-sa', 'cop-bo'] and 'analysis_type' in locals() and analysis_type == 'translation':
669
+ # Use local Coptic translator (Norelad/coptic-megalaa-finetuned)
670
  if 'target_lang' in locals() and target_lang == 'en':
671
+ with st.spinner("🤖 Translating with local Coptic translator..."):
672
  translation = translate_coptic_to_english(prompt, dialect=selected_lang)
673
  st.markdown(translation)
674
  st.session_state.messages.append({"role": "assistant", "content": translation})
 
703
  else:
704
  st.warning(f"⚠️ Translation to {target_language_name} requires Apertus-8B.")
705
  st.info("💡 Enable Apertus-8B in the sidebar for multi-language support.")
706
+ st.info("💡 Local Coptic translator currently supports English↔Coptic only.")
707
 
708
  # For non-translation tasks or other languages
709
  else:
 
732
  st.session_state.messages.append({"role": "assistant", "content": full_response})
733
  else:
734
  st.warning("⚠️ This feature requires Apertus-8B. Please enable it in the sidebar.")
735
+ st.info("💡 Coptic→English translation works without API token using local Coptic translator.")
736
 
737
  except Exception as e:
738
  st.error(f"❌ Error: {str(e)}")