import streamlit as st
from PIL import Image, ImageFilter, ImageEnhance
import tempfile
import os
import easyocr
from transformers import MT5ForConditionalGeneration, MT5Tokenizer, pipeline

# Streamlit re-executes this script from top to bottom on every widget
# interaction, so plain module-level loading would rebuild the model on each
# rerun. st.cache_resource keeps one tokenizer/model/pipeline per server
# process. show_spinner=False keeps this call from emitting any UI element
# before st.set_page_config runs further down.
# NOTE(review): the google/mt5-small model card says the checkpoint is only
# pre-trained (no supervised fine-tuning), so prompt-style correction may
# need a fine-tuned checkpoint to give useful output - confirm model choice.
@st.cache_resource(show_spinner=False)
def _load_correction_pipeline():
    """Load the mT5 tokenizer, model and text2text pipeline a single time.

    Returns:
        A ``(tokenizer, model, pipe)`` tuple. The same objects are handed
        back on every rerun, so they are shared rather than rebuilt.
    """
    tok = MT5Tokenizer.from_pretrained("google/mt5-small", legacy=False, use_fast=False)
    mdl = MT5ForConditionalGeneration.from_pretrained("google/mt5-small")
    return tok, mdl, pipeline("text2text-generation", model=mdl, tokenizer=tok)


# Keep the original module-level names so the rest of the script is unchanged.
tokenizer, model, pipe = _load_correction_pipeline()

# Preprocess uploaded image to improve OCR accuracy
def preprocess_image_pillow(image):
    """Return an OCR-friendly copy of *image*.

    The input is converted to mode "L" (grayscale), resized to twice its
    width and height with LANCZOS resampling, contrast-enhanced by a factor
    of 2.0, and finally passed through the SHARPEN filter.

    Args:
        image: A PIL image.

    Returns:
        The processed PIL image.
    """
    gray = image.convert("L")
    doubled_size = tuple(2 * side for side in gray.size)
    upscaled = gray.resize(doubled_size, Image.LANCZOS)
    boosted = ImageEnhance.Contrast(upscaled).enhance(2.0)
    return boosted.filter(ImageFilter.SHARPEN)

# Streamlit App UI
st.set_page_config(page_title="📝 Telugu OCR & Correction", layout="centered")
st.title("📝 Telugu Handwriting to Typed Text")


@st.cache_resource(show_spinner="Loading Telugu OCR model...")
def _get_ocr_reader():
    """Build the CPU-only Telugu EasyOCR reader once and reuse it on reruns."""
    return easyocr.Reader(['te'], gpu=False)


uploaded_file = st.file_uploader("📤 Upload Telugu handwritten image", type=["png", "jpg", "jpeg"])

if uploaded_file:
    # The upload is untrusted: a file with an image extension may still be
    # undecodable, so report that to the user instead of raising.
    try:
        image = Image.open(uploaded_file).convert("RGB")
    except OSError as e:
        st.error(f"Could not read the uploaded image: {e}")
        st.stop()

    enhanced_image = preprocess_image_pillow(image)
    st.image(enhanced_image, caption="Preprocessed Image", use_container_width=True)

    # Reserve a temp path for EasyOCR and close the handle immediately; the
    # image is written inside the try block so the file is removed even when
    # saving fails.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
        temp_path = temp.name

    try:
        enhanced_image.save(temp_path)

        reader = _get_ocr_reader()
        results = reader.readtext(temp_path)

        # Each result is unpacked as a 3-tuple; only the middle (text) item
        # is kept, one detection per line.
        raw_text = "\n".join(text for (_, text, _) in results)

        st.markdown("### 📄 OCR Extracted Text")
        st.text_area("📝 Telugu OCR", raw_text, height=150)

        # Generate correction using mT5
        if raw_text.strip():
            st.markdown("### ✅ LLM Corrected Telugu Text")
            prompt = f"Correct the following Telugu text spelling and grammar:\n{raw_text}"
            try:
                response = pipe(prompt, max_new_tokens=256, do_sample=False)[0]['generated_text']
                st.text_area("🤖 Corrected Text", response, height=150)
                st.download_button("⬇️ Download", response, file_name="corrected_telugu.txt")
            except Exception as e:
                # UI boundary: surface any generation failure to the user.
                st.error(f"LLM Correction Error: {e}")
        else:
            st.warning("OCR did not extract any usable Telugu text.")
    finally:
        # Always remove the temp file; it may already be gone.
        try:
            os.remove(temp_path)
        except FileNotFoundError:
            pass