| |
| import gradio as gr |
| import pandas as pd |
| import config |
| import os |
| import time |
| import threading |
| import shutil |
| from core import PDFEngine |
|
|
# Tell Gradio to use config.TEMP_DIR as its temp directory; this is the same
# directory that cleanup_cron() sweeps.
# NOTE(review): this runs after `import gradio`; presumably Gradio reads the
# variable lazily when components are created — confirm.
| os.environ["GRADIO_TEMP_DIR"] = config.TEMP_DIR |
|
|
# Single module-level PDFEngine instance shared by every handler in this file.
| engine = PDFEngine() |
|
|
| |
| |
| |
| |
# How often the sweeper wakes up, and how old an entry must be to be deleted.
_CLEANUP_INTERVAL_SECONDS = 60
_MAX_AGE_SECONDS = 5 * 60


def _mask_name(filename):
    """Return a log-safe form of *filename*: first 4 chars, ``****``, extension.

    Names of 8 characters or fewer are returned unchanged.
    """
    if len(filename) > 8:
        return f"{filename[:4]}****{os.path.splitext(filename)[1]}"
    return filename


def _purge_expired(directory, cutoff):
    """Delete direct children of *directory* last modified before *cutoff*.

    Regular files are unlinked and directories are removed recursively.
    A failure on one entry is reported and does not stop the sweep.

    Args:
        directory: Path of the directory to sweep. A missing directory is
            treated as "nothing to do".
        cutoff: Epoch timestamp; entries whose mtime is strictly older than
            this are removed.
    """
    try:
        names = os.listdir(directory)
    except FileNotFoundError:
        return

    for name in names:
        path = os.path.join(directory, name)
        try:
            if os.path.getmtime(path) >= cutoff:
                continue
            if os.path.isdir(path):
                shutil.rmtree(path)
            elif os.path.isfile(path):
                os.remove(path)
        except FileNotFoundError:
            # The entry vanished between listing and deletion; nothing to do.
            continue
        except OSError as exc:
            # Report with a masked name so full file names never reach the log.
            print(f"[WARN] No se pudo borrar {_mask_name(name)}: {exc}")


def cleanup_cron():
    """Run forever, purging old entries from ``config.TEMP_DIR`` every minute.

    Files and folders directly under ``config.TEMP_DIR`` whose modification
    time is more than five minutes old are deleted. The function never
    returns; it is meant to be the target of a background thread.

    Any unexpected error during a sweep is printed and the loop continues, so
    a single bad pass cannot stop the sweeper.

    NOTE(review): expiry is judged by the mtime of the top-level entry only;
    a directory's mtime does not change when a file inside it is rewritten in
    place — confirm this is acceptable for the temp layout in use.
    """
    print("[INIT] 🛡️ Sistema de seguridad y auto-borrado ACTIVO.")

    while True:
        time.sleep(_CLEANUP_INTERVAL_SECONDS)
        try:
            _purge_expired(config.TEMP_DIR, time.time() - _MAX_AGE_SECONDS)
        except Exception as exc:
            print(f"[CRITICAL] Error en limpieza: {exc}")
|
|
# Start the cleanup loop as soon as this module is imported. The thread is a
# daemon, so it does not keep the interpreter alive at shutdown.
| threading.Thread(target=cleanup_cron, daemon=True).start() |
|
|
| |
def get_duplicate_html():
    """Return the HTML banner inviting the user to duplicate this Space.

    The link targets the Space named by the ``SPACE_ID`` environment variable
    when it is set, and the generic Spaces URL otherwise.
    """
    space = os.environ.get('SPACE_ID', None)
    if space:
        url = f"https://huggingface.co/spaces/{space}?duplicate=true"
    else:
        url = "https://huggingface.co/spaces?duplicate=true"

    return f"""
    <div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px; background-color: #f9fafb; padding: 10px; border-radius: 8px; border: 1px solid #e5e7eb;">
        <a href="{url}" target="_blank">
            <img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space" style="height: 30px;">
        </a>
        <span style="font-size: 0.9em; color: #4b5563;">
            ⚡ <b>¿Va lento o quieres seguridad adicional?</b> Haz clic para duplicar este espacio y tener tu propia instancia privada.
        </span>
    </div>
    """
|
|
| |
def update_file_list(files):
    """Build the ID/filename table and the default merge order for *files*.

    Args:
        files: Sequence of uploaded file paths, or a falsy value when nothing
            is uploaded.

    Returns:
        A ``(table, order)`` tuple. ``table`` is a DataFrame with columns
        ``ID`` (position in *files*) and ``Archivo`` (base file name);
        ``order`` is the comma-separated list of IDs in upload order.
        With no files, an empty DataFrame and an empty string are returned.
    """
    if not files:
        return pd.DataFrame(), ""

    # os.path.basename honours the host's path separators, so the table shows
    # bare file names on Windows as well as on POSIX.
    rows = [[idx, os.path.basename(path)] for idx, path in enumerate(files)]
    default_order = ",".join(str(idx) for idx in range(len(files)))
    return pd.DataFrame(rows, columns=["ID", "Archivo"]), default_order
|
|
def process_merge(files, order_str, use_numbering):
    """Merge the uploaded PDFs in the order given by *order_str*.

    Args:
        files: Uploaded PDF files; a falsy value short-circuits to ``None``.
        order_str: Comma-separated file IDs (e.g. ``"0, 2, 1"``). ``None`` is
            treated as an empty string. Tokens that are not plain decimal
            numbers are skipped.
        use_numbering: Forwarded unchanged to ``engine.merge_pdfs``.

    Returns:
        Whatever ``engine.merge_pdfs`` returns (presumably the path of the
        merged PDF — confirm against PDFEngine), or ``None`` without files.

    Raises:
        gr.Error: If parsing or merging fails; carries the original message.
    """
    if not files:
        return None
    try:
        tokens = (token.strip() for token in (order_str or "").split(","))
        # isdecimal() matches exactly what int() accepts; isdigit() also lets
        # through characters such as "²" that make int() raise.
        indices = [int(token) for token in tokens if token.isdecimal()]
        return engine.merge_pdfs(files, indices, use_numbering)
    except Exception as e:
        raise gr.Error(str(e)) from e
|
|
def load_info(f):
    """Describe the uploaded PDF for the split / reorder tab.

    Returns a 3-tuple: a one-line summary (name and page count), the page
    count itself, and a Gradio visibility update. Without a file the tuple is
    ``(None, 0, <hide>)``.
    """
    if f:
        info = engine.get_pdf_info(f)
        summary = f"📄 {info['name']} ({info['pages']} págs)"
        return summary, info['pages'], gr.update(visible=True)
    return None, 0, gr.update(visible=False)
|
|
def update_split_preview(f, r, t):
    """Build gallery items previewing the pages selected by range string *r*.

    Args:
        f: The source PDF; a falsy value short-circuits to ``None``.
        r: Page-range string typed by the user; empty short-circuits to
            ``None``.
        t: Forwarded to ``engine.get_preview_indices_from_string`` (the UI
            passes the page count stored when the file was loaded).

    Returns:
        A list of ``(preview, caption)`` tuples for at most the first 8
        selected pages, skipping pages whose preview is falsy; ``None`` when
        there is nothing to preview.
    """
    if not f or not r:
        return None
    pages = engine.get_preview_indices_from_string(r, t)
    if not pages:
        return None

    gallery = []
    for page in pages[:8]:
        # Render each page once and reuse the result for both the filter and
        # the gallery entry.
        preview = engine.generate_preview(f, page)
        if preview:
            gallery.append((preview, f"Pág {page}"))
    return gallery
|
|
def process_split(f, r):
    """Split PDF *f* according to range string *r* via ``engine.split_pdf_custom``.

    Returns ``None`` when no file is given; any engine failure is re-raised
    as ``gr.Error`` with the original message.
    """
    if f:
        try:
            result = engine.split_pdf_custom(f, r)
        except Exception as err:
            raise gr.Error(str(err))
        return result
    return None
|
|
def process_reorder(f, o):
    """Reorder the pages of PDF *f* following order string *o*.

    Delegates to ``engine.reorder_pages``. Returns ``None`` when no file is
    given; engine failures surface as ``gr.Error``.
    """
    if not f:
        return None
    try:
        reordered = engine.reorder_pages(f, o)
    except Exception as err:
        raise gr.Error(str(err))
    return reordered
|
|
def process_compare(fa, fb):
    """Compare the text of PDFs *fa* and *fb* via ``engine.compare_pdfs_text``.

    Both files are required; if either is missing the result is ``None``.
    Engine failures surface as ``gr.Error``.
    """
    if not (fa and fb):
        return None
    try:
        report = engine.compare_pdfs_text(fa, fb)
    except Exception as err:
        raise gr.Error(str(err))
    return report
|
|
def process_compress(f, l):
    """Compress PDF *f* at the level selected by UI label *l*.

    The label is translated to the numeric level passed to
    ``engine.compress_pdf``; an unrecognised label falls back to level 3.
    Returns ``None`` when no file is given; engine failures surface as
    ``gr.Error``.
    """
    if not f:
        return None

    level_by_label = {
        "Baja (Máxima calidad)": 1,
        "Media (Recomendado - eBook)": 3,
        "Alta (Pantalla - 72dpi)": 4,
    }
    try:
        level = level_by_label.get(l, 3)
        return engine.compress_pdf(f, level)
    except Exception as err:
        raise gr.Error(str(err))
|
|
# Angles recognised in a rotation label, in the order they are tested.
_ROTATION_ANGLES = (90, 180, 270)


def _angle_from_label(label):
    """Return the rotation angle named in a UI label, or 0 when none matches."""
    if not label:
        return 0
    return next((angle for angle in _ROTATION_ANGLES if str(angle) in label), 0)


def update_rot_preview(f, a_lbl):
    """Return a preview of PDF *f* rotated by the angle named in *a_lbl*.

    Args:
        f: The source PDF; a falsy value short-circuits to ``None``.
        a_lbl: Rotation label from the radio control (e.g. ``"90° (Derecha)"``).
            A missing or unrecognised label means no rotation.

    Returns:
        Whatever ``engine.get_rotated_preview`` returns, or ``None`` without
        a file.
    """
    if not f:
        return None
    return engine.get_rotated_preview(f, _angle_from_label(a_lbl))


def process_rotate(f, a_lbl):
    """Rotate PDF *f* by the angle named in *a_lbl*.

    Args:
        f: The source PDF; a falsy value short-circuits to ``None``.
        a_lbl: Rotation label from the radio control. A missing or
            unrecognised label means no rotation.

    Returns:
        For a 0° rotation, the input path unchanged (``f.name`` when *f* has
        that attribute, otherwise *f* itself). Otherwise whatever
        ``engine.rotate_pdf`` returns.

    Raises:
        gr.Error: If the engine fails; carries the original message.
    """
    if not f:
        return None
    angle = _angle_from_label(a_lbl)
    if angle == 0:
        # Nothing to rotate: return the input, whether it arrived as a plain
        # path string or as an object exposing ``.name``.
        return getattr(f, "name", f)
    try:
        return engine.rotate_pdf(f, angle)
    except Exception as e:
        raise gr.Error(str(e)) from e
|
|
def process_protect(f, p):
    """Password-protect PDF *f* with password *p* via ``engine.protect_pdf``.

    Both a file and a non-empty password are required; otherwise the result
    is ``None``. Engine failures surface as ``gr.Error``.
    """
    if not (f and p):
        return None
    try:
        protected = engine.protect_pdf(f, p)
    except Exception as err:
        raise gr.Error(str(err))
    return protected
|
|
def process_meta(f, t, a, s):
    """Update the metadata of PDF *f* via ``engine.update_metadata``.

    *t*, *a* and *s* are forwarded unchanged (the UI supplies title, author
    and subject). Returns ``None`` when no file is given; engine failures
    surface as ``gr.Error``.
    """
    if f:
        try:
            updated = engine.update_metadata(f, t, a, s)
        except Exception as err:
            raise gr.Error(str(err))
        return updated
    return None
|
|
def process_text(f):
    """Extract the text of PDF *f* via ``engine.extract_text``.

    Returns ``None`` when no file is given; engine failures surface as
    ``gr.Error``.
    """
    if not f:
        return None
    try:
        extracted = engine.extract_text(f)
    except Exception as err:
        raise gr.Error(str(err))
    return extracted
|
|
def process_watermark(f, t):
    """Stamp watermark text *t* on PDF *f* via ``engine.add_watermark``.

    Both a file and non-empty text are required; otherwise the result is
    ``None``. Engine failures surface as ``gr.Error``.
    """
    if not (f and t):
        return None
    try:
        stamped = engine.add_watermark(f, t)
    except Exception as err:
        raise gr.Error(str(err))
    return stamped
|
|
def process_repair(f):
    """Attempt to repair PDF *f* via ``engine.repair_pdf``.

    Returns ``None`` when no file is given; engine failures surface as
    ``gr.Error``.
    """
    if f:
        try:
            repaired = engine.repair_pdf(f)
        except Exception as err:
            raise gr.Error(str(err))
        return repaired
    return None
|
|
| |
def process_word(f):
    """Convert PDF *f* to a Word document via ``engine.pdf_to_word``.

    Returns ``None`` when no file is given; engine failures surface as
    ``gr.Error``.
    """
    if not f:
        return None
    try:
        converted = engine.pdf_to_word(f)
    except Exception as err:
        raise gr.Error(str(err))
    return converted
def process_pptx(f):
    """Convert PDF *f* to a PowerPoint file via ``engine.pdf_to_pptx``.

    Returns ``None`` when no file is given; engine failures surface as
    ``gr.Error``.
    """
    if f:
        try:
            converted = engine.pdf_to_pptx(f)
        except Exception as err:
            raise gr.Error(str(err))
        return converted
    return None
def process_p2i(f):
    """Export the pages of PDF *f* as images via ``engine.pdf_to_images_zip``.

    Returns ``None`` when no file is given; engine failures surface as
    ``gr.Error``.
    """
    if not f:
        return None
    try:
        archive = engine.pdf_to_images_zip(f)
    except Exception as err:
        raise gr.Error(str(err))
    return archive
|
|
|
|
| |
# Gradio UI definition. Builds the Blocks app `demo` with one tab per tool and
# wires every button to the matching handler defined earlier in this file.
# NOTE(review): the nesting of the `with` layout blocks cannot be read from
# this listing (indentation is lost), so the comments below describe the
# event wiring only, not the visual layout.
| with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft()) as demo: |
| |
| gr.Markdown(f"# {config.APP_TITLE}") |
# NOTE(review): the banner says files are processed "in memory"; the cleanup
# thread deletes entries from config.TEMP_DIR on disk, so presumably files do
# touch disk — confirm the wording.
| gr.Markdown(""" |
| Los archivos se procesan en memoria y se **autodestruyen tras 5 minutos**. |
| """) |
| |
| gr.HTML(get_duplicate_html()) |
|
|
| with gr.Tabs(): |
| |
# Merge tab: uploading files refreshes the ID table and pre-fills the order
# box (update_file_list); the button calls process_merge.
| with gr.TabItem("Unir"): |
| with gr.Row(): |
| with gr.Column(scale=1): |
| m_files = gr.File(file_count="multiple", label="Archivos", file_types=[".pdf"]) |
| with gr.Column(scale=2): |
| m_tbl = gr.Dataframe(headers=["ID", "Archivo"], interactive=False) |
| m_ord = gr.Textbox(label="Orden de los documentos según ID", placeholder="Ej: 0, 2, 1") |
| m_nums = gr.Checkbox(label="Numerar páginas (1 de X)", value=False) |
| m_btn = gr.Button("Unir PDF", variant="primary") |
| m_out = gr.File(label="Resultado") |
| m_files.change(update_file_list, m_files, [m_tbl, m_ord]) |
| m_btn.click(process_merge, [m_files, m_ord, m_nums], m_out) |
|
|
| |
# Split / reorder tab: both sub-tabs read the same uploaded file (dr_f).
# dr_pg is a State holding the page count written by load_info.
| with gr.TabItem("Dividir / Reordenar"): |
| dr_f = gr.File(label="PDF Origen", file_types=[".pdf"]) |
| dr_inf = gr.Markdown("") |
| dr_pg = gr.State(0) |
| with gr.Tabs(): |
| with gr.Tab("Extraer"): |
| gr.Markdown("Separa páginas en un ZIP.") |
| with gr.Row(): |
| with gr.Column(): |
| s_rng = gr.Textbox(label="Rango", placeholder="Ej: 1-3, 5") |
| with gr.Row(): |
| s_prv = gr.Button("Preview") |
| s_btn = gr.Button("Dividir (ZIP)", variant="primary") |
| with gr.Column(): |
| s_gal = gr.Gallery(height=160, columns=4, object_fit="contain", label="Vista Previa") |
| s_out = gr.File(label="ZIP") |
| s_prv.click(update_split_preview, [dr_f, s_rng, dr_pg], s_gal) |
| s_btn.click(process_split, [dr_f, s_rng], s_out) |
| with gr.Tab("Reordenar"): |
| gr.Markdown("Crea un PDF con nuevo orden.") |
| with gr.Row(): |
| with gr.Column(): |
| r_ord = gr.Textbox(label="Nuevo Orden", placeholder="Ej: 3, 1, 2, 4-10") |
| r_btn = gr.Button("Reordenar", variant="primary") |
| with gr.Column(): |
| r_out = gr.File(label="PDF Reordenado") |
| r_btn.click(process_reorder, [dr_f, r_ord], r_out) |
# On file change: show the summary, store the page count, and apply
# load_info's visibility update to the ZIP output (s_out).
| dr_f.change(load_info, dr_f, [dr_inf, dr_pg, s_out]) |
|
|
| |
# Compress tab: the radio labels must stay identical to the keys of the level
# table inside process_compress; an unknown label falls back to level 3.
| with gr.TabItem("Comprimir"): |
| with gr.Row(): |
| with gr.Column(): |
| c_f = gr.File(label="PDF Original", file_types=[".pdf"]) |
| c_l = gr.Radio(["Baja (Máxima calidad)", "Media (Recomendado - eBook)", "Alta (Pantalla - 72dpi)"], label="Nivel", value="Media (Recomendado - eBook)") |
| c_b = gr.Button("Comprimir", variant="primary") |
| with gr.Column(): |
| c_out = gr.File(label="PDF Comprimido") |
| c_b.click(process_compress, [c_f, c_l], c_out) |
|
|
| |
# Conversion tab: PDF to DOCX, to an images ZIP, and to PPTX.
# NOTE(review): these file inputs have no file_types filter, unlike the
# inputs in the tabs above — confirm whether that is intended.
| with gr.TabItem("Convertir Formatos"): |
| with gr.Row(): |
| with gr.Column(): |
| gr.Markdown("### A Word") |
| w_f = gr.File(label="PDF") |
| w_b = gr.Button("Convertir") |
| w_o = gr.File(label="DOCX") |
| w_b.click(process_word, w_f, w_o) |
| with gr.Column(): |
| gr.Markdown("### A Imágenes") |
| p2i_f = gr.File(label="PDF") |
| p2i_b = gr.Button("Extraer ZIP") |
| p2i_o = gr.File(label="ZIP") |
| p2i_b.click(process_p2i, p2i_f, p2i_o) |
| with gr.Column(): |
| gr.Markdown("### A PowerPoint") |
| p_f = gr.File(label="PDF") |
| p_b = gr.Button("Convertir") |
| p_o = gr.File(label="PPTX") |
| p_b.click(process_pptx, p_f, p_o) |
|
|
| |
# Compare tab: two PDF inputs, one report output produced by process_compare.
| with gr.TabItem("Comparar"): |
| gr.Markdown("Compara el **texto** de dos versiones. Descarga un informe con cambios.") |
| with gr.Row(): |
| with gr.Column(): |
| ca = gr.File(label="Versión A (Original)", file_types=[".pdf"]) |
| with gr.Column(): |
| cb = gr.File(label="Versión B (Modificada)", file_types=[".pdf"]) |
| cb_btn = gr.Button("Generar Informe", variant="primary") |
| co = gr.File(label="Informe PDF") |
| cb_btn.click(process_compare, [ca, cb], co) |
|
|
| |
# Extras tab: rotate, watermark, repair, protect, and text / metadata tools.
| with gr.TabItem("Extras"): |
# Rotate: the preview is refreshed whenever the file or the angle changes.
| with gr.Tab("Rotar"): |
| with gr.Row(): |
| with gr.Column(): |
| rf = gr.File(label="PDF") |
| ra = gr.Radio(["0° (Original)", "90° (Derecha)", "180° (Invertir)", "270° (Izquierda)"], label="Rotación", value="0° (Original)") |
| rb = gr.Button("Rotar", variant="primary") |
| with gr.Column(): |
| rp = gr.Image(label="Preview") |
| ro = gr.File(label="PDF Rotado") |
| rf.change(update_rot_preview, [rf, ra], rp) |
| ra.change(update_rot_preview, [rf, ra], rp) |
| rb.click(process_rotate, [rf, ra], ro) |
| |
| with gr.Tab("Marca de Agua"): |
| gr.Markdown("Añade una marca de agua diagonal en todas las páginas.") |
| with gr.Row(): |
| with gr.Column(): |
| wf = gr.File(label="PDF") |
| wt = gr.Textbox(label="Texto Marca de Agua", placeholder="Ej: CONFIDENCIAL") |
| wb = gr.Button("Estampar", variant="primary") |
| with gr.Column(): |
| wo = gr.File(label="PDF Marcado") |
| wb.click(process_watermark, [wf, wt], wo) |
|
|
| with gr.Tab("Reparar"): |
| gr.Markdown("Intenta arreglar PDFs corruptos o dañados reescribiéndolos con Ghostscript.") |
| with gr.Row(): |
| with gr.Column(): |
| repf = gr.File(label="PDF Dañado") |
| repb = gr.Button("Reparar", variant="primary") |
| with gr.Column(): |
| repo = gr.File(label="PDF Reparado") |
| repb.click(process_repair, repf, repo) |
|
|
| with gr.Tab("Proteger"): |
| with gr.Row(): |
| with gr.Column(): |
| pf = gr.File(label="PDF") |
| pp = gr.Textbox(type="password", label="Pass") |
| pb = gr.Button("Encriptar", variant="primary") |
| with gr.Column(): |
| po = gr.File(label="Protegido") |
| pb.click(process_protect, [pf, pp], po) |
| |
# Info / text: both actions (text extraction and metadata update) read the
# same input file, tf.
| with gr.Tab("Info/Texto"): |
| with gr.Row(): |
| with gr.Column(): |
| tf = gr.File(label="PDF") |
| tb = gr.Button("Extraer Texto") |
| to = gr.File() |
| tb.click(process_text, tf, to) |
| with gr.Column(): |
| mt = gr.Textbox(label="Título") |
| ma = gr.Textbox(label="Autor") |
| ms = gr.Textbox(label="Asunto") |
| mb = gr.Button("Actualizar Meta") |
| mo = gr.File() |
| mb.click(process_meta, [tf, mt, ma, ms], mo) |
|
|
if __name__ == "__main__":
    # Script entry point: enable the request queue with a default concurrency
    # limit of 2, then serve on every network interface at port 7860.
    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
    demo.queue(default_concurrency_limit=2).launch(**launch_options)