Spaces:

DaniFera
/

PDFree

Sleeping

App Files Files Community

PDFree / app.py

DaniFera

Update app.py

0a5d77a verified 4 months ago

raw

history blame contribute delete

15.6 kB

	## Versión Alfa 1.0
	import gradio as gr
	import pandas as pd
	import config
	import os
	import time
	import threading
	import shutil
	from core import PDFEngine

	# Export the app's temp directory as GRADIO_TEMP_DIR. This is the same
	# directory that cleanup_cron() sweeps.
	# NOTE(review): presumably Gradio reads this variable to decide where uploads
	# and generated files are stored, so it must be set before the UI is built — confirm.
	os.environ["GRADIO_TEMP_DIR"] = config.TEMP_DIR

	# Single module-level PDFEngine instance shared by every wrapper function below.
	engine = PDFEngine()

	# --- SECURITY: GARBAGE COLLECTOR ---
	# NOTE FOR ANYONE DUPLICATING THIS SPACE:
	# The sweeper below cleans the temp directory once a minute and is SILENT by
	# default. Set CLEANUP_VERBOSE = True to print every decision it takes.
	CLEANUP_VERBOSE = False


	def _mask_name(filename):
	    """Return *filename* with its middle hidden so logs never show full user file names."""
	    if len(filename) > 8:
	        return f"{filename[:4]}****{os.path.splitext(filename)[1]}"
	    return filename


	def _cleanup_debug(message):
	    """Print *message* only when verbose cleanup logging is switched on."""
	    if CLEANUP_VERBOSE:
	        print(message)


	def _purge_expired(temp_dir, cutoff):
	    """Delete every direct child of *temp_dir* whose mtime is older than *cutoff*."""
	    try:
	        items = os.listdir(temp_dir)
	    except FileNotFoundError:
	        _cleanup_debug("[INFO] Carpeta temporal aún no creada.")
	        return

	    if items:
	        _cleanup_debug(f"[ESTADO] Analizando {len(items)} elementos...")
	    else:
	        _cleanup_debug("[ESTADO] Carpeta limpia (0 elementos).")

	    for filename in items:
	        filepath = os.path.join(temp_dir, filename)
	        masked = _mask_name(filename)

	        try:
	            is_expired = os.path.getmtime(filepath) < cutoff
	        except OSError:
	            # Vanished or unreadable entry: skip it so that a single bad item
	            # cannot abort the sweep for everything else in the directory.
	            continue

	        if os.path.isfile(filepath):
	            delete, kind = os.remove, "Archivo"
	        elif os.path.isdir(filepath):
	            delete, kind = shutil.rmtree, "Carpeta"
	        else:
	            continue

	        if not is_expired:
	            _cleanup_debug(f"[VIGENTE] {kind} {masked}")
	            continue

	        try:
	            delete(filepath)
	        except OSError as err:
	            # Best effort: keep sweeping, but do not hide the failure. Only the
	            # masked name and the OS reason are printed, because str(err) would
	            # contain the full path.
	            reason = err.strerror or type(err).__name__
	            print(f"[WARN] No se pudo borrar {masked}: {reason}")
	        else:
	            _cleanup_debug(f"[BORRADO] {kind} {masked}")


	def cleanup_cron(interval=60, max_age_minutes=5, temp_dir=None, max_rounds=None):
	    """Periodically delete expired files and folders from the temp directory.

	    Each round sleeps ``interval`` seconds and then removes every direct child
	    of the temp directory whose modification time is more than
	    ``max_age_minutes`` minutes in the past. Errors inside a round are printed
	    and the loop carries on with the next round.

	    Args:
	        interval: Seconds to sleep before each sweep.
	        max_age_minutes: Age in minutes after which an entry is deleted.
	        temp_dir: Directory to sweep; ``None`` means ``config.TEMP_DIR``,
	            looked up again on every round.
	        max_rounds: Number of sweeps to run before returning; ``None`` means
	            run forever (the behaviour of the background thread).
	    """
	    print("[INIT] 🛡️ Sistema de seguridad y auto-borrado ACTIVO.")

	    rounds_done = 0
	    while max_rounds is None or rounds_done < max_rounds:
	        rounds_done += 1
	        try:
	            # Sleeping inside the try keeps a failing round from turning the
	            # loop into a busy spin.
	            time.sleep(interval)
	            cutoff = time.time() - (max_age_minutes * 60)
	            _cleanup_debug(f"--- [SEGURIDAD] Ronda de limpieza: {time.strftime('%H:%M:%S')} ---")
	            target = config.TEMP_DIR if temp_dir is None else temp_dir
	            _purge_expired(target, cutoff)
	        except Exception as e:
	            print(f"[CRITICAL] Error en limpieza: {e}")

	# Run the cleaner in the background for the life of the process. It is a
	# daemon thread, so it never keeps the interpreter alive at shutdown.
	threading.Thread(
	    daemon=True,
	    target=cleanup_cron,
	).start()

	# --- GENERADOR DE ENLACE ---
	def get_duplicate_html():
	    """Build the "Duplicate this Space" banner as an HTML string.

	    The link targets the Space named by the ``SPACE_ID`` environment variable;
	    when that variable is unset or empty it falls back to the generic Spaces
	    URL.

	    Returns:
	        The HTML snippet (badge link plus explanatory text).
	    """
	    space_id = os.environ.get('SPACE_ID')
	    if space_id:
	        url = f"https://huggingface.co/spaces/{space_id}?duplicate=true"
	    else:
	        url = "https://huggingface.co/spaces?duplicate=true"

	    # The literal's lines are kept at their original column on purpose, so the
	    # emitted HTML stays byte-for-byte the same.
	    return f"""
	<div style="display: flex; align-items: center; gap: 10px; margin-bottom: 20px; background-color: #f9fafb; padding: 10px; border-radius: 8px; border: 1px solid #e5e7eb;">
	<a href="{url}" target="_blank">
	<img src="https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg" alt="Duplicate Space" style="height: 30px;">
	</a>
	<span style="font-size: 0.9em; color: #4b5563;">
	⚡ <b>¿Va lento o quieres seguridad adicional?</b> Haz clic para duplicar este espacio y tener tu propia instancia privada.
	</span>
	</div>
	"""

	# --- WRAPPERS ---
	def update_file_list(files):
	    """Build the ID/filename table and the default merge order for the uploads.

	    Args:
	        files: Uploaded items, either path strings or objects exposing the
	            path as ``.name``. May be ``None`` or empty.

	    Returns:
	        A ``(table, order)`` tuple. ``table`` is a DataFrame with columns
	        ``ID`` and ``Archivo``; ``order`` is the comma-separated list of IDs in
	        upload order. With no files: an empty DataFrame and ``""``.
	    """
	    if not files:
	        return pd.DataFrame(), ""

	    # os.path.basename honours the platform separator, so only the file name is
	    # shown on Windows as well. getattr() accepts both str paths and upload
	    # objects that carry the path in `.name`.
	    names = [os.path.basename(getattr(f, "name", f)) for f in files]
	    table = pd.DataFrame(list(enumerate(names)), columns=["ID", "Archivo"])
	    default_order = ",".join(str(i) for i in range(len(names)))
	    return table, default_order

	def process_merge(files, order_str, use_numbering):
	    """Merge the uploaded PDFs in the order given by *order_str*.

	    Args:
	        files: Uploaded PDFs; nothing is done when empty or ``None``.
	        order_str: Comma-separated IDs; tokens that are not purely digits are
	            ignored.
	        use_numbering: Forwarded unchanged to ``engine.merge_pdfs``.

	    Returns:
	        ``None`` without files, otherwise the result of ``engine.merge_pdfs``.

	    Raises:
	        gr.Error: Carrying the message of any exception raised while parsing
	            or merging.
	    """
	    if not files:
	        return None
	    try:
	        indices = []
	        for token in order_str.split(","):
	            token = token.strip()
	            if token.isdigit():
	                indices.append(int(token))
	        return engine.merge_pdfs(files, indices, use_numbering)
	    except Exception as err:
	        raise gr.Error(str(err))

	def load_info(f):
	    """Describe the uploaded PDF for the Split / Reorder tab.

	    Args:
	        f: The uploaded file, or a falsy value when the upload was cleared.

	    Returns:
	        A 3-tuple ``(summary, pages, visibility_update)``. Without a file this
	        is ``(None, 0, gr.update(visible=False))``; otherwise the summary line,
	        the page count reported by ``engine.get_pdf_info`` and
	        ``gr.update(visible=True)``.

	    Raises:
	        gr.Error: Carrying the message of any failure while reading the PDF
	            info, in line with the other wrappers in this module.
	    """
	    if not f:
	        return None, 0, gr.update(visible=False)
	    try:
	        info = engine.get_pdf_info(f)
	        summary = f"📄 {info['name']} ({info['pages']} págs)"
	        return summary, info['pages'], gr.update(visible=True)
	    except Exception as e:
	        raise gr.Error(str(e)) from e

	def update_split_preview(f, r, t):
	    """Render gallery previews for the pages selected by the range string.

	    Args:
	        f: The uploaded PDF.
	        r: Page-range text typed by the user.
	        t: Value forwarded as the second argument of
	            ``engine.get_preview_indices_from_string`` (the UI passes the
	            stored page count).

	    Returns:
	        ``None`` when there is no file, no range, or no resulting pages;
	        otherwise a list of ``(preview, caption)`` tuples for at most the
	        first eight selected pages, skipping pages whose preview is falsy.
	    """
	    if not f or not r:
	        return None
	    pages = engine.get_preview_indices_from_string(r, t)
	    if not pages:
	        return None

	    gallery = []
	    for page in pages[:8]:
	        # Render once and reuse the result; the preview used to be generated
	        # twice per page (once to filter, once for the value).
	        preview = engine.generate_preview(f, page)
	        if preview:
	            gallery.append((preview, f"Pág {page}"))
	    return gallery

	def process_split(f, r):
	    """Split PDF *f* according to the range string *r*.

	    Returns ``None`` when no file is given, otherwise the result of
	    ``engine.split_pdf_custom``. Any failure is re-raised as ``gr.Error``
	    carrying the original message.
	    """
	    if f:
	        try:
	            return engine.split_pdf_custom(f, r)
	        except Exception as err:
	            raise gr.Error(str(err))
	    return None

	def process_reorder(f, o):
	    """Reorder the pages of PDF *f* following the order string *o*.

	    Returns ``None`` when no file is given, otherwise the result of
	    ``engine.reorder_pages``. Any failure is re-raised as ``gr.Error``
	    carrying the original message.
	    """
	    if f:
	        try:
	            return engine.reorder_pages(f, o)
	        except Exception as err:
	            raise gr.Error(str(err))
	    return None

	def process_compare(fa, fb):
	    """Compare the text of two PDFs.

	    Returns ``None`` unless both *fa* and *fb* are provided, otherwise the
	    result of ``engine.compare_pdfs_text``. Any failure is re-raised as
	    ``gr.Error`` carrying the original message.
	    """
	    if not (fa and fb):
	        return None
	    try:
	        report = engine.compare_pdfs_text(fa, fb)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return report

	def process_compress(f, l):
	    """Compress PDF *f* at the level selected by the radio label *l*.

	    The label is translated to the numeric level handed to
	    ``engine.compress_pdf``: 1, 3 or 4, with 3 used for any unknown label.

	    Returns ``None`` when no file is given, otherwise the engine's result.
	    Any failure is re-raised as ``gr.Error`` carrying the original message.
	    """
	    if not f:
	        return None
	    level_by_label = {
	        "Baja (Máxima calidad)": 1,
	        "Media (Recomendado - eBook)": 3,
	        "Alta (Pantalla - 72dpi)": 4,
	    }
	    try:
	        level = level_by_label.get(l, 3)
	        return engine.compress_pdf(f, level)
	    except Exception as err:
	        raise gr.Error(str(err))

	def update_rot_preview(f, a_lbl):
	    """Return a rotated preview of *f* for the rotation label *a_lbl*.

	    The angle is the first of 90, 180, 270 whose digits occur in the label,
	    or 0 when none does. Returns ``None`` when no file is given, otherwise
	    the result of ``engine.get_rotated_preview``.
	    """
	    if not f:
	        return None
	    angle = next((deg for deg in (90, 180, 270) if str(deg) in a_lbl), 0)
	    return engine.get_rotated_preview(f, angle)

	def process_rotate(f, a_lbl):
	    """Rotate PDF *f* by the angle named in the radio label *a_lbl*.

	    Args:
	        f: The uploaded PDF, either a path string or an object exposing the
	            path as ``.name``.
	        a_lbl: Rotation label; the angle is the first of 90, 180, 270 whose
	            digits occur in it. A missing label counts as no rotation.

	    Returns:
	        ``None`` when no file is given; the original file path when the angle
	        is 0; otherwise the result of ``engine.rotate_pdf``.

	    Raises:
	        gr.Error: Carrying the message of any failure raised by the engine.
	    """
	    if not f:
	        return None
	    label = a_lbl or ""
	    angle = next((deg for deg in (90, 180, 270) if str(deg) in label), 0)
	    if angle == 0:
	        # Nothing to rotate: hand the upload back untouched. getattr() keeps
	        # this working for plain str paths, which have no `.name`.
	        return getattr(f, "name", f)
	    try:
	        return engine.rotate_pdf(f, angle)
	    except Exception as e:
	        raise gr.Error(str(e)) from e

	def process_protect(f, p):
	    """Password-protect PDF *f* with password *p*.

	    Returns ``None`` unless both a file and a non-empty password are given,
	    otherwise the result of ``engine.protect_pdf``. Any failure is re-raised
	    as ``gr.Error`` carrying the original message.
	    """
	    if not (f and p):
	        return None
	    try:
	        protected = engine.protect_pdf(f, p)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return protected

	def process_meta(f, t, a, s):
	    """Update the metadata of PDF *f*.

	    *t*, *a* and *s* are forwarded in that order to
	    ``engine.update_metadata`` (the UI wires them to the title, author and
	    subject text boxes).

	    Returns ``None`` when no file is given, otherwise the engine's result.
	    Any failure is re-raised as ``gr.Error`` carrying the original message.
	    """
	    if f:
	        try:
	            return engine.update_metadata(f, t, a, s)
	        except Exception as err:
	            raise gr.Error(str(err))
	    return None

	def process_text(f):
	    """Extract the text of PDF *f*.

	    Returns ``None`` when no file is given, otherwise the result of
	    ``engine.extract_text``. Any failure is re-raised as ``gr.Error``
	    carrying the original message.
	    """
	    if not f:
	        return None
	    try:
	        extracted = engine.extract_text(f)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return extracted

	def process_watermark(f, t):
	    """Stamp the watermark text *t* onto PDF *f*.

	    Returns ``None`` unless both a file and a non-empty text are given,
	    otherwise the result of ``engine.add_watermark``. Any failure is
	    re-raised as ``gr.Error`` carrying the original message.
	    """
	    if not (f and t):
	        return None
	    try:
	        stamped = engine.add_watermark(f, t)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return stamped

	def process_repair(f):
	    """Attempt to repair PDF *f*.

	    Returns ``None`` when no file is given, otherwise the result of
	    ``engine.repair_pdf``. Any failure is re-raised as ``gr.Error``
	    carrying the original message.
	    """
	    if f:
	        try:
	            return engine.repair_pdf(f)
	        except Exception as err:
	            raise gr.Error(str(err))
	    return None

	# WRAPPERS OFFICE
	def process_word(f):
	    """Convert PDF *f* with ``engine.pdf_to_word``.

	    Returns ``None`` when no file is given, otherwise the engine's result.
	    Any failure is re-raised as ``gr.Error`` carrying the original message.
	    """
	    if not f:
	        return None
	    try:
	        converted = engine.pdf_to_word(f)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return converted
	def process_pptx(f):
	    """Convert PDF *f* with ``engine.pdf_to_pptx``.

	    Returns ``None`` when no file is given, otherwise the engine's result.
	    Any failure is re-raised as ``gr.Error`` carrying the original message.
	    """
	    if not f:
	        return None
	    try:
	        converted = engine.pdf_to_pptx(f)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return converted
	def process_p2i(f):
	    """Export PDF *f* with ``engine.pdf_to_images_zip``.

	    Returns ``None`` when no file is given, otherwise the engine's result.
	    Any failure is re-raised as ``gr.Error`` carrying the original message.
	    """
	    if not f:
	        return None
	    try:
	        archive = engine.pdf_to_images_zip(f)
	    except Exception as err:
	        raise gr.Error(str(err))
	    return archive


	# --- UI LAYOUT ---
	# Builds the whole Gradio interface and binds it to `demo`: one tab per tool,
	# each wiring its inputs to the matching wrapper function defined above.
	# NOTE(review): indentation is flattened in this copy, so which components sit
	# inside which `with` container cannot be read off these lines; restore the
	# nesting from the original file before running.
	with gr.Blocks(title=config.APP_TITLE, theme=gr.themes.Soft()) as demo:

	# Page header: app title plus a short notice shown to the user.
	gr.Markdown(f"# {config.APP_TITLE}")
	gr.Markdown("""
	Los archivos se procesan en memoria y se autodestruyen tras 5 minutos.
	""")

	# "Duplicate this Space" banner built by get_duplicate_html().
	gr.HTML(get_duplicate_html())

	with gr.Tabs():
	# 1. MERGE
	with gr.TabItem("Unir"):
	with gr.Row():
	with gr.Column(scale=1):
	m_files = gr.File(file_count="multiple", label="Archivos", file_types=[".pdf"])
	with gr.Column(scale=2):
	m_tbl = gr.Dataframe(headers=["ID", "Archivo"], interactive=False)
	m_ord = gr.Textbox(label="Orden de los documentos según ID", placeholder="Ej: 0, 2, 1")
	m_nums = gr.Checkbox(label="Numerar páginas (1 de X)", value=False)
	m_btn = gr.Button("Unir PDF", variant="primary")
	m_out = gr.File(label="Resultado")
	# Refresh the ID table and the default order whenever the upload list changes.
	m_files.change(update_file_list, m_files, [m_tbl, m_ord])
	m_btn.click(process_merge, [m_files, m_ord, m_nums], m_out)

	# 2. SPLIT / REORDER
	with gr.TabItem("Dividir / Reordenar"):
	# One shared upload (dr_f) feeds both the "Extraer" and "Reordenar" sub-tabs.
	dr_f = gr.File(label="PDF Origen", file_types=[".pdf"])
	dr_inf = gr.Markdown("")
	# Holds the page count returned by load_info(); starts at 0.
	dr_pg = gr.State(0)
	with gr.Tabs():
	with gr.Tab("Extraer"):
	gr.Markdown("Separa páginas en un ZIP.")
	with gr.Row():
	with gr.Column():
	s_rng = gr.Textbox(label="Rango", placeholder="Ej: 1-3, 5")
	with gr.Row():
	s_prv = gr.Button("Preview")
	s_btn = gr.Button("Dividir (ZIP)", variant="primary")
	with gr.Column():
	s_gal = gr.Gallery(height=160, columns=4, object_fit="contain", label="Vista Previa")
	s_out = gr.File(label="ZIP")
	# The stored page count (dr_pg) is passed as the third argument of the preview.
	s_prv.click(update_split_preview, [dr_f, s_rng, dr_pg], s_gal)
	s_btn.click(process_split, [dr_f, s_rng], s_out)
	with gr.Tab("Reordenar"):
	gr.Markdown("Crea un PDF con nuevo orden.")
	with gr.Row():
	with gr.Column():
	r_ord = gr.Textbox(label="Nuevo Orden", placeholder="Ej: 3, 1, 2, 4-10")
	r_btn = gr.Button("Reordenar", variant="primary")
	with gr.Column():
	r_out = gr.File(label="PDF Reordenado")
	r_btn.click(process_reorder, [dr_f, r_ord], r_out)
	# On upload/clear: update the info line, the stored page count, and apply
	# load_info()'s visibility update to the ZIP output (s_out).
	dr_f.change(load_info, dr_f, [dr_inf, dr_pg, s_out])

	# 3. COMPRESS
	with gr.TabItem("Comprimir"):
	with gr.Row():
	with gr.Column():
	c_f = gr.File(label="PDF Original", file_types=[".pdf"])
	# These labels must match the keys of the level table in process_compress().
	c_l = gr.Radio(["Baja (Máxima calidad)", "Media (Recomendado - eBook)", "Alta (Pantalla - 72dpi)"], label="Nivel", value="Media (Recomendado - eBook)")
	c_b = gr.Button("Comprimir", variant="primary")
	with gr.Column():
	c_out = gr.File(label="PDF Comprimido")
	c_b.click(process_compress, [c_f, c_l], c_out)

	# 4. CONVERT
	with gr.TabItem("Convertir Formatos"):
	with gr.Row():
	# PDF -> Word
	with gr.Column():
	gr.Markdown("### A Word")
	w_f = gr.File(label="PDF")
	w_b = gr.Button("Convertir")
	w_o = gr.File(label="DOCX")
	w_b.click(process_word, w_f, w_o)
	# PDF -> images (ZIP)
	with gr.Column():
	gr.Markdown("### A Imágenes")
	p2i_f = gr.File(label="PDF")
	p2i_b = gr.Button("Extraer ZIP")
	p2i_o = gr.File(label="ZIP")
	p2i_b.click(process_p2i, p2i_f, p2i_o)
	# PDF -> PowerPoint
	with gr.Column():
	gr.Markdown("### A PowerPoint")
	p_f = gr.File(label="PDF")
	p_b = gr.Button("Convertir")
	p_o = gr.File(label="PPTX")
	p_b.click(process_pptx, p_f, p_o)

	# 5. COMPARE
	with gr.TabItem("Comparar"):
	gr.Markdown("Compara el texto de dos versiones. Descarga un informe con cambios.")
	with gr.Row():
	with gr.Column():
	ca = gr.File(label="Versión A (Original)", file_types=[".pdf"])
	with gr.Column():
	cb = gr.File(label="Versión B (Modificada)", file_types=[".pdf"])
	cb_btn = gr.Button("Generar Informe", variant="primary")
	co = gr.File(label="Informe PDF")
	cb_btn.click(process_compare, [ca, cb], co)

	# 6. EXTRAS
	with gr.TabItem("Extras"):
	# NOTE(review): unlike the Split/Reorder tab, these sub-tabs are not wrapped
	# in gr.Tabs() — confirm this is intended.
	with gr.Tab("Rotar"):
	with gr.Row():
	with gr.Column():
	rf = gr.File(label="PDF")
	# The rotation wrappers look for "90", "180" or "270" inside these labels.
	ra = gr.Radio(["0° (Original)", "90° (Derecha)", "180° (Invertir)", "270° (Izquierda)"], label="Rotación", value="0° (Original)")
	rb = gr.Button("Rotar", variant="primary")
	with gr.Column():
	rp = gr.Image(label="Preview")
	ro = gr.File(label="PDF Rotado")
	# Re-render the preview when either the file or the selected angle changes.
	rf.change(update_rot_preview, [rf, ra], rp)
	ra.change(update_rot_preview, [rf, ra], rp)
	rb.click(process_rotate, [rf, ra], ro)

	with gr.Tab("Marca de Agua"):
	gr.Markdown("Añade una marca de agua diagonal en todas las páginas.")
	with gr.Row():
	with gr.Column():
	wf = gr.File(label="PDF")
	wt = gr.Textbox(label="Texto Marca de Agua", placeholder="Ej: CONFIDENCIAL")
	wb = gr.Button("Estampar", variant="primary")
	with gr.Column():
	wo = gr.File(label="PDF Marcado")
	wb.click(process_watermark, [wf, wt], wo)

	with gr.Tab("Reparar"):
	gr.Markdown("Intenta arreglar PDFs corruptos o dañados reescribiéndolos con Ghostscript.")
	with gr.Row():
	with gr.Column():
	repf = gr.File(label="PDF Dañado")
	repb = gr.Button("Reparar", variant="primary")
	with gr.Column():
	repo = gr.File(label="PDF Reparado")
	repb.click(process_repair, repf, repo)

	with gr.Tab("Proteger"):
	with gr.Row():
	with gr.Column():
	pf = gr.File(label="PDF")
	pp = gr.Textbox(type="password", label="Pass")
	pb = gr.Button("Encriptar", variant="primary")
	with gr.Column():
	po = gr.File(label="Protegido")
	pb.click(process_protect, [pf, pp], po)

	with gr.Tab("Info/Texto"):
	with gr.Row():
	with gr.Column():
	tf = gr.File(label="PDF")
	tb = gr.Button("Extraer Texto")
	to = gr.File()
	tb.click(process_text, tf, to)
	with gr.Column():
	mt = gr.Textbox(label="Título")
	ma = gr.Textbox(label="Autor")
	ms = gr.Textbox(label="Asunto")
	mb = gr.Button("Actualizar Meta")
	mo = gr.File()
	# The metadata update reuses the upload from the text-extraction column (tf).
	mb.click(process_meta, [tf, mt, ma, ms], mo)

	if __name__ == "__main__":
	    # Script entry point: enable the request queue (default concurrency limit
	    # of 2), then serve on all interfaces at port 7860.
	    launch_options = {"server_name": "0.0.0.0", "server_port": 7860}
	    demo.queue(default_concurrency_limit=2).launch(**launch_options)