"""Gradio "Events" tab: browse, contribute and edit entries of the events dataset.

The data lives in the ``CONFIG_NAME`` configuration of the Hugging Face dataset
``DATASET_NAME``. Every write loads the current ``train`` split, modifies it in
pandas and pushes the whole table back to the Hub.
"""

from datetime import datetime, timezone

import gradio as gr
import pandas as pd
from datasets import Dataset, load_dataset

from constants import (
    EVENT_TYPES,
    TECHNICAL_LEVELS,
    format_dataframe_for_display,
    format_dataframe_for_html_display,
)

# Dataset configuration
DATASET_NAME = "somosnlp/recursos-pln-es"
CONFIG_NAME = "events"
RESOURCE_TYPE = "events"
RESOURCE_TITLE = "Events"

# Columns the user fills in, in the order the edit form expects them.
_FORM_FIELDS = (
    "titulo",
    "ponente",
    "bio",
    "tipo",
    "tema",
    "nivel_tecnico",
    "fecha",
    "youtube",
)

# Full schema: form columns plus the bookkeeping columns added on submit.
_ALL_COLUMNS = [*_FORM_FIELDS, "submitted_by", "date_submitted"]

# One empty string per form field; used to blank the edit form.
_EMPTY_FORM = ("",) * len(_FORM_FIELDS)

_URL_SCHEMES = ("http://", "https://")


def _is_blank(value) -> bool:
    """Return True when a form value is missing or only whitespace.

    Cleared Gradio dropdowns deliver ``None`` rather than ``""``, so callers
    cannot simply call ``.strip()`` on every field.
    """
    if value is None:
        return True
    if isinstance(value, str):
        return not value.strip()
    return False


def load_data() -> pd.DataFrame:
    """Load data from HuggingFace dataset or return empty DataFrame.

    Returns:
        The ``train`` split as a DataFrame. If loading fails for any reason the
        error is printed and an empty DataFrame with the expected columns is
        returned, so the UI can still render.
    """
    try:
        dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        return dataset.to_pandas()
    except Exception as e:
        # Deliberate best-effort: the table must render even without data.
        print(f"Could not load {RESOURCE_TYPE} dataset: {e}")
        return pd.DataFrame(columns=_ALL_COLUMNS)


def search_and_filter_data(df: pd.DataFrame, search_query: str) -> pd.DataFrame:
    """Keep the rows in which any cell contains *search_query*.

    The match is case-insensitive and literal: the query is NOT interpreted as
    a regular expression, so input such as ``"c++"`` or ``"("`` is safe.

    Args:
        df: Table to filter.
        search_query: Text typed by the user; empty or ``None`` disables
            filtering.

    Returns:
        ``df`` itself when there is nothing to filter, otherwise the matching
        rows.
    """
    if not search_query or df.empty:
        return df

    # Column-wise vectorized matching; cells are compared by their str() form.
    hits = df.astype(str).apply(
        lambda column: column.str.contains(search_query, case=False, regex=False)
    )
    return df[hits.any(axis=1)]


def validate_url(url: str) -> bool:
    """Return True if *url* starts with http:// or https:// and has more after it."""
    if not isinstance(url, str):
        return False  # URLs are required for events
    for scheme in _URL_SCHEMES:
        if url.startswith(scheme):
            # A bare scheme such as "https://" is not a usable link.
            return bool(url[len(scheme):].strip())
    return False


def validate_nivel_tecnico(nivel: str) -> bool:
    """Return True if *nivel* converts to an integer between 1 and 5."""
    if not nivel:
        return False  # Required field
    try:
        return 1 <= int(nivel) <= 5
    except (TypeError, ValueError):
        # TypeError covers values int() cannot accept at all (e.g. a list).
        return False


def validate_date(fecha: str) -> bool:
    """Return True if *fecha* is a real calendar date in DD/MM/YYYY format."""
    if not isinstance(fecha, str):
        return False
    try:
        datetime.strptime(fecha.strip(), "%d/%m/%Y")
    except ValueError:
        return False
    return True


def submit_resource(
    titulo: str,
    ponente: str,
    bio: str,
    tipo: str,
    tema: str,
    nivel_tecnico: str,
    fecha: str,
    youtube: str,
    profile: gr.OAuthProfile | None,
):
    """Submit a new resource to the corresponding dataset.

    Args:
        titulo, ponente, bio, tipo, tema, nivel_tecnico, fecha, youtube:
            Form values; all are required.
        profile: OAuth profile of the logged-in user, or ``None`` when
            nobody is logged in.

    Returns:
        A user-facing status message (success or error). Never raises for
        validation or Hub failures.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to submit a resource."

    # Validate required fields
    required_fields = [
        ("Título", titulo),
        ("Ponente", ponente),
        ("Bio", bio),
        ("Tipo", tipo),
        ("Tema", tema),
        ("Nivel Técnico", nivel_tecnico),
        ("Fecha", fecha),
        ("YouTube", youtube),
    ]
    for field_name, field_value in required_fields:
        if _is_blank(field_value):
            return f"❌ Error: {field_name} is required."

    # Validate technical level
    if not validate_nivel_tecnico(nivel_tecnico):
        return "❌ Error: Nivel Técnico must be an integer between 1 and 5."

    # Validate YouTube URL
    if not validate_url(youtube):
        return "❌ Error: YouTube URL must be a valid URL starting with http:// or https://"

    try:
        username = profile.username
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        # Create new row data
        new_row = pd.DataFrame(
            [
                {
                    "titulo": titulo,
                    "ponente": ponente,
                    "bio": bio,
                    "tipo": tipo,
                    "tema": tema,
                    "nivel_tecnico": nivel_tecnico,
                    "fecha": fecha,
                    "youtube": youtube,
                    "submitted_by": username,
                    "date_submitted": current_time,
                }
            ]
        )

        try:
            existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        except (FileNotFoundError, ValueError):
            # Only a missing dataset/config/split may start a fresh table.
            # Any other failure (e.g. a network error) must NOT fall through
            # here, or the push below would overwrite the existing data with
            # a single row.
            # NOTE(review): assumes `datasets` signals "not found" with
            # FileNotFoundError subclasses or ValueError — confirm for the
            # pinned version.
            updated_df = new_row
        else:
            updated_df = pd.concat(
                [existing_dataset.to_pandas(), new_row], ignore_index=True
            )

        # Convert back to Dataset and push to hub
        Dataset.from_pandas(updated_df).push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Add {titulo} by {username}",
            # NOTE(review): presumably resolves to the token configured for
            # this app rather than the OAuth user's token — confirm.
            token=True,
        )

        return f"✅ Success: {titulo} has been submitted successfully!"

    except Exception as e:
        return f"❌ Error: Failed to submit resource. {e}"


def create_all_tab():
    """Create the 'All' tab for this resource type.

    Returns:
        The ``gr.Dataframe`` component that shows the entries.
    """

    def render(df: pd.DataFrame):
        # Shared display formatting for the initial load, search and refresh.
        return format_dataframe_for_display(
            df,
            url_columns=["youtube"],
            hide_columns=["date_submitted"],
        )

    def get_formatted_data():
        return render(load_data())

    def search_and_format(query):
        # Reload on every keystroke so the search always sees current data.
        return render(search_and_filter_data(load_data(), query))

    with gr.TabItem("📋 All", id=f"{RESOURCE_TYPE}_all"):
        gr.Markdown(f"### All {RESOURCE_TITLE}")

        search_box = gr.Textbox(
            placeholder=f"Search {RESOURCE_TYPE}...",
            label="Filter the table",
            show_label=False,
        )

        table = gr.Dataframe(
            value=get_formatted_data(),
            label=RESOURCE_TITLE,
            show_label=False,
            interactive=False,
            wrap=False,  # Disable wrapping to show full text in single lines
            datatype="markdown",  # Render cell contents as markdown (links)
        )

        # Connect search functionality
        search_box.change(
            fn=search_and_format,
            inputs=search_box,
            outputs=table,
        )

        # Refresh button to reload data
        refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
        refresh_btn.click(fn=get_formatted_data, outputs=table)

    return table


def create_contribute_tab():
    """Create the 'Contribute' tab for this resource type.

    Returns:
        A tuple with the eight input components, the submit button and the
        result message component, in that order.
    """
    with gr.TabItem("➕ Contribute", id=f"{RESOURCE_TYPE}_contribute"):
        # RESOURCE_TITLE[:-1] drops the trailing "s" to get the singular form.
        gr.Markdown(f"### Contribute a New {RESOURCE_TITLE[:-1]}")

        # Login section
        gr.Markdown("Please log in to contribute resources:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-oauth-button")

        gr.Markdown("Please fill in the information below to add a new event:")

        with gr.Column():
            # All fields are required
            titulo_input = gr.Textbox(
                label="Título *",
                placeholder="Enter the title of the event",
                info="Title of the talk, workshop, or event (required)",
            )
            ponente_input = gr.Textbox(
                label="Ponente *",
                placeholder="Speaker name",
                info="Name of the speaker or presenter (required)",
            )
            bio_input = gr.Textbox(
                label="Bio *",
                placeholder="Brief biography of the speaker",
                info="Short description of the speaker's background (required)",
                lines=2,
            )
            tipo_input = gr.Dropdown(
                label="Tipo *",
                choices=EVENT_TYPES,
                info="Type of event (required)",
                multiselect=False,
            )
            tema_input = gr.Textbox(
                label="Tema *",
                placeholder="Main topic or theme",
                info="Main subject or theme of the event (required)",
            )
            nivel_tecnico_input = gr.Dropdown(
                label="Nivel Técnico *",
                choices=TECHNICAL_LEVELS,
                info="Technical level from 1 (beginner) to 5 (expert) (required)",
                multiselect=False,
            )
            fecha_input = gr.Textbox(
                label="Fecha *",
                placeholder="DD/MM/YYYY",
                info="Date of the event in DD/MM/YYYY format (required)",
            )
            youtube_input = gr.Textbox(
                label="YouTube *",
                placeholder="https://youtube.com/...",
                info="YouTube link to the recorded event (required)",
            )

        submit_btn = gr.Button(f"Submit {RESOURCE_TITLE[:-1]}", variant="primary")
        result_msg = gr.Markdown()

        # The OAuth profile argument of submit_resource is not listed here.
        # NOTE(review): presumably Gradio supplies it from the type
        # annotation — confirm.
        submit_btn.click(
            fn=submit_resource,
            inputs=[
                titulo_input,
                ponente_input,
                bio_input,
                tipo_input,
                tema_input,
                nivel_tecnico_input,
                fecha_input,
                youtube_input,
            ],
            outputs=[result_msg],
        )

    return (
        titulo_input,
        ponente_input,
        bio_input,
        tipo_input,
        tema_input,
        nivel_tecnico_input,
        fecha_input,
        youtube_input,
        submit_btn,
        result_msg,
    )


def search_entries(query: str) -> pd.DataFrame:
    """Search for entries by titulo or ponente.

    The match is case-insensitive and literal (not a regular expression).

    Returns:
        The matching rows; an empty DataFrame when the query is blank or no
        data could be loaded.
    """
    if not query or not query.strip():
        return pd.DataFrame()

    df = load_data()
    if df.empty:
        return df

    # Search in titulo and ponente columns; missing values never match.
    in_titulo = df["titulo"].str.contains(query, case=False, na=False, regex=False)
    in_ponente = df["ponente"].str.contains(query, case=False, na=False, regex=False)
    return df[in_titulo | in_ponente]


def load_entry_for_edit(selected_entry: str) -> tuple:
    """Load a specific entry for editing.

    Args:
        selected_entry: The ``titulo`` of the entry to load.

    Returns:
        The eight form values in ``_FORM_FIELDS`` order. All values are empty
        strings when nothing is selected, no data is available, or no row has
        that title. If several rows share the title, the first one is used.
    """
    if not selected_entry:
        return _EMPTY_FORM

    df = load_data()
    if df.empty:
        return _EMPTY_FORM

    # Find the entry by titulo
    matches = df[df["titulo"] == selected_entry]
    if matches.empty:
        # The entry may have been renamed or removed since the search ran.
        return _EMPTY_FORM

    entry = matches.iloc[0]
    return (
        entry["titulo"],
        entry["ponente"],
        entry["bio"],
        entry["tipo"],
        entry["tema"],
        str(entry["nivel_tecnico"]),  # passed to the form as a string
        entry["fecha"],
        entry["youtube"],
    )


def update_entry(
    original_titulo: str,
    titulo: str,
    ponente: str,
    bio: str,
    tipo: str,
    tema: str,
    nivel_tecnico: str,
    fecha: str,
    youtube: str,
    profile: gr.OAuthProfile | None,
):
    """Update an existing entry.

    Args:
        original_titulo: Title the entry had when it was loaded into the
            form; used to locate the row(s) to change.
        titulo, ponente, bio, tipo, tema, nivel_tecnico, fecha, youtube:
            New form values; all are required.
        profile: OAuth profile of the logged-in user, or ``None`` when
            nobody is logged in.

    Returns:
        A user-facing status message (success or error).
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to edit entries."

    username = profile.username
    if not username:
        return "❌ Could not get username from profile."

    if not original_titulo:
        return "❌ No entry selected to edit."

    new_values = {
        "titulo": titulo,
        "ponente": ponente,
        "bio": bio,
        "tipo": tipo,
        "tema": tema,
        "nivel_tecnico": nivel_tecnico,
        "fecha": fecha,
        "youtube": youtube,
    }

    # Validate required fields
    if any(_is_blank(value) for value in new_values.values()):
        return "❌ All fields are required."

    # Validate YouTube URL
    if not validate_url(youtube):
        return "❌ Invalid YouTube URL. Please provide a valid URL."

    # Validate technical level
    if not validate_nivel_tecnico(nivel_tecnico):
        return "❌ Invalid technical level. Please select a value from 1 to 5."

    # Validate date format
    if not validate_date(fecha):
        return "❌ Invalid date format. Please use DD/MM/YYYY format."

    try:
        # Load existing dataset
        existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        existing_df = existing_dataset.to_pandas()

        # Find the entry; every row with this title is updated.
        mask = existing_df["titulo"] == original_titulo
        if not mask.any():
            return f"❌ Entry '{original_titulo}' not found."

        # Update the entry
        for column, value in new_values.items():
            existing_df.loc[mask, column] = value
        existing_df.loc[mask, "date_submitted"] = datetime.now(timezone.utc).strftime(
            "%Y-%m-%dT%H:%M:%SZ"
        )

        # Convert back to Dataset and push to hub
        Dataset.from_pandas(existing_df).push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Update event entry: {titulo} (edited by {username})",
        )

        return f"✅ Successfully updated '{titulo}'!"

    except Exception as e:
        return f"❌ Error updating entry: {e}"


def create_edit_tab():
    """Create the edit tab for modifying existing entries.

    Returns:
        A tuple with the search input, search button, results dropdown, the
        edit form column, the eight input components, the update button and
        the result message component, in that order.
    """
    with gr.TabItem("✏️ Edit", id=f"{RESOURCE_TYPE}_edit"):
        gr.Markdown(f"### Edit Existing {RESOURCE_TITLE}")
        gr.Markdown("Please log in to edit entries:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-edit-oauth-button")

        gr.Markdown("Search for an entry to edit:")

        with gr.Row():
            search_input = gr.Textbox(
                label="Search by title or speaker",
                placeholder="Enter event title or speaker name...",
                scale=3,
            )
            search_btn = gr.Button("🔍 Search", scale=1)

        search_results = gr.Dropdown(
            label="Select entry to edit", choices=[], interactive=True
        )

        gr.Markdown("---")
        gr.Markdown("**Edit the selected entry:**")

        # Hidden until an entry is picked from the dropdown.
        with gr.Column(visible=False) as edit_form:
            titulo_input = gr.Textbox(label="Título *", placeholder="Event title")
            ponente_input = gr.Textbox(label="Ponente *", placeholder="Speaker name")
            bio_input = gr.Textbox(label="Bio *", lines=2, placeholder="Speaker bio")
            tipo_input = gr.Dropdown(
                label="Tipo *",
                choices=EVENT_TYPES,
                value="talk",
            )
            tema_input = gr.Textbox(label="Tema *", placeholder="Topic")
            nivel_tecnico_input = gr.Dropdown(
                label="Nivel Técnico *", choices=TECHNICAL_LEVELS, value="3"
            )
            fecha_input = gr.Textbox(label="Fecha *", placeholder="DD/MM/YYYY")
            youtube_input = gr.Textbox(
                label="YouTube *", placeholder="https://youtube.com/..."
            )

            update_btn = gr.Button("💾 Update Entry", variant="primary")
            result_msg = gr.Markdown()

        # Store the original titulo for updating
        original_titulo_state = gr.State("")

        def search_and_update_dropdown(query):
            results_df = search_entries(query)
            # An empty result may have no "titulo" column, so check first.
            choices = [] if results_df.empty else results_df["titulo"].tolist()
            return gr.Dropdown(choices=choices, value=None)

        def load_entry_and_show_form(selected_entry):
            # Must return exactly 10 values: the form column, the stored
            # original title and the eight field values.
            if not selected_entry:
                return (gr.Column(visible=False), "", *_EMPTY_FORM)

            entry_data = load_entry_for_edit(selected_entry)
            return (gr.Column(visible=True), selected_entry, *entry_data)

        form_inputs = [
            titulo_input,
            ponente_input,
            bio_input,
            tipo_input,
            tema_input,
            nivel_tecnico_input,
            fecha_input,
            youtube_input,
        ]

        # Event handlers
        search_btn.click(
            fn=search_and_update_dropdown,
            inputs=[search_input],
            outputs=[search_results],
        )

        search_results.change(
            fn=load_entry_and_show_form,
            inputs=[search_results],
            outputs=[edit_form, original_titulo_state, *form_inputs],
        )

        update_btn.click(
            fn=update_entry,
            inputs=[original_titulo_state, *form_inputs],
            outputs=[result_msg],
        )

    return (
        search_input,
        search_btn,
        search_results,
        edit_form,
        titulo_input,
        ponente_input,
        bio_input,
        tipo_input,
        tema_input,
        nivel_tecnico_input,
        fecha_input,
        youtube_input,
        update_btn,
        result_msg,
    )


def create_tab():
    """Create the complete tab for this resource type.

    Returns:
        A tuple ``(table, inputs, edit_components)`` holding the return values
        of the 'All', 'Contribute' and 'Edit' sub-tab builders.
    """
    with gr.TabItem(f"📅 {RESOURCE_TITLE}", id=RESOURCE_TYPE):
        with gr.Tabs():
            table = create_all_tab()
            inputs = create_contribute_tab()
            edit_components = create_edit_tab()

    return table, inputs, edit_components