from datetime import datetime, timezone

import gradio as gr
import pandas as pd
from datasets import Dataset, load_dataset

from constants import (
    COUNTRIES,
    INITIATIVE_TYPES,
    LANGUAGES,
    format_dataframe_for_display,
    format_dataframe_for_html_display,
)

# Dataset configuration
DATASET_NAME = "somosnlp/recursos-pln-es"
CONFIG_NAME = "initiatives"
RESOURCE_TYPE = "initiatives"
RESOURCE_TITLE = "Initiatives"

# Columns of the initiatives table, in storage order.
_COLUMNS = [
    "name",
    "type",
    "countries",
    "languages",
    "website_url",
    "submitted_by",
    "date_submitted",
]

# Timestamp format written to the ``date_submitted`` column (UTC).
_TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%SZ"


def _utc_timestamp() -> str:
    """Return the current UTC time formatted for ``date_submitted``."""
    return datetime.now(timezone.utc).strftime(_TIMESTAMP_FORMAT)


def _split_csv(value) -> list:
    """Split a stored comma-separated cell back into a list of items.

    Non-string cells (``None``, NaN) and empty strings yield an empty list,
    so callers can feed the result straight into a multi-select component.
    """
    if not isinstance(value, str) or not value:
        return []
    return [item.strip() for item in value.split(",")]


def _empty_entry_fields() -> tuple:
    """Return blank values for (name, type, countries, languages, url).

    Each value matches what its target component accepts: text boxes take
    ``""``, the dropdown takes ``None`` and the checkbox groups take ``[]``.
    """
    return ("", None, [], [], "")


def load_data() -> pd.DataFrame:
    """Load data from HuggingFace dataset or return empty DataFrame.

    Any failure while loading is printed and swallowed so the UI can still
    render; in that case an empty frame with the expected columns is returned.
    """
    try:
        dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        return dataset.to_pandas()
    except Exception as e:
        print(f"Could not load {RESOURCE_TYPE} dataset: {e}")
        # Return empty DataFrame with required columns
        return pd.DataFrame(columns=_COLUMNS)


def search_and_filter_data(df: pd.DataFrame, search_query: str) -> pd.DataFrame:
    """Filter dataframe based on search query.

    A row is kept when the query occurs (case-insensitively) in the string
    form of any of its cells. The query is matched literally, not as a
    regular expression, so characters such as ``(`` or ``+`` typed by a user
    cannot raise ``re.error``.

    An empty or ``None`` query, or an empty frame, returns ``df`` unchanged.
    """
    if not search_query or df.empty:
        return df

    matches = df.astype(str).apply(
        lambda column: column.str.contains(search_query, case=False, regex=False)
    )
    return df[matches.any(axis=1)]


def validate_url(url: str) -> bool:
    """Validate if a string is a valid URL.

    Only the scheme is checked: the value must start with ``http://`` or
    ``https://`` once surrounding whitespace is ignored.
    """
    if not url:
        return False  # URLs are required for initiatives
    return url.strip().startswith(("http://", "https://"))


def submit_resource(
    name: str,
    initiative_type: str,
    countries: list,
    languages: list,
    website_url: str,
    profile: gr.OAuthProfile | None,
) -> str:
    """Submit a new resource to the corresponding dataset.

    Validates the form values, appends a row to the dataset (or starts a new
    one when the existing dataset cannot be loaded) and pushes it to the hub.

    Returns:
        A user-facing status message; failures are reported in the message
        rather than raised.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to submit a resource."

    # Whitespace-only text must not pass the "required" checks below.
    name = (name or "").strip()
    website_url = (website_url or "").strip()

    # Validate required fields
    if not name:
        return "❌ Error: Name is required."
    if not initiative_type:
        return "❌ Error: Type is required."
    if not countries:
        return "❌ Error: At least one country must be selected."
    if not languages:
        return "❌ Error: At least one language must be selected."
    if not website_url:
        return "❌ Error: Website URL is required."

    # Validate URL
    if not validate_url(website_url):
        return "❌ Error: Website URL must be a valid URL starting with http:// or https://"

    try:
        username = profile.username

        # Create new row data
        new_data = {
            "name": name,
            "type": initiative_type,
            "countries": ", ".join(countries),
            "languages": ", ".join(languages),
            "website_url": website_url,
            "submitted_by": username,
            "date_submitted": _utc_timestamp(),
        }
        new_row = pd.DataFrame([new_data])

        # Try to load existing dataset, or create new one
        try:
            existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
            existing_df = existing_dataset.to_pandas()
            # Add new row
            updated_df = pd.concat([existing_df, new_row], ignore_index=True)
        except Exception:
            # Create new dataset if it doesn't exist.
            # NOTE(review): any load failure lands here, including transient
            # ones, and the push below would then replace the config with this
            # single row. Consider narrowing to the "not found" error only.
            updated_df = new_row

        # Convert back to Dataset and push to hub
        updated_dataset = Dataset.from_pandas(updated_df)
        updated_dataset.push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Add {name} by {username}",
            token=True,  # Use the user's token
        )

        return f"✅ Success: {name} has been submitted successfully!"

    except Exception as e:
        return f"❌ Error: Failed to submit resource. {str(e)}"


def create_all_tab():
    """Create the 'All' tab for this resource type.

    Returns:
        The ``gr.Dataframe`` component that displays the initiatives.
    """
    with gr.TabItem("📋 All", id=f"{RESOURCE_TYPE}_all"):
        gr.Markdown(f"### All {RESOURCE_TITLE}")

        search_box = gr.Textbox(
            placeholder=f"Search {RESOURCE_TYPE}...",
            label="Filter the table",
            show_label=False,
        )

        def format_for_table(df):
            # Shared display formatting: clickable links, hidden date column.
            return format_dataframe_for_display(
                df,
                url_columns=["website_url"],
                hide_columns=["date_submitted"],
            )

        # Load and format initial data with clickable links
        def get_formatted_data():
            return format_for_table(load_data())

        # Use Dataframe component with HTML rendering enabled
        table = gr.Dataframe(
            value=get_formatted_data(),
            label=RESOURCE_TITLE,
            show_label=False,
            interactive=False,
            wrap=False,  # Disable wrapping to show full text in single lines
            datatype="markdown",  # Enable HTML rendering
        )

        # Connect search functionality
        def search_and_format(query):
            # Reload on every search so the table reflects the latest data.
            return format_for_table(search_and_filter_data(load_data(), query))

        search_box.change(
            fn=search_and_format,
            inputs=search_box,
            outputs=table,
        )

        # Refresh button to reload data
        refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
        refresh_btn.click(fn=get_formatted_data, outputs=table)

    return table


def create_contribute_tab():
    """Create the 'Contribute' tab for this resource type.

    Returns:
        A tuple of the form components: name, type, countries, languages and
        website inputs, followed by the submit button and the result message.
    """
    with gr.TabItem("➕ Contribute", id=f"{RESOURCE_TYPE}_contribute"):
        gr.Markdown(f"### Contribute a New {RESOURCE_TITLE[:-1]}")

        # Login section
        gr.Markdown("Please log in to contribute resources:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-oauth-button")

        gr.Markdown("Please fill in the information below to add a new initiative:")

        with gr.Column():
            # All fields are required
            name_input = gr.Textbox(
                label="Name *",
                placeholder="Enter the name of the initiative",
                info="The name or title of the initiative (required)",
            )

            type_input = gr.Dropdown(
                label="Type *",
                choices=INITIATIVE_TYPES,
                info="Type of initiative (required)",
                multiselect=False,
            )

            # NOTE(review): the edit tab takes its choices from COUNTRIES and
            # LANGUAGES; these literal lists presumably duplicate them —
            # confirm and consider sharing the constants.
            countries_input = gr.CheckboxGroup(
                label="Countries *",
                choices=[
                    "Spain",
                    "Mexico",
                    "Argentina",
                    "Colombia",
                    "Peru",
                    "Venezuela",
                    "Chile",
                    "Ecuador",
                    "Guatemala",
                    "Cuba",
                    "Bolivia",
                    "Dominican Republic",
                    "Honduras",
                    "Paraguay",
                    "El Salvador",
                    "Nicaragua",
                    "Costa Rica",
                    "Panama",
                    "Uruguay",
                    "Puerto Rico",
                    "Brazil",
                    "Portugal",
                ],
                info="Countries where Spanish or Portuguese are spoken (required)",
            )

            languages_input = gr.CheckboxGroup(
                label="Languages *",
                choices=[
                    "spanish",
                    "portuguese",
                    "basque",
                    "catalan",
                    "galician",
                    "guarani",
                    "quechua",
                ],
                info="Languages supported by the initiative (required)",
            )

            website_url_input = gr.Textbox(
                label="Website URL *",
                placeholder="https://...",
                info="Official website of the initiative (required)",
            )

        submit_btn = gr.Button(f"Submit {RESOURCE_TITLE[:-1]}", variant="primary")
        result_msg = gr.Markdown()

        # Submit function
        # The OAuth profile is not listed as an input; submit_resource
        # declares it through its ``gr.OAuthProfile | None`` annotation.
        submit_btn.click(
            fn=submit_resource,
            inputs=[
                name_input,
                type_input,
                countries_input,
                languages_input,
                website_url_input,
            ],
            outputs=[result_msg],
        )

    return (
        name_input,
        type_input,
        countries_input,
        languages_input,
        website_url_input,
        submit_btn,
        result_msg,
    )


def search_entries(query: str) -> pd.DataFrame:
    """Search for entries by name or website URL.

    The query is matched literally and case-insensitively. A blank or
    ``None`` query returns an empty DataFrame.
    """
    if not query or not query.strip():
        return pd.DataFrame()

    df = load_data()
    if df.empty:
        return df

    # Search in name and website_url columns
    in_name = df["name"].str.contains(query, case=False, na=False, regex=False)
    in_url = df["website_url"].str.contains(query, case=False, na=False, regex=False)
    return df[in_name | in_url]


def load_entry_for_edit(selected_entry: str) -> tuple:
    """Load a specific entry for editing.

    Returns:
        A 5-tuple ``(name, type, countries, languages, website_url)`` where
        countries and languages are lists. Blank values are returned when
        nothing is selected, the dataset is empty or no row has that name.
    """
    if not selected_entry:
        return _empty_entry_fields()

    df = load_data()
    if df.empty:
        return _empty_entry_fields()

    # Find the entry by name; the first match wins if names are duplicated.
    matches = df[df["name"] == selected_entry]
    if matches.empty:
        return _empty_entry_fields()
    entry = matches.iloc[0]

    # Convert comma-separated strings back to lists for multi-select components
    return (
        entry["name"],
        entry["type"],
        _split_csv(entry["countries"]),
        _split_csv(entry["languages"]),
        entry["website_url"],
    )


def update_entry(
    original_name: str,
    name: str,
    initiative_type: str,
    countries: list,
    languages: list,
    website_url: str,
    profile: gr.OAuthProfile | None,
):
    """Update an existing entry.

    Every row whose ``name`` equals ``original_name`` is overwritten with the
    new values and a fresh ``date_submitted`` timestamp, then the dataset is
    pushed to the hub.

    Returns:
        A user-facing status message; failures are reported in the message
        rather than raised.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to edit entries."

    username = profile.username
    if not username:
        return "❌ Could not get username from profile."

    if not original_name:
        return "❌ No entry selected to edit."

    # Store exactly what was validated: stripped text.
    name = (name or "").strip()
    website_url = (website_url or "").strip()

    # Validate required fields
    if not name:
        return "❌ Name is required."
    if not initiative_type:
        return "❌ Type is required."
    if not countries:
        return "❌ At least one country is required."
    if not languages:
        return "❌ At least one language is required."
    if not website_url:
        return "❌ Website URL is required."

    # Validate URL
    if not validate_url(website_url):
        return "❌ Invalid website URL. Please provide a valid URL."

    try:
        # Load existing dataset
        existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        existing_df = existing_dataset.to_pandas()

        # Find and update the entry
        mask = existing_df["name"] == original_name
        if not mask.any():
            return f"❌ Entry '{original_name}' not found."

        # Update the entry
        new_values = {
            "name": name,
            "type": initiative_type,
            "countries": ", ".join(countries),
            "languages": ", ".join(languages),
            "website_url": website_url,
            "date_submitted": _utc_timestamp(),
        }
        for column, value in new_values.items():
            existing_df.loc[mask, column] = value

        # Convert back to Dataset and push to hub
        updated_dataset = Dataset.from_pandas(existing_df)
        updated_dataset.push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Update initiative entry: {name} (edited by {username})",
        )

        return f"✅ Successfully updated '{name}'!"

    except Exception as e:
        return f"❌ Error updating entry: {str(e)}"


def create_edit_tab():
    """Create the edit tab for modifying existing entries.

    Returns:
        A tuple of the tab's components: search input, search button, results
        dropdown, the form column, the five field inputs, the update button
        and the result message.
    """
    with gr.TabItem("✏️ Edit", id=f"{RESOURCE_TYPE}_edit"):
        gr.Markdown(f"### Edit Existing {RESOURCE_TITLE}")
        gr.Markdown("Please log in to edit entries:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-edit-oauth-button")

        gr.Markdown("Search for an entry to edit:")

        with gr.Row():
            search_input = gr.Textbox(
                label="Search by name or website URL",
                placeholder="Enter initiative name or website URL...",
                scale=3,
            )
            search_btn = gr.Button("🔍 Search", scale=1)

        search_results = gr.Dropdown(
            label="Select entry to edit", choices=[], interactive=True
        )

        gr.Markdown("---")
        gr.Markdown("**Edit the selected entry:**")

        # Hidden until an entry is picked from the dropdown.
        with gr.Column(visible=False) as edit_form:
            name_input = gr.Textbox(label="Name *", placeholder="Initiative name")
            type_input = gr.Dropdown(
                label="Type *",
                choices=INITIATIVE_TYPES,
                value="project",
            )
            countries_input = gr.CheckboxGroup(
                label="Countries *",
                choices=COUNTRIES,
            )
            languages_input = gr.CheckboxGroup(
                label="Languages *",
                choices=LANGUAGES,
            )
            website_url_input = gr.Textbox(
                label="Website URL *", placeholder="https://..."
            )

            update_btn = gr.Button("💾 Update Entry", variant="primary")
            result_msg = gr.Markdown()

        # Store the original name for updating
        original_name_state = gr.State("")

        def search_and_update_dropdown(query):
            # Always reset the selection; only the available choices vary.
            results_df = search_entries(query)
            choices = [] if results_df.empty else results_df["name"].tolist()
            return gr.Dropdown(choices=choices, value=None)

        def load_entry_and_show_form(selected_entry):
            # Must return one value per output component (7 in total):
            # form visibility, original name, then the five field values.
            if not selected_entry:
                return (gr.Column(visible=False), "", *_empty_entry_fields())

            entry_data = load_entry_for_edit(selected_entry)
            return (gr.Column(visible=True), selected_entry, *entry_data)

        # Event handlers
        search_btn.click(
            fn=search_and_update_dropdown,
            inputs=[search_input],
            outputs=[search_results],
        )

        search_results.change(
            fn=load_entry_and_show_form,
            inputs=[search_results],
            outputs=[
                edit_form,
                original_name_state,
                name_input,
                type_input,
                countries_input,
                languages_input,
                website_url_input,
            ],
        )

        # The OAuth profile is not listed as an input; update_entry declares
        # it through its ``gr.OAuthProfile | None`` annotation.
        update_btn.click(
            fn=update_entry,
            inputs=[
                original_name_state,
                name_input,
                type_input,
                countries_input,
                languages_input,
                website_url_input,
            ],
            outputs=[result_msg],
        )

    return (
        search_input,
        search_btn,
        search_results,
        edit_form,
        name_input,
        type_input,
        countries_input,
        languages_input,
        website_url_input,
        update_btn,
        result_msg,
    )


def create_tab():
    """Create the complete tab for this resource type.

    Returns:
        ``(table, inputs, edit_components)`` as produced by the 'All',
        'Contribute' and 'Edit' sub-tab builders respectively.
    """
    with gr.TabItem(f"🌟 {RESOURCE_TITLE}", id=RESOURCE_TYPE):
        with gr.Tabs():
            table = create_all_tab()
            inputs = create_contribute_tab()
            edit_components = create_edit_tab()

    return table, inputs, edit_components