# recursos-pln-es / initiatives_resource.py
# Latest commit 1af5fea by mariagrandury: "fix oauth config"
from datetime import datetime, timezone
import gradio as gr
import pandas as pd
from datasets import Dataset, load_dataset
from constants import (
COUNTRIES,
INITIATIVE_TYPES,
LANGUAGES,
format_dataframe_for_display,
format_dataframe_for_html_display,
)
# Dataset configuration
# Repository id passed to `load_dataset` / `push_to_hub` throughout this module.
DATASET_NAME = "somosnlp/recursos-pln-es"
# Config (subset) of the dataset that holds the initiative entries.
CONFIG_NAME = "initiatives"
# Machine-friendly identifier: used in log messages, tab ids, element ids and
# the search placeholder.
RESOURCE_TYPE = "initiatives"
# Human-readable plural title shown in headings; the singular form is derived
# elsewhere in this module with `RESOURCE_TITLE[:-1]`.
RESOURCE_TITLE = "Initiatives"
def load_data() -> pd.DataFrame:
    """Return the initiatives dataset as a DataFrame.

    The "train" split of ``DATASET_NAME`` / ``CONFIG_NAME`` is loaded and
    converted to pandas. If loading fails for any reason, the error is printed
    and an empty DataFrame carrying the expected column names is returned
    instead, so callers always receive a frame with the same schema.
    """
    try:
        return load_dataset(DATASET_NAME, CONFIG_NAME, split="train").to_pandas()
    except Exception as err:
        print(f"Could not load {RESOURCE_TYPE} dataset: {err}")

    # Fallback: no rows, but the columns the rest of the module relies on.
    expected_columns = (
        "name",
        "type",
        "countries",
        "languages",
        "website_url",
        "submitted_by",
        "date_submitted",
    )
    return pd.DataFrame(columns=list(expected_columns))
def search_and_filter_data(df: pd.DataFrame, search_query: str) -> pd.DataFrame:
    """Return the rows of *df* in which any cell contains *search_query*.

    Matching is case-insensitive and literal: the query is NOT interpreted as
    a regular expression, so user input such as ``"c++"`` or ``"("`` is safe.

    Args:
        df: Table to filter. It is never modified.
        search_query: Text to look for. An empty (or ``None``) query means
            "no filter".

    Returns:
        *df* itself when there is nothing to filter, otherwise a new
        DataFrame containing only the matching rows.
    """
    if not search_query or df.empty:
        return df

    # Compare on the string form of every cell, column by column (vectorized),
    # then keep the rows where at least one column matched.
    cell_matches = df.astype(str).apply(
        lambda column: column.str.contains(
            search_query, case=False, regex=False, na=False
        )
    )
    return df[cell_matches.any(axis=1)]
def validate_url(url: str) -> bool:
    """Return True if *url* is an http(s) URL that names a host.

    A URL is required for initiatives, so empty or non-string input is
    invalid. Besides the ``http://`` / ``https://`` prefix, the URL must
    contain a host part, which rejects inputs such as ``"https://"``.

    Args:
        url: Candidate URL as typed by the user.

    Returns:
        ``True`` when the URL is acceptable, ``False`` otherwise.
    """
    # Local import keeps this block self-contained without touching the
    # module's import section.
    from urllib.parse import urlparse

    if not isinstance(url, str) or not url.startswith(("http://", "https://")):
        return False

    try:
        host = urlparse(url).hostname
    except ValueError:
        # urlparse raises on malformed authorities (e.g. unbalanced "[").
        return False

    return bool(host and host.strip())
def submit_resource(
    name: str,
    initiative_type: str,
    countries: list,
    languages: list,
    website_url: str,
    profile: gr.OAuthProfile | None,
):
    """Validate a new initiative and append it to the Hub dataset.

    Args:
        name: Name of the initiative (required; surrounding whitespace is
            ignored).
        initiative_type: Selected initiative type (required).
        countries: Selected countries (at least one required).
        languages: Selected languages (at least one required).
        website_url: Website of the initiative; must pass ``validate_url``.
        profile: OAuth profile of the logged-in user, or ``None`` when the
            user is not logged in.

    Returns:
        A status message string (success or error) to show to the user. This
        function does not raise: failures are reported in the message.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to submit a resource."

    # Normalise free-text fields so whitespace-only input counts as missing
    # and stored values carry no stray padding.
    name = (name or "").strip()
    website_url = (website_url or "").strip()

    # Validate required fields
    if not name:
        return "❌ Error: Name is required."
    if not initiative_type:
        return "❌ Error: Type is required."
    if not countries:
        return "❌ Error: At least one country must be selected."
    if not languages:
        return "❌ Error: At least one language must be selected."
    if not website_url:
        return "❌ Error: Website URL is required."

    # Validate URL
    if not validate_url(website_url):
        return "❌ Error: Website URL must be a valid URL starting with http:// or https://"

    try:
        username = profile.username
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")

        # Multi-select values are stored as comma-separated strings.
        new_data = {
            "name": name,
            "type": initiative_type,
            "countries": ", ".join(countries),
            "languages": ", ".join(languages),
            "website_url": website_url,
            "submitted_by": username,
            "date_submitted": current_time,
        }

        # Try to load the existing dataset, or start a new one.
        try:
            existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
            existing_df = existing_dataset.to_pandas()
            updated_df = pd.concat(
                [existing_df, pd.DataFrame([new_data])], ignore_index=True
            )
        except Exception:
            # NOTE(review): any load failure lands here, not only "dataset
            # does not exist yet"; the push below would then publish a
            # one-row dataset. Consider narrowing this to the "not found"
            # error of the installed `datasets` version.
            updated_df = pd.DataFrame([new_data])

        # Convert back to Dataset and push to hub
        updated_dataset = Dataset.from_pandas(updated_df)
        updated_dataset.push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Add {name} by {username}",
            token=True,
        )
        return f"✅ Success: {name} has been submitted successfully!"
    except Exception as e:
        return f"❌ Error: Failed to submit resource. {str(e)}"
def create_all_tab():
    """Create the 'All' tab: a searchable, refreshable table of every entry.

    Returns:
        The ``gr.Dataframe`` component that displays the entries.
    """
    with gr.TabItem("📋 All", id=f"{RESOURCE_TYPE}_all"):
        gr.Markdown(f"### All {RESOURCE_TITLE}")

        search_box = gr.Textbox(
            placeholder=f"Search {RESOURCE_TYPE}...",
            label="Filter the table",
            show_label=False,
        )

        def _format_for_table(df):
            """Apply the shared display formatting (link column, hidden date)."""
            return format_dataframe_for_display(
                df,
                url_columns=["website_url"],
                hide_columns=["date_submitted"],
            )

        def get_formatted_data():
            """Reload the dataset and format it for the table."""
            return _format_for_table(load_data())

        def search_and_format(query):
            """Reload the dataset, keep rows matching *query*, and format them."""
            return _format_for_table(search_and_filter_data(load_data(), query))

        table = gr.Dataframe(
            value=get_formatted_data(),
            label=RESOURCE_TITLE,
            show_label=False,
            interactive=False,
            wrap=False,  # Disable wrapping to show full text in single lines
            datatype="markdown",  # Cells are rendered as markdown
        )

        # Re-filter the table whenever the search text changes.
        search_box.change(
            fn=search_and_format,
            inputs=search_box,
            outputs=table,
        )

        # Refresh button to reload data
        refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
        refresh_btn.click(fn=get_formatted_data, outputs=table)

    return table
def create_contribute_tab():
    """Create the 'Contribute' tab with the form for submitting a new entry.

    Returns:
        A tuple ``(name_input, type_input, countries_input, languages_input,
        website_url_input, submit_btn, result_msg)`` of the created
        components.
    """
    with gr.TabItem("➕ Contribute", id=f"{RESOURCE_TYPE}_contribute"):
        # RESOURCE_TITLE is plural; dropping the last character gives the
        # singular form used in headings and on the button.
        gr.Markdown(f"### Contribute a New {RESOURCE_TITLE[:-1]}")

        # Login section
        gr.Markdown("Please log in to contribute resources:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-oauth-button")

        gr.Markdown("Please fill in the information below to add a new initiative:")

        with gr.Column():
            # All fields are required
            name_input = gr.Textbox(
                label="Name *",
                placeholder="Enter the name of the initiative",
                info="The name or title of the initiative (required)",
            )
            type_input = gr.Dropdown(
                label="Type *",
                choices=INITIATIVE_TYPES,
                info="Type of initiative (required)",
                multiselect=False,
            )
            # NOTE(review): the edit tab takes its choices from the COUNTRIES
            # and LANGUAGES constants, while the lists below are hard-coded;
            # confirm they are meant to stay in sync.
            countries_input = gr.CheckboxGroup(
                label="Countries *",
                choices=[
                    "Spain",
                    "Mexico",
                    "Argentina",
                    "Colombia",
                    "Peru",
                    "Venezuela",
                    "Chile",
                    "Ecuador",
                    "Guatemala",
                    "Cuba",
                    "Bolivia",
                    "Dominican Republic",
                    "Honduras",
                    "Paraguay",
                    "El Salvador",
                    "Nicaragua",
                    "Costa Rica",
                    "Panama",
                    "Uruguay",
                    "Puerto Rico",
                    "Brazil",
                    "Portugal",
                ],
                info="Countries where Spanish or Portuguese are spoken (required)",
            )
            languages_input = gr.CheckboxGroup(
                label="Languages *",
                choices=[
                    "spanish",
                    "portuguese",
                    "basque",
                    "catalan",
                    "galician",
                    "guarani",
                    "quechua",
                ],
                info="Languages supported by the initiative (required)",
            )
            website_url_input = gr.Textbox(
                label="Website URL *",
                placeholder="https://...",
                info="Official website of the initiative (required)",
            )

        submit_btn = gr.Button(f"Submit {RESOURCE_TITLE[:-1]}", variant="primary")
        result_msg = gr.Markdown()

        # Only the form fields are listed as inputs; submit_resource's
        # `profile` parameter is not wired here.
        # NOTE(review): presumably Gradio injects it from the
        # gr.OAuthProfile annotation - confirm.
        submit_btn.click(
            fn=submit_resource,
            inputs=[
                name_input,
                type_input,
                countries_input,
                languages_input,
                website_url_input,
            ],
            outputs=[result_msg],
        )

    return (
        name_input,
        type_input,
        countries_input,
        languages_input,
        website_url_input,
        submit_btn,
        result_msg,
    )
def search_entries(query: str) -> pd.DataFrame:
    """Return the entries whose name or website URL contains *query*.

    Matching is case-insensitive and literal (the query is not treated as a
    regular expression). Surrounding whitespace in the query is ignored.

    Args:
        query: Text typed by the user; may be empty or ``None``.

    Returns:
        An empty DataFrame when the query is blank, the (empty) loaded frame
        when there is no data, otherwise the matching rows.
    """
    needle = (query or "").strip()
    if not needle:
        return pd.DataFrame()

    df = load_data()
    if df.empty:
        return df

    # Search in name and website_url columns; missing values never match.
    in_name = df["name"].str.contains(needle, case=False, na=False, regex=False)
    in_url = df["website_url"].str.contains(
        needle, case=False, na=False, regex=False
    )
    return df[in_name | in_url]
def load_entry_for_edit(selected_entry: str) -> tuple:
    """Load the stored values of one entry so they can populate the edit form.

    Args:
        selected_entry: Name of the entry to load (matched exactly against
            the ``name`` column). May be empty or ``None``.

    Returns:
        A 5-tuple ``(name, type, countries, languages, website_url)`` where
        ``countries`` and ``languages`` are lists. When nothing is selected,
        no data is available, or the name is not found, an "empty form"
        tuple ``("", None, [], [], "")`` is returned.
    """
    # Empty values typed per field: text -> "", single choice -> None,
    # multi-select -> [].
    empty_form = ("", None, [], [], "")

    if not selected_entry:
        return empty_form

    df = load_data()
    if df.empty:
        return empty_form

    # Find the entry by name; it may have disappeared since the search ran.
    matches = df[df["name"] == selected_entry]
    if matches.empty:
        return empty_form
    entry = matches.iloc[0]

    def _as_list(raw):
        """Turn a stored comma-separated string into a clean list of values."""
        if isinstance(raw, str):
            return [part.strip() for part in raw.split(",") if part.strip()]
        if isinstance(raw, (list, tuple)):
            return [str(part).strip() for part in raw if str(part).strip()]
        # None / NaN / anything else: treat as "nothing selected".
        return []

    return (
        entry["name"],
        entry["type"],
        _as_list(entry["countries"]),
        _as_list(entry["languages"]),
        entry["website_url"],
    )
def update_entry(
    original_name: str,
    name: str,
    initiative_type: str,
    countries: list,
    languages: list,
    website_url: str,
    profile: gr.OAuthProfile | None,
):
    """Overwrite the fields of an existing entry and push the dataset.

    Every row whose ``name`` equals *original_name* is updated, and its
    ``date_submitted`` is set to the current UTC time.

    Args:
        original_name: Name of the entry as it is currently stored.
        name: New name (required; surrounding whitespace is removed).
        initiative_type: New type (required).
        countries: New list of countries (at least one required).
        languages: New list of languages (at least one required).
        website_url: New website URL; must pass ``validate_url``.
        profile: OAuth profile of the logged-in user, or ``None`` when the
            user is not logged in.

    Returns:
        A status message string (success or error) to show to the user. This
        function does not raise: failures are reported in the message.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to edit entries."

    username = profile.username
    if not username:
        return "❌ Could not get username from profile."

    if not original_name:
        return "❌ No entry selected to edit."

    # Normalise free-text fields once, so the value that is validated is the
    # value that gets stored (and None does not crash on .strip()).
    name = (name or "").strip()
    website_url = (website_url or "").strip()

    # Validate required fields
    if not name:
        return "❌ Name is required."
    if not initiative_type:
        return "❌ Type is required."
    if not countries:
        return "❌ At least one country is required."
    if not languages:
        return "❌ At least one language is required."
    if not website_url:
        return "❌ Website URL is required."

    # Validate URL
    if not validate_url(website_url):
        return "❌ Invalid website URL. Please provide a valid URL."

    try:
        # Load existing dataset
        existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        existing_df = existing_dataset.to_pandas()

        # Find the entry
        mask = existing_df["name"] == original_name
        if not mask.any():
            return f"❌ Entry '{original_name}' not found."

        # Update the entry; multi-select values are stored comma-separated.
        current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
        new_values = {
            "name": name,
            "type": initiative_type,
            "countries": ", ".join(countries),
            "languages": ", ".join(languages),
            "website_url": website_url,
            "date_submitted": current_time,
        }
        for column, value in new_values.items():
            existing_df.loc[mask, column] = value

        # Convert back to Dataset and push to hub
        updated_dataset = Dataset.from_pandas(existing_df)
        updated_dataset.push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Update initiative entry: {name} (edited by {username})",
        )
        return f"✅ Successfully updated '{name}'!"
    except Exception as e:
        return f"❌ Error updating entry: {str(e)}"
def create_edit_tab():
    """Create the 'Edit' tab: search for an entry, load it into a form, save it.

    Returns:
        A tuple ``(search_input, search_btn, search_results, edit_form,
        name_input, type_input, countries_input, languages_input,
        website_url_input, update_btn, result_msg)`` of the created
        components.
    """
    with gr.TabItem("✏️ Edit", id=f"{RESOURCE_TYPE}_edit"):
        gr.Markdown(f"### Edit Existing {RESOURCE_TITLE}")

        gr.Markdown("Please log in to edit entries:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-edit-oauth-button")

        gr.Markdown("Search for an entry to edit:")
        with gr.Row():
            search_input = gr.Textbox(
                label="Search by name or website URL",
                placeholder="Enter initiative name or website URL...",
                scale=3,
            )
            search_btn = gr.Button("🔍 Search", scale=1)

        search_results = gr.Dropdown(
            label="Select entry to edit", choices=[], interactive=True
        )

        gr.Markdown("---")
        gr.Markdown("**Edit the selected entry:**")

        # The form stays hidden until an entry has been selected.
        with gr.Column(visible=False) as edit_form:
            name_input = gr.Textbox(label="Name *", placeholder="Initiative name")
            type_input = gr.Dropdown(
                label="Type *",
                choices=INITIATIVE_TYPES,
                value="project",
            )
            countries_input = gr.CheckboxGroup(
                label="Countries *",
                choices=COUNTRIES,
            )
            languages_input = gr.CheckboxGroup(
                label="Languages *",
                choices=LANGUAGES,
            )
            website_url_input = gr.Textbox(
                label="Website URL *", placeholder="https://..."
            )
            update_btn = gr.Button("💾 Update Entry", variant="primary")

        result_msg = gr.Markdown()

        # Store the original name for updating
        original_name_state = gr.State("")

        def search_and_update_dropdown(query):
            """Fill the dropdown with the names of the entries matching *query*."""
            results_df = search_entries(query)
            choices = [] if results_df.empty else results_df["name"].tolist()
            return gr.Dropdown(choices=choices, value=None)

        def load_entry_and_show_form(selected_entry):
            """Return one value per output: form visibility, stored name, 5 fields."""
            if not selected_entry:
                # Exactly seven values are required here (one per output).
                # This branch also runs after each search, because the
                # dropdown value is reset to None.
                return (gr.Column(visible=False), "", "", None, [], [], "")

            entry_data = load_entry_for_edit(selected_entry)
            return (gr.Column(visible=True), selected_entry, *entry_data)

        # Event handlers
        search_btn.click(
            fn=search_and_update_dropdown,
            inputs=[search_input],
            outputs=[search_results],
        )

        search_results.change(
            fn=load_entry_and_show_form,
            inputs=[search_results],
            outputs=[
                edit_form,
                original_name_state,
                name_input,
                type_input,
                countries_input,
                languages_input,
                website_url_input,
            ],
        )

        # update_entry's `profile` parameter is not listed as an input.
        # NOTE(review): presumably Gradio injects it from the
        # gr.OAuthProfile annotation - confirm.
        update_btn.click(
            fn=update_entry,
            inputs=[
                original_name_state,
                name_input,
                type_input,
                countries_input,
                languages_input,
                website_url_input,
            ],
            outputs=[result_msg],
        )

    return (
        search_input,
        search_btn,
        search_results,
        edit_form,
        name_input,
        type_input,
        countries_input,
        languages_input,
        website_url_input,
        update_btn,
        result_msg,
    )
def create_tab():
    """Create the complete top-level tab for this resource type.

    The tab contains three nested sub-tabs: "All", "Contribute" and "Edit".

    Returns:
        A tuple ``(table, inputs, edit_components)`` holding the return
        values of ``create_all_tab``, ``create_contribute_tab`` and
        ``create_edit_tab`` respectively.
    """
    with gr.TabItem(f"🌟 {RESOURCE_TITLE}", id=RESOURCE_TYPE):
        with gr.Tabs():
            table = create_all_tab()
            inputs = create_contribute_tab()
            edit_components = create_edit_tab()

    return table, inputs, edit_components