from datetime import datetime, timezone

import gradio as gr
import pandas as pd
from datasets import Dataset, load_dataset

from constants import (
    COUNTRIES,
    DOMAINS,
    LANGUAGES,
    TASKS,
    format_dataframe_for_display,
    format_dataframe_for_html_display,
)

# Dataset configuration
DATASET_NAME = "somosnlp/recursos-pln-es"
CONFIG_NAME = "shared_tasks"
RESOURCE_TYPE = "shared_tasks"
RESOURCE_TITLE = "Shared Tasks"

# Columns of the unified schema, in storage order.
_SCHEMA_COLUMNS = (
    "name",
    "venue",
    "website_url",
    "languages",
    "countries",
    "year",
    "domain",
    "task",
    "past_future",
    "registration_deadline",
    "workshop_date",
    "data_available_date",
    "submission_deadline",
    "paper_link",
    "submitted_by",
    "date_submitted",
)

# Per-status arguments passed to format_dataframe_for_display.
_DISPLAY_OPTIONS = {
    "past": {
        "url_columns": ("website_url", "paper_link"),
        "hide_columns": (
            "past_future",
            "date_submitted",
            "registration_deadline",
            "workshop_date",
            "data_available_date",
            "submission_deadline",
        ),
    },
    "future": {
        "url_columns": ("website_url",),
        "hide_columns": ("date_submitted", "paper_link", "past_future"),
    },
}


def load_data() -> pd.DataFrame:
    """Load the shared-tasks split from the Hub as a DataFrame.

    Returns:
        The dataset converted to pandas, or an empty DataFrame carrying the
        unified-schema columns when loading fails for any reason (the error
        is printed, not raised).
    """
    try:
        dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        return dataset.to_pandas()
    except Exception as e:
        # Best-effort on purpose: the UI must still render without the dataset.
        print(f"Could not load {RESOURCE_TYPE} dataset: {e}")
        # Return empty DataFrame with required columns for unified schema
        return pd.DataFrame(columns=list(_SCHEMA_COLUMNS))


def search_and_filter_data(df: pd.DataFrame, search_query: str) -> pd.DataFrame:
    """Keep the rows in which any cell contains the search query.

    The match is a case-insensitive *literal* substring match on the string
    form of every cell, so characters such as ``(`` or ``+`` typed by a user
    are not interpreted as regular-expression syntax.

    Args:
        df: Table to filter.
        search_query: Text to look for; empty/None returns ``df`` unchanged.

    Returns:
        ``df`` itself when there is nothing to filter, otherwise the matching
        rows.
    """
    if not search_query or df.empty:
        return df

    needle = str(search_query)
    mask = pd.Series(False, index=df.index)
    for _, values in df.items():
        mask |= values.astype(str).str.contains(needle, case=False, regex=False)
    return df[mask]


def validate_url(url: str) -> bool:
    """Return True when ``url`` is non-empty and starts with http:// or https://."""
    if not url:
        return False  # URLs are required for shared tasks
    return url.startswith(("http://", "https://"))


def validate_date(date_str: str) -> bool:
    """Return True when ``date_str`` is a real calendar date in DD/MM/YYYY format."""
    if not date_str:
        return False  # Dates are required
    try:
        datetime.strptime(date_str, "%d/%m/%Y")
        return True
    except ValueError:
        return False


def _utc_timestamp() -> str:
    """Return the current UTC time formatted as ``YYYY-MM-DDTHH:MM:SSZ``."""
    return datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")


def _validate_task_fields(
    name: str,
    venue: str,
    website_url: str,
    languages: list,
    countries: list,
    past_future: str,
    registration_deadline: str,
    workshop_date: str,
    data_available_date: str,
    submission_deadline: str,
    paper_link: str,
) -> str | None:
    """Validate the form fields shared by submission and editing.

    Checks run in a fixed order and stop at the first failure: required
    fields, the past/future value, the status-specific fields (all four dates
    present and well-formed for future tasks, a paper link for past tasks),
    and finally the URL formats.

    Returns:
        The user-facing error message, or None when everything is valid.
    """
    required_fields = [
        ("Name", name),
        ("Venue", venue),
        ("Website URL", website_url),
        ("Languages", languages),
        ("Countries", countries),
        ("Past/Future", past_future),
    ]
    for field_name, field_value in required_fields:
        if not field_value:
            return f"❌ Error: {field_name} is required."

    if past_future not in ("past", "future"):
        return "❌ Error: Past/Future must be either 'past' or 'future'."

    if past_future == "future":
        date_fields = [
            ("Registration Deadline", registration_deadline),
            ("Workshop Date", workshop_date),
            ("Data Available Date", data_available_date),
            ("Submission Deadline", submission_deadline),
        ]
        # Report every missing date before complaining about any format.
        for field_name, field_value in date_fields:
            if not field_value:
                return f"❌ Error: {field_name} is required for future tasks."
        for date_name, date_value in date_fields:
            if not validate_date(date_value):
                return f"❌ Error: {date_name} must be in DD/MM/YYYY format."
    elif not paper_link:
        return "❌ Error: Paper Link is required for past tasks."

    if not validate_url(website_url):
        return "❌ Error: Website URL must be a valid URL starting with http:// or https://"

    # A paper link is checked whenever it is provided, whatever the status.
    if paper_link and not validate_url(paper_link):
        return (
            "❌ Error: Paper Link must be a valid URL starting with http:// or https://"
        )

    return None


def _build_task_record(
    name: str,
    venue: str,
    website_url: str,
    languages: list,
    countries: list,
    year: str,
    domain: list,
    task: list,
    past_future: str,
    registration_deadline: str,
    workshop_date: str,
    data_available_date: str,
    submission_deadline: str,
    paper_link: str,
) -> dict:
    """Convert validated form values into the stored column -> value mapping.

    List fields become comma-separated strings. Date fields are kept only for
    future tasks and the paper link only for past tasks; the others are
    stored as empty strings.
    """
    is_future = past_future == "future"
    return {
        "name": name,
        "venue": venue,
        "website_url": website_url,
        "languages": ",".join(languages) if languages else "",
        "countries": ",".join(countries) if countries else "",
        "year": year,
        "domain": ",".join(domain) if domain else "",
        "task": ",".join(task) if task else "",
        "past_future": past_future,
        "registration_deadline": registration_deadline if is_future else "",
        "workshop_date": workshop_date if is_future else "",
        "data_available_date": data_available_date if is_future else "",
        "submission_deadline": submission_deadline if is_future else "",
        "paper_link": paper_link if past_future == "past" else "",
    }


def submit_resource(
    name: str,
    venue: str,
    website_url: str,
    languages: list,
    countries: list,
    year: str,
    domain: list,
    task: list,
    past_future: str,
    registration_deadline: str,
    workshop_date: str,
    data_available_date: str,
    submission_deadline: str,
    paper_link: str,
    profile: gr.OAuthProfile | None,
):
    """Validate a new shared task and append it to the Hub dataset.

    Args:
        name, venue, website_url, languages, countries, past_future: Required
            form values.
        year, domain, task: Optional form values.
        registration_deadline, workshop_date, data_available_date,
            submission_deadline: DD/MM/YYYY dates, required for future tasks.
        paper_link: URL, required for past tasks.
        profile: OAuth profile of the logged-in user, or None when logged out.

    Returns:
        A user-facing status message (``✅ ...`` on success, ``❌ ...`` on any
        validation or upload failure). Exceptions are not propagated.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to submit a resource."

    error = _validate_task_fields(
        name,
        venue,
        website_url,
        languages,
        countries,
        past_future,
        registration_deadline,
        workshop_date,
        data_available_date,
        submission_deadline,
        paper_link,
    )
    if error:
        return error

    try:
        username = profile.username

        # Create new row data
        new_data = _build_task_record(
            name,
            venue,
            website_url,
            languages,
            countries,
            year,
            domain,
            task,
            past_future,
            registration_deadline,
            workshop_date,
            data_available_date,
            submission_deadline,
            paper_link,
        )
        new_data["submitted_by"] = username
        new_data["date_submitted"] = _utc_timestamp()

        # Try to load existing dataset, or create new one
        try:
            existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
            existing_df = existing_dataset.to_pandas()
            # Add new row
            updated_df = pd.concat(
                [existing_df, pd.DataFrame([new_data])], ignore_index=True
            )
        except (FileNotFoundError, ValueError):
            # Only start a fresh dataset when the repo/config is missing.
            # Any other failure (e.g. a network error) must NOT fall through
            # here, otherwise the push below would replace the existing
            # entries with this single row.
            # NOTE(review): assumes `datasets` signals a missing repo/config
            # with FileNotFoundError subclasses or ValueError — confirm
            # against the installed version.
            updated_df = pd.DataFrame([new_data])

        # Convert back to Dataset and push to hub
        updated_dataset = Dataset.from_pandas(updated_df)
        updated_dataset.push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Add {name} by {username}",
            # NOTE(review): token=True presumably resolves to the token
            # stored in the runtime environment rather than the OAuth token
            # of the logged-in user — confirm this is intended.
            token=True,
        )

        return f"✅ Success: {name} has been submitted successfully!"

    except Exception as e:
        return f"❌ Error: Failed to submit resource. {str(e)}"


def _format_tasks(df: pd.DataFrame, status: str, query: str = ""):
    """Select the ``status`` ("past"/"future") rows of ``df``, apply the
    optional search query and format the result for the display table."""
    subset = search_and_filter_data(df[df["past_future"] == status], query)
    options = _DISPLAY_OPTIONS[status]
    return format_dataframe_for_display(
        subset,
        url_columns=list(options["url_columns"]),
        hide_columns=list(options["hide_columns"]),
    )


def _toggle_conditional_fields(past_future):
    """Return visibility updates for the (future-fields, past-fields) groups.

    The date group is shown only for "future"; any other value, including no
    selection, shows the paper-link group instead.
    """
    if past_future == "future":
        return gr.Group(visible=True), gr.Group(visible=False)
    return gr.Group(visible=False), gr.Group(visible=True)


def create_past_tab():
    """Create the 'Past Tasks' tab for this resource type.

    Returns:
        The ``gr.Dataframe`` component showing past shared tasks.
    """
    # Loaded once and reused for both the tab-label count and the initial table.
    df = load_data()
    past_count = len(df[df["past_future"] == "past"])

    with gr.TabItem(f"📚 Past Tasks ({past_count})", id=f"{RESOURCE_TYPE}_past"):
        gr.Markdown(f"### Past {RESOURCE_TITLE}")
        gr.Markdown("Browse completed shared tasks with papers and results.")

        search_box = gr.Textbox(
            placeholder="Search past shared tasks...",
            label="Filter the table",
            show_label=False,
        )

        # Reload from the Hub and format past tasks data
        def get_past_formatted_data():
            return _format_tasks(load_data(), "past")

        table = gr.Dataframe(
            value=_format_tasks(df, "past"),
            label="Past Shared Tasks",
            show_label=False,
            interactive=False,
            wrap=False,
            datatype="markdown",
        )

        # Connect search functionality for past tasks
        def search_past_and_format(query):
            return _format_tasks(load_data(), "past", query)

        search_box.change(
            fn=search_past_and_format,
            inputs=search_box,
            outputs=table,
        )

        # Refresh button to reload data
        refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
        refresh_btn.click(fn=get_past_formatted_data, outputs=table)

    return table


def create_future_tab():
    """Create the 'Future Tasks' tab for this resource type.

    Returns:
        The ``gr.Dataframe`` component showing upcoming shared tasks.
    """
    # Loaded once and reused for both the tab-label count and the initial table.
    df = load_data()
    future_count = len(df[df["past_future"] == "future"])

    with gr.TabItem(f"🔮 Future Tasks ({future_count})", id=f"{RESOURCE_TYPE}_future"):
        gr.Markdown(f"### Future {RESOURCE_TITLE}")
        gr.Markdown(
            "Browse upcoming shared tasks with deadlines and registration info."
        )

        if future_count == 0:
            gr.Markdown(
                """
                **No future shared tasks yet!**

                This tab will show upcoming shared tasks once they are submitted.
                If you know of any upcoming shared tasks, please contribute them using the "Contribute" tab.
                """
            )

        search_box = gr.Textbox(
            placeholder="Search future shared tasks...",
            label="Filter the table",
            show_label=False,
        )

        # Reload from the Hub and format future tasks data
        def get_future_formatted_data():
            return _format_tasks(load_data(), "future")

        table = gr.Dataframe(
            value=_format_tasks(df, "future"),
            label="Future Shared Tasks",
            show_label=False,
            interactive=False,
            wrap=False,
            datatype="markdown",
        )

        # Connect search functionality for future tasks
        def search_future_and_format(query):
            return _format_tasks(load_data(), "future", query)

        search_box.change(
            fn=search_future_and_format,
            inputs=search_box,
            outputs=table,
        )

        # Refresh button to reload data
        refresh_btn = gr.Button("🔄 Refresh Data", variant="secondary")
        refresh_btn.click(fn=get_future_formatted_data, outputs=table)

    return table


def create_contribute_tab():
    """Create the 'Contribute' tab for this resource type.

    Returns:
        A tuple with the fourteen form inputs (in ``submit_resource`` argument
        order), followed by the submit button and the result message.
    """
    with gr.TabItem("➕ Contribute", id=f"{RESOURCE_TYPE}_contribute"):
        gr.Markdown(f"### Contribute a New {RESOURCE_TITLE[:-1]}")

        # Login section
        gr.Markdown("Please log in to contribute resources:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-oauth-button")

        gr.Markdown("Please fill in the information below to add a new shared task:")

        with gr.Column():
            # Required fields
            name_input = gr.Textbox(
                label="Name *",
                placeholder="Enter the name of the shared task",
                info="The name or title of the shared task (required)",
            )

            venue_input = gr.Textbox(
                label="Venue *",
                placeholder="e.g., IberLEF, SemEval, ACL",
                info="Name of the conference or workshop (required)",
            )

            website_url_input = gr.Textbox(
                label="Website URL *",
                placeholder="https://...",
                info="Link to the shared task webpage with detailed information (required)",
            )

            languages_input = gr.Dropdown(
                choices=LANGUAGES,
                label="Languages *",
                info="Select one or more languages (required)",
                multiselect=True,
                value=[],
            )

            countries_input = gr.Dropdown(
                choices=COUNTRIES,
                label="Countries *",
                info="Select one or more countries (required)",
                multiselect=True,
                value=[],
            )

            year_input = gr.Textbox(
                label="Year",
                placeholder="e.g., 2022, 2023",
                info="Year (YYYY format)",
            )

            domain_input = gr.Dropdown(
                choices=DOMAINS,
                label="Domain",
                info="Select the task domain or area",
                multiselect=True,
                value=[],
            )

            task_input = gr.Dropdown(
                choices=TASKS,
                label="Task",
                info="Select one or more NLP tasks",
                multiselect=True,
                value=[],
            )

            past_future_input = gr.Radio(
                choices=["past", "future"],
                label="Past/Future *",
                info="Is this a past or future shared task? (required)",
            )

            # Conditional fields for future tasks
            with gr.Group(visible=True) as future_fields:
                gr.Markdown("**Important Dates for Future Tasks** (format: DD/MM/YYYY)")

                with gr.Row():
                    registration_deadline_input = gr.Textbox(
                        label="Registration Deadline",
                        placeholder="DD/MM/YYYY",
                        info="Last date to register for the task",
                    )

                    workshop_date_input = gr.Textbox(
                        label="Workshop Date",
                        placeholder="DD/MM/YYYY",
                        info="Date when the workshop will take place",
                    )

                with gr.Row():
                    data_available_date_input = gr.Textbox(
                        label="Data Available Date",
                        placeholder="DD/MM/YYYY",
                        info="Date when the dataset will be released",
                    )

                    submission_deadline_input = gr.Textbox(
                        label="Submission Deadline",
                        placeholder="DD/MM/YYYY",
                        info="Last date to submit results",
                    )

            # Conditional field for past tasks
            with gr.Group(visible=True) as past_fields:
                paper_link_input = gr.Textbox(
                    label="Paper Link",
                    placeholder="https://...",
                    info="Link to the paper or proceedings (required for past tasks)",
                )

        submit_btn = gr.Button(f"Submit {RESOURCE_TITLE[:-1]}", variant="primary")
        result_msg = gr.Markdown()

        # Show/hide conditional fields when the past/future choice changes
        past_future_input.change(
            fn=_toggle_conditional_fields,
            inputs=[past_future_input],
            outputs=[future_fields, past_fields],
        )

        # Submit function
        submit_btn.click(
            fn=submit_resource,
            inputs=[
                name_input,
                venue_input,
                website_url_input,
                languages_input,
                countries_input,
                year_input,
                domain_input,
                task_input,
                past_future_input,
                registration_deadline_input,
                workshop_date_input,
                data_available_date_input,
                submission_deadline_input,
                paper_link_input,
            ],
            outputs=[result_msg],
        )

    return (
        name_input,
        venue_input,
        website_url_input,
        languages_input,
        countries_input,
        year_input,
        domain_input,
        task_input,
        past_future_input,
        registration_deadline_input,
        workshop_date_input,
        data_available_date_input,
        submission_deadline_input,
        paper_link_input,
        submit_btn,
        result_msg,
    )


def search_entries(query: str) -> pd.DataFrame:
    """Search for entries whose name or venue contains ``query``.

    The match is a case-insensitive literal substring match (not a regular
    expression).

    Returns:
        An empty DataFrame for a blank query, otherwise the matching rows.
    """
    if not query or not query.strip():
        return pd.DataFrame()

    df = load_data()
    if df.empty:
        return df

    # Search in name and venue columns
    in_name = df["name"].str.contains(query, case=False, na=False, regex=False)
    in_venue = df["venue"].str.contains(query, case=False, na=False, regex=False)
    return df[in_name | in_venue]


def _empty_entry_values() -> tuple:
    """Return cleared values for the 14 edit-form fields, in form order.

    Multiselect fields get an empty list and the past/future radio gets None,
    so every component receives a value of the kind it expects.
    """
    return ("", "", "", [], [], "", [], [], None, "", "", "", "", "")


def _split_csv(value) -> list:
    """Split a stored comma-separated cell into a list; missing values give []."""
    if not isinstance(value, str) or not value:
        return []
    return value.split(",")


def _as_text(value) -> str:
    """Return ``value`` as text, mapping None/NaN cells to an empty string."""
    if value is None or (pd.api.types.is_scalar(value) and pd.isna(value)):
        return ""
    return str(value)


def load_entry_for_edit(selected_entry: str) -> tuple:
    """Load a specific entry for editing.

    Args:
        selected_entry: Name of the entry to load (first exact match wins).

    Returns:
        A 14-tuple in edit-form order: name, venue, website_url, languages,
        countries, year, domain, task, past_future, registration_deadline,
        workshop_date, data_available_date, submission_deadline, paper_link.
        Cleared values are returned when nothing is selected, the dataset is
        empty, or the entry can no longer be found.
    """
    if not selected_entry:
        return _empty_entry_values()

    df = load_data()
    if df.empty:
        return _empty_entry_values()

    # Find the entry by name; it may have been renamed or removed meanwhile.
    matches = df[df["name"] == selected_entry]
    if matches.empty:
        return _empty_entry_values()
    entry = matches.iloc[0]

    return (
        _as_text(entry["name"]),
        _as_text(entry["venue"]),
        _as_text(entry["website_url"]),
        # Convert comma-separated strings back to lists for editing
        _split_csv(entry["languages"]),
        _split_csv(entry["countries"]),
        _as_text(entry["year"]),
        _split_csv(entry["domain"]),
        _split_csv(entry["task"]),
        _as_text(entry["past_future"]) or None,
        _as_text(entry["registration_deadline"]),
        _as_text(entry["workshop_date"]),
        _as_text(entry["data_available_date"]),
        _as_text(entry["submission_deadline"]),
        _as_text(entry["paper_link"]),
    )


def update_entry(
    original_name: str,
    name: str,
    venue: str,
    website_url: str,
    languages: list,
    countries: list,
    year: str,
    domain: list,
    task: list,
    past_future: str,
    registration_deadline: str,
    workshop_date: str,
    data_available_date: str,
    submission_deadline: str,
    paper_link: str,
    profile: gr.OAuthProfile | None,
):
    """Update an existing entry and push the modified dataset to the Hub.

    Every row whose ``name`` equals ``original_name`` is overwritten with the
    validated form values, and its ``date_submitted`` is set to the current
    UTC time.

    Args:
        original_name: Name of the entry as it was when loaded for editing.
        name ... paper_link: New form values (same rules as submission).
        profile: OAuth profile of the logged-in user, or None when logged out.

    Returns:
        A user-facing status message; exceptions are not propagated.
    """
    # Login required
    if profile is None:
        return "❌ Error: You need to be logged in to edit entries."

    username = profile.username

    if not original_name:
        return "❌ No entry selected to edit."

    error = _validate_task_fields(
        name,
        venue,
        website_url,
        languages,
        countries,
        past_future,
        registration_deadline,
        workshop_date,
        data_available_date,
        submission_deadline,
        paper_link,
    )
    if error:
        return error

    try:
        # Load existing dataset
        existing_dataset = load_dataset(DATASET_NAME, CONFIG_NAME, split="train")
        existing_df = existing_dataset.to_pandas()

        # Find the entry
        mask = existing_df["name"] == original_name
        if not mask.any():
            return f"❌ Entry '{original_name}' not found."

        # Update the entry
        record = _build_task_record(
            name,
            venue,
            website_url,
            languages,
            countries,
            year,
            domain,
            task,
            past_future,
            registration_deadline,
            workshop_date,
            data_available_date,
            submission_deadline,
            paper_link,
        )
        for column, value in record.items():
            existing_df.loc[mask, column] = value
        existing_df.loc[mask, "date_submitted"] = _utc_timestamp()

        # Convert back to Dataset and push to hub
        updated_dataset = Dataset.from_pandas(existing_df)
        updated_dataset.push_to_hub(
            DATASET_NAME,
            config_name=CONFIG_NAME,
            commit_message=f"Update shared task entry: {name} (edited by {username})",
        )

        return f"✅ Successfully updated '{name}'!"

    except Exception as e:
        return f"❌ Error updating entry: {str(e)}"


def create_edit_tab():
    """Create the edit tab for modifying existing entries.

    Returns:
        A tuple with the search input, search button, results dropdown and
        edit-form column, followed by the fourteen form inputs, the update
        button and the result message.
    """
    with gr.TabItem("✏️ Edit", id=f"{RESOURCE_TYPE}_edit"):
        gr.Markdown(f"### Edit Existing {RESOURCE_TITLE}")
        gr.Markdown("Please log in to edit entries:")
        gr.LoginButton(elem_id=f"{RESOURCE_TYPE}-edit-oauth-button")

        gr.Markdown("Search for an entry to edit:")

        with gr.Row():
            search_input = gr.Textbox(
                label="Search by name or venue",
                placeholder="Enter shared task name or venue name...",
                scale=3,
            )
            search_btn = gr.Button("🔍 Search", scale=1)

        search_results = gr.Dropdown(
            label="Select entry to edit", choices=[], interactive=True
        )

        gr.Markdown("---")
        gr.Markdown("**Edit the selected entry:**")

        with gr.Column(visible=False) as edit_form:
            name_input = gr.Textbox(label="Name *", placeholder="Shared task name")
            venue_input = gr.Textbox(
                label="Venue *", placeholder="Conference or workshop name"
            )
            website_url_input = gr.Textbox(
                label="Website URL *", placeholder="https://..."
            )

            languages_input = gr.Dropdown(
                choices=LANGUAGES,
                label="Languages *",
                info="Select one or more languages (required)",
                multiselect=True,
                value=[],
            )

            countries_input = gr.Dropdown(
                choices=COUNTRIES,
                label="Countries *",
                info="Select one or more countries (required)",
                multiselect=True,
                value=[],
            )

            year_input = gr.Textbox(label="Year", placeholder="e.g., 2022, 2023")

            domain_input = gr.Dropdown(
                choices=DOMAINS,
                label="Domain",
                info="Select the task domain or area",
                multiselect=True,
                value=[],
            )

            task_input = gr.Dropdown(
                choices=TASKS,
                label="Task",
                info="Select one or more NLP tasks",
                multiselect=True,
                value=[],
            )

            past_future_input = gr.Radio(
                choices=["past", "future"],
                label="Past/Future *",
                info="Is this a past or future shared task?",
            )

            # Conditional fields for future tasks
            with gr.Group(visible=True) as future_fields:
                gr.Markdown("**Important Dates for Future Tasks** (format: DD/MM/YYYY)")

                with gr.Row():
                    registration_deadline_input = gr.Textbox(
                        label="Registration Deadline",
                        placeholder="DD/MM/YYYY",
                    )

                    workshop_date_input = gr.Textbox(
                        label="Workshop Date",
                        placeholder="DD/MM/YYYY",
                    )

                with gr.Row():
                    data_available_date_input = gr.Textbox(
                        label="Data Available Date",
                        placeholder="DD/MM/YYYY",
                    )

                    submission_deadline_input = gr.Textbox(
                        label="Submission Deadline",
                        placeholder="DD/MM/YYYY",
                    )

            # Conditional field for past tasks
            with gr.Group(visible=True) as past_fields:
                paper_link_input = gr.Textbox(
                    label="Paper Link",
                    placeholder="https://...",
                )

            update_btn = gr.Button("💾 Update Entry", variant="primary")
            result_msg = gr.Markdown()

        # Store the original name for updating
        original_name_state = gr.State("")

        def search_and_update_dropdown(query):
            results_df = search_entries(query)
            choices = [] if results_df.empty else results_df["name"].tolist()
            return gr.Dropdown(choices=choices, value=None)

        def load_entry_and_show_form(selected_entry):
            # Always return 16 values (column, stored name, 14 fields) so the
            # result lines up with the 16 outputs wired below.
            if not selected_entry:
                return (gr.Column(visible=False), "", *_empty_entry_values())

            entry_data = load_entry_for_edit(selected_entry)
            return (gr.Column(visible=True), selected_entry, *entry_data)

        # Event handlers
        search_btn.click(
            fn=search_and_update_dropdown,
            inputs=[search_input],
            outputs=[search_results],
        )

        search_results.change(
            fn=load_entry_and_show_form,
            inputs=[search_results],
            outputs=[
                edit_form,
                original_name_state,
                name_input,
                venue_input,
                website_url_input,
                languages_input,
                countries_input,
                year_input,
                domain_input,
                task_input,
                past_future_input,
                registration_deadline_input,
                workshop_date_input,
                data_available_date_input,
                submission_deadline_input,
                paper_link_input,
            ],
        )

        # Show/hide conditional fields when the past/future choice changes
        past_future_input.change(
            fn=_toggle_conditional_fields,
            inputs=[past_future_input],
            outputs=[future_fields, past_fields],
        )

        update_btn.click(
            fn=update_entry,
            inputs=[
                original_name_state,
                name_input,
                venue_input,
                website_url_input,
                languages_input,
                countries_input,
                year_input,
                domain_input,
                task_input,
                past_future_input,
                registration_deadline_input,
                workshop_date_input,
                data_available_date_input,
                submission_deadline_input,
                paper_link_input,
            ],
            outputs=[result_msg],
        )

    return (
        search_input,
        search_btn,
        search_results,
        edit_form,
        name_input,
        venue_input,
        website_url_input,
        languages_input,
        countries_input,
        year_input,
        domain_input,
        task_input,
        past_future_input,
        registration_deadline_input,
        workshop_date_input,
        data_available_date_input,
        submission_deadline_input,
        paper_link_input,
        update_btn,
        result_msg,
    )


def create_tab():
    """Create the complete tab for this resource type.

    Returns:
        ``(past_table, future_table, inputs, edit_components)`` as produced by
        the four sub-tab builders.
    """
    with gr.TabItem(f"🏆 {RESOURCE_TITLE}", id=RESOURCE_TYPE):
        with gr.Tabs():
            past_table = create_past_tab()
            future_table = create_future_tab()
            inputs = create_contribute_tab()
            edit_components = create_edit_tab()

    return past_table, future_table, inputs, edit_components