""" Script to upload edited CSV files from the data/ folder back to the HuggingFace dataset. Upload all configs: ```bash python upload_datasets.py ``` Upload a single config: ```bash python upload_datasets.py events python upload_datasets.py datasets python upload_datasets.py models python upload_datasets.py shared_tasks python upload_datasets.py initiatives ``` """ import os import pandas as pd from datasets import Dataset # Dataset configuration DATASET_NAME = "somosnlp/recursos-pln-es" DATA_FOLDER = "data" # All available configs in the dataset CONFIGS = ["datasets", "models", "shared_tasks", "events", "initiatives"] def upload_config(config_name: str): """Upload a specific CSV file as a dataset config.""" csv_filename = os.path.join(DATA_FOLDER, f"{config_name}.csv") if not os.path.exists(csv_filename): print(f"❌ File not found: {csv_filename}") return False try: print(f"📤 Uploading {config_name} config from {csv_filename}...") # Load CSV file df = pd.read_csv(csv_filename) print(f" 📊 Loaded {df.shape[0]} rows, {df.shape[1]} columns") # Convert to Dataset and push to hub dataset = Dataset.from_pandas(df) dataset.push_to_hub( DATASET_NAME, config_name=config_name, commit_message=f"Update {config_name} dataset", token=True, ) print(f"✅ Successfully uploaded {config_name} config") return True except Exception as e: print(f"❌ Failed to upload {config_name}: {e}") return False def upload_all_datasets(): """Upload all CSV files as dataset configs.""" print("🚀 Uploading CSV files to HuggingFace dataset...") print(f"Dataset: {DATASET_NAME}") print(f"Source folder: {DATA_FOLDER}/") print() # Check if data folder exists if not os.path.exists(DATA_FOLDER): print(f"❌ Error: {DATA_FOLDER}/ folder not found!") print(f"Run download_datasets.py first to create the CSV files.") return # Upload each config successful_uploads = 0 total_configs = len(CONFIGS) for config_name in CONFIGS: if upload_config(config_name): successful_uploads += 1 print() # Add spacing between configs # Summary print("=" * 50) print(f"📊 Upload Summary:") print(f" Total configs: {total_configs}") print(f" Successfully uploaded: {successful_uploads}") print(f" Failed: {total_configs - successful_uploads}") if successful_uploads == total_configs: print("🎉 All datasets uploaded successfully!") print( f"🔗 View updated dataset: https://huggingface.co/datasets/{DATASET_NAME}" ) else: print("⚠️ Some uploads failed. Check the errors above.") def upload_single_config(config_name: str): """Upload a single config by name.""" if config_name not in CONFIGS: print(f"❌ Invalid config name: {config_name}") print(f"Available configs: {', '.join(CONFIGS)}") return print(f"🚀 Uploading single config: {config_name}") print() if upload_config(config_name): print("🎉 Upload completed successfully!") print( f"🔗 View updated dataset: https://huggingface.co/datasets/{DATASET_NAME}" ) else: print("❌ Upload failed. Check the error above.") if __name__ == "__main__": import sys if len(sys.argv) > 1: # Upload specific config config_name = sys.argv[1] upload_single_config(config_name) else: # Upload all configs upload_all_datasets()