| """ | |
| Script to upload edited CSV files from the data/ folder back to the HuggingFace dataset. | |
| Upload all configs: | |
| ```bash | |
| python upload_datasets.py | |
| ``` | |
| Upload a single config: | |
| ```bash | |
| python upload_datasets.py events | |
| python upload_datasets.py datasets | |
| python upload_datasets.py models | |
| python upload_datasets.py shared_tasks | |
| python upload_datasets.py initiatives | |
| ``` | |
| """ | |
import os

import pandas as pd
from datasets import Dataset

# Dataset configuration
DATASET_NAME = "somosnlp/recursos-pln-es"
DATA_FOLDER = "data"

# All available configs in the dataset
CONFIGS = ["datasets", "models", "shared_tasks", "events", "initiatives"]

def upload_config(config_name: str):
    """Upload a specific CSV file as a dataset config."""
    csv_filename = os.path.join(DATA_FOLDER, f"{config_name}.csv")

    if not os.path.exists(csv_filename):
        print(f"❌ File not found: {csv_filename}")
        return False

    try:
        print(f"📤 Uploading {config_name} config from {csv_filename}...")

        # Load CSV file
        df = pd.read_csv(csv_filename)
        print(f"   📊 Loaded {df.shape[0]} rows, {df.shape[1]} columns")

        # Convert to Dataset and push to hub
        dataset = Dataset.from_pandas(df)
        dataset.push_to_hub(
            DATASET_NAME,
            config_name=config_name,
            commit_message=f"Update {config_name} dataset",
            token=True,
        )

        print(f"✅ Successfully uploaded {config_name} config")
        return True
    except Exception as e:
        print(f"❌ Failed to upload {config_name}: {e}")
        return False

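# Optional, illustrative helper (a sketch, not wired into the upload flow): one way to
# sanity-check a push is to reload the config from the Hub and compare row counts with the
# local CSV. This assumes the standard `datasets.load_dataset(repo_id, config_name)` API and
# that push_to_hub wrote the data to its default "train" split.
def verify_config(config_name: str) -> None:
    """Reload a config from the Hub and print its row count next to the local CSV's."""
    from datasets import load_dataset

    remote = load_dataset(DATASET_NAME, config_name, split="train")
    local = pd.read_csv(os.path.join(DATA_FOLDER, f"{config_name}.csv"))
    print(f"Hub rows: {remote.num_rows} | local rows: {len(local)}")
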
def upload_all_datasets():
    """Upload all CSV files as dataset configs."""
    print("🚀 Uploading CSV files to HuggingFace dataset...")
    print(f"Dataset: {DATASET_NAME}")
    print(f"Source folder: {DATA_FOLDER}/")
    print()

    # Check if data folder exists
    if not os.path.exists(DATA_FOLDER):
        print(f"❌ Error: {DATA_FOLDER}/ folder not found!")
        print("Run download_datasets.py first to create the CSV files.")
        return

    # Upload each config
    successful_uploads = 0
    total_configs = len(CONFIGS)

    for config_name in CONFIGS:
        if upload_config(config_name):
            successful_uploads += 1
        print()  # Add spacing between configs

    # Summary
    print("=" * 50)
    print("📊 Upload Summary:")
    print(f"   Total configs: {total_configs}")
    print(f"   Successfully uploaded: {successful_uploads}")
    print(f"   Failed: {total_configs - successful_uploads}")

    if successful_uploads == total_configs:
        print("🎉 All datasets uploaded successfully!")
        print(
            f"🔗 View updated dataset: https://huggingface.co/datasets/{DATASET_NAME}"
        )
    else:
        print("⚠️ Some uploads failed. Check the errors above.")

def upload_single_config(config_name: str):
    """Upload a single config by name."""
    if config_name not in CONFIGS:
        print(f"❌ Invalid config name: {config_name}")
        print(f"Available configs: {', '.join(CONFIGS)}")
        return

    print(f"🚀 Uploading single config: {config_name}")
    print()

    if upload_config(config_name):
        print("🎉 Upload completed successfully!")
        print(
            f"🔗 View updated dataset: https://huggingface.co/datasets/{DATASET_NAME}"
        )
    else:
        print("❌ Upload failed. Check the error above.")

if __name__ == "__main__":
    import sys

    if len(sys.argv) > 1:
        # Upload specific config
        config_name = sys.argv[1]
        upload_single_config(config_name)
    else:
        # Upload all configs
        upload_all_datasets()