Spaces:
Running
Running
File size: 585 Bytes
daafb32 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 | import json
from config.settings import RAW_DIR
from pathlib import Path
# Backfill "primary_category" in the raw metadata JSON files under RAW_DIR.
# A file is rewritten only when its "primary_category" is missing or falsy;
# `fixed` counts how many files were rewritten.
fixed = 0
for meta_path in RAW_DIR.glob("*.json"):
    # paper_index.json is deliberately left untouched.
    if meta_path.name != "paper_index.json":
        record = json.loads(meta_path.read_text(encoding="utf-8"))
        if record.get("primary_category"):
            continue  # already populated, nothing to repair

        # Use the first listed category when there is one, else "cs.LG".
        # NOTE(review): assumes "categories" is a list of strings -- confirm.
        categories = record.get("categories", [])
        if categories:
            record["primary_category"] = categories[0]
        else:
            record["primary_category"] = "cs.LG"

        # Write back in place: 2-space indent, non-ASCII characters kept as-is.
        serialized = json.dumps(record, indent=2, ensure_ascii=False)
        meta_path.write_text(serialized, encoding="utf-8")
        fixed += 1

print(f"Fixed {fixed} raw metadata files")