Spaces:
Sleeping
Sleeping
File size: 509 Bytes
import json
from pathlib import Path

from config.settings import PROCESSED_DIR
# Category assigned when a record has neither a primary category nor any
# entries in its "categories" list.
DEFAULT_PRIMARY_CATEGORY = "cs.LG"


def ensure_primary_category(
    data: dict, default: str = DEFAULT_PRIMARY_CATEGORY
) -> bool:
    """Fill in a missing or empty ``primary_category`` on *data*, in place.

    The first entry of ``data["categories"]`` is used when that value is
    non-empty; otherwise *default* is used.

    Args:
        data: One decoded JSON record.
        default: Category to assign when no categories are available.

    Returns:
        True if *data* was modified, False if it already had a truthy
        ``primary_category``.
    """
    if data.get("primary_category"):
        return False
    # NOTE(review): indexing assumes "categories" is a list of strings --
    # confirm against whatever produces the processed files.
    categories = data.get("categories", [])
    data["primary_category"] = categories[0] if categories else default
    return True


def fix_processed_files(
    processed_dir: Path, default: str = DEFAULT_PRIMARY_CATEGORY
) -> int:
    """Backfill ``primary_category`` in every ``*.json`` file of a directory.

    Files that needed a fix are rewritten in place as UTF-8 JSON with a
    two-space indent and non-ASCII characters left unescaped. Files that are
    not valid UTF-8 JSON, or whose top-level value is not an object, are
    reported and skipped so a single bad file does not abort the run.

    Args:
        processed_dir: Directory whose ``*.json`` files are examined
            (non-recursive).
        default: Category used when a record lists no categories.

    Returns:
        The number of files that were rewritten.
    """
    fixed = 0
    for path in processed_dir.glob("*.json"):
        try:
            with open(path, "r", encoding="utf-8") as fp:
                data = json.load(fp)
        except (json.JSONDecodeError, UnicodeDecodeError) as exc:
            print(f"Skipping {path}: not valid UTF-8 JSON ({exc})")
            continue

        # A top-level list/string/number has no fields to backfill.
        if not isinstance(data, dict):
            print(f"Skipping {path}: top-level JSON value is not an object")
            continue

        if not ensure_primary_category(data, default):
            continue

        with open(path, "w", encoding="utf-8") as fp:
            json.dump(data, fp, indent=2, ensure_ascii=False)
        fixed += 1
    return fixed


def main() -> None:
    """Fix every file under ``PROCESSED_DIR`` and print how many changed."""
    fixed = fix_processed_files(PROCESSED_DIR)
    print(f"Fixed {fixed} processed files")


# Guarded so that importing this module never rewrites files as a side effect.
if __name__ == "__main__":
    main()