File size: 585 Bytes
daafb32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
import json
from config.settings import RAW_DIR
from pathlib import Path


# Backfill "primary_category" in every raw metadata JSON file that lacks one.
fixed = 0

for meta_path in RAW_DIR.glob("*.json"):
    # paper_index.json is explicitly excluded from the repair pass.
    if meta_path.name == "paper_index.json":
        continue

    with open(meta_path, encoding="utf-8") as src:
        record = json.load(src)

    # A truthy primary_category means there is nothing to repair.
    if record.get("primary_category"):
        continue

    # Use the first listed category; fall back to "cs.LG" when none exist.
    categories = record.get("categories", [])
    if categories:
        record["primary_category"] = categories[0]
    else:
        record["primary_category"] = "cs.LG"

    # Rewrite the file in place, keeping non-ASCII characters unescaped.
    with open(meta_path, "w", encoding="utf-8") as dst:
        json.dump(record, dst, indent=2, ensure_ascii=False)
    fixed += 1

print(f"Fixed {fixed} raw metadata files")