"""Split one JSON annotation file into ``num_parts`` smaller JSON files.

The input is loaded from ``anno_json_path``; the parts are written to
``anno_output_dir`` as ``annotations_part_1.json`` ... ``annotations_part_N.json``.
"""

import json
import math
import os


anno_json_path = (
    "/mnt/petrelfs/zhuchenglin/LLaVA/playground/data/LLaVA-Pretrain/llava_gen_558k.json"
)
num_parts = 8
anno_output_dir = "../annotations/"


def split_annotations(annotation_data, num_parts, anno_output_dir):
    """Write ``annotation_data`` to ``num_parts`` consecutive JSON part files.

    Each part holds ``ceil(len(annotation_data) / num_parts)`` items, except
    that trailing parts may be shorter or empty when the data does not divide
    evenly. The size of every part is printed as it is written.

    Args:
        annotation_data: Sliceable sequence of JSON-serializable items
            (assumed to be the list loaded from the annotation file).
        num_parts: Number of part files to write; must be at least 1.
        anno_output_dir: Directory for the part files; created if missing.

    Returns:
        A list with the number of items written to each part, in order.

    Raises:
        ValueError: If ``num_parts`` is less than 1.
    """
    if num_parts < 1:
        raise ValueError(f"num_parts must be at least 1, got {num_parts}")

    total_annotations = len(annotation_data)
    annotations_per_part = math.ceil(total_annotations / num_parts)

    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(anno_output_dir, exist_ok=True)

    part_sizes = []
    for i in range(num_parts):
        start_idx = i * annotations_per_part
        # Slicing clamps at the end of the sequence, so no explicit min() is
        # needed for the last (possibly shorter or empty) part.
        annotations_subset = annotation_data[start_idx:start_idx + annotations_per_part]
        part_anno_json_path = os.path.join(
            anno_output_dir, f"annotations_part_{i + 1}.json"
        )
        with open(part_anno_json_path, "w", encoding="utf-8") as f:
            json.dump(annotations_subset, f)
        print(len(annotations_subset))
        part_sizes.append(len(annotations_subset))
    return part_sizes


def main():
    """Load the configured annotation file and split it into ``num_parts`` files."""
    # JSON text is UTF-8; do not rely on the locale's default encoding.
    with open(anno_json_path, "r", encoding="utf-8") as f:
        annotation_data = json.load(f)

    split_annotations(annotation_data, num_parts, anno_output_dir)

    # Message (Chinese): "Annotations were split into N parts and saved to the folder."
    print(f"标注已成功分成{num_parts}份,并保存到文件夹中。")


if __name__ == "__main__":
    main()
|
|