thearnabsarkar committed on
Commit
f340460
·
verified ·
1 Parent(s): fda7d57

Upload json_semval/schema_utils.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. json_semval/schema_utils.py +59 -0
json_semval/schema_utils.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from __future__ import annotations
2
+
3
+ from typing import Any, Dict, List
4
+
5
+
6
def collect_enums(schema: Dict[str, Any]) -> Dict[str, List[str]]:
    """Collect every ``enum`` list found in a JSON-Schema-like dict.

    The schema is walked through ``properties`` (for object schemas) and
    ``items`` (for array schemas, when ``items`` is a single dict).

    Paths are built by concatenating ``.<key>`` for each property and
    ``[0]`` for array items; an enum on the root schema is stored under
    the empty string.

    Args:
        schema: The root schema mapping.

    Returns:
        A mapping from path to the enum members, each converted with ``str``.
    """
    enums: Dict[str, List[str]] = {}

    def _has_type(subschema: Dict[str, Any], name: str) -> bool:
        # "type" may be a single name or a list such as ["object", "null"].
        declared = subschema.get("type")
        if isinstance(declared, list):
            return name in declared
        return declared == name

    def _walk(subschema: Any, path: List[str]) -> None:
        # Boolean schemas (True/False) and other non-dict nodes carry no
        # keywords; skip them instead of failing on the membership test.
        if not isinstance(subschema, dict):
            return

        values = subschema.get("enum")
        if isinstance(values, list):
            enums["".join(path)] = [str(v) for v in values]

        if _has_type(subschema, "object"):
            props = subschema.get("properties")
            # Tolerate a missing or malformed "properties" entry.
            if isinstance(props, dict):
                for key, child in props.items():
                    _walk(child, path + [f".{key}"])

        if _has_type(subschema, "array"):
            items = subschema.get("items")
            if isinstance(items, dict):
                _walk(items, path + ["[0]"])

    _walk(schema, [])
    return enums
23
+
24
+
25
def collect_formats(schema: Dict[str, Any]) -> Dict[str, str]:
    """Collect every ``format`` keyword found in a JSON-Schema-like dict.

    The schema is walked through ``properties`` (for object schemas) and
    ``items`` (for array schemas, when ``items`` is a single dict).

    Paths are built by concatenating ``.<key>`` for each property and
    ``[0]`` for array items; a format on the root schema is stored under
    the empty string.

    Args:
        schema: The root schema mapping.

    Returns:
        A mapping from path to the ``format`` value, stored unchanged.
    """
    fmts: Dict[str, str] = {}

    def _has_type(subschema: Dict[str, Any], name: str) -> bool:
        # "type" may be a single name or a list such as ["array", "null"].
        declared = subschema.get("type")
        if isinstance(declared, list):
            return name in declared
        return declared == name

    def _walk(subschema: Any, path: List[str]) -> None:
        # Boolean schemas (True/False) and other non-dict nodes carry no
        # keywords; skip them instead of failing on the membership test.
        if not isinstance(subschema, dict):
            return

        if "format" in subschema:
            fmts["".join(path)] = subschema["format"]

        if _has_type(subschema, "object"):
            props = subschema.get("properties")
            # Tolerate a missing or malformed "properties" entry.
            if isinstance(props, dict):
                for key, child in props.items():
                    _walk(child, path + [f".{key}"])

        if _has_type(subschema, "array"):
            items = subschema.get("items")
            if isinstance(items, dict):
                _walk(items, path + ["[0]"])

    _walk(schema, [])
    return fmts
41
+
42
+
43
def collect_ranges(schema: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Collect value constraints found in a JSON-Schema-like dict.

    The keywords gathered are ``minimum``, ``maximum``, ``minLength``,
    ``maxLength`` and ``pattern``. The schema is walked through
    ``properties`` (for object schemas) and ``items`` (for array schemas,
    when ``items`` is a single dict).

    Paths are built by concatenating ``.<key>`` for each property and
    ``[0]`` for array items; constraints on the root schema are stored
    under the empty string. Only paths with at least one of the keywords
    appear in the result.

    Args:
        schema: The root schema mapping.

    Returns:
        A mapping from path to a dict of the constraint keywords present
        there, with their values stored unchanged.
    """
    ranges: Dict[str, Dict[str, Any]] = {}
    # Defined once here rather than rebuilt on every recursive call.
    constraint_keys = ("minimum", "maximum", "minLength", "maxLength", "pattern")

    def _has_type(subschema: Dict[str, Any], name: str) -> bool:
        # "type" may be a single name or a list such as ["object", "null"].
        declared = subschema.get("type")
        if isinstance(declared, list):
            return name in declared
        return declared == name

    def _walk(subschema: Any, path: List[str]) -> None:
        # Boolean schemas (True/False) and other non-dict nodes carry no
        # keywords; skip them instead of failing on the membership test.
        if not isinstance(subschema, dict):
            return

        found = {k: subschema[k] for k in constraint_keys if k in subschema}
        if found:
            # setdefault keeps entries already recorded under the same path.
            ranges.setdefault("".join(path), {}).update(found)

        if _has_type(subschema, "object"):
            props = subschema.get("properties")
            # Tolerate a missing or malformed "properties" entry.
            if isinstance(props, dict):
                for key, child in props.items():
                    _walk(child, path + [f".{key}"])

        if _has_type(subschema, "array"):
            items = subschema.get("items")
            if isinstance(items, dict):
                _walk(items, path + ["[0]"])

    _walk(schema, [])
    return ranges