thearnabsarkar's picture
Upload json_semval/schema_utils.py with huggingface_hub
f340460 verified
raw
history blame
2.17 kB
from __future__ import annotations
from typing import Any, Dict, List
def collect_enums(schema: Dict[str, Any]) -> Dict[str, List[str]]:
    """Collect every ``enum`` declared in a JSON-Schema-like mapping.

    The schema is walked recursively through ``properties`` (for nodes whose
    ``type`` is or includes ``"object"``) and ``items`` (for nodes whose
    ``type`` is or includes ``"array"``).

    Args:
        schema: Root schema node. Non-dict nodes (for example boolean
            schemas) are ignored wherever they appear.

    Returns:
        A dict keyed by path string. The root path is ``""``, each property
        adds ``".<name>"`` and array items add ``"[0]"``. Each value is the
        list of enum members converted with ``str()``.
    """
    enums: Dict[str, List[str]] = {}

    def _walk(subschema: Any, path: List[str]) -> None:
        # Boolean schemas (``true``/``false``) and other non-mapping nodes
        # carry no keywords; skip them instead of raising.
        if not isinstance(subschema, dict):
            return

        values = subschema.get("enum")
        if isinstance(values, list):
            enums["".join(path)] = [str(v) for v in values]

        # ``type`` may be a single name or a list of names
        # (e.g. ``["object", "null"]``); normalise to a list.
        declared = subschema.get("type")
        types = declared if isinstance(declared, list) else [declared]

        if "object" in types:
            props = subschema.get("properties")
            if isinstance(props, dict):
                for key, child in props.items():
                    _walk(child, path + [f".{key}"])

        if "array" in types:
            items = subschema.get("items")
            if isinstance(items, dict):
                _walk(items, path + ["[0]"])

    _walk(schema, [])
    return enums
def collect_formats(schema: Dict[str, Any]) -> Dict[str, str]:
    """Collect every ``format`` declared in a JSON-Schema-like mapping.

    The schema is walked recursively through ``properties`` (for nodes whose
    ``type`` is or includes ``"object"``) and ``items`` (for nodes whose
    ``type`` is or includes ``"array"``).

    Args:
        schema: Root schema node. Non-dict nodes (for example boolean
            schemas) are ignored wherever they appear.

    Returns:
        A dict keyed by path string. The root path is ``""``, each property
        adds ``".<name>"`` and array items add ``"[0]"``. Each value is the
        node's ``format`` value, stored unchanged.
    """
    fmts: Dict[str, str] = {}

    def _walk(subschema: Any, path: List[str]) -> None:
        # Boolean schemas (``true``/``false``) and other non-mapping nodes
        # carry no keywords; skip them instead of raising.
        if not isinstance(subschema, dict):
            return

        if "format" in subschema:
            fmts["".join(path)] = subschema["format"]

        # ``type`` may be a single name or a list of names
        # (e.g. ``["array", "null"]``); normalise to a list.
        declared = subschema.get("type")
        types = declared if isinstance(declared, list) else [declared]

        if "object" in types:
            props = subschema.get("properties")
            if isinstance(props, dict):
                for key, child in props.items():
                    _walk(child, path + [f".{key}"])

        if "array" in types:
            items = subschema.get("items")
            if isinstance(items, dict):
                _walk(items, path + ["[0]"])

    _walk(schema, [])
    return fmts
def collect_ranges(schema: Dict[str, Any]) -> Dict[str, Dict[str, Any]]:
    """Collect numeric/length/pattern constraints from a JSON-Schema-like mapping.

    For every node, the keywords ``minimum``, ``maximum``, ``minLength``,
    ``maxLength`` and ``pattern`` are copied when present. The schema is
    walked recursively through ``properties`` (for nodes whose ``type`` is or
    includes ``"object"``) and ``items`` (for nodes whose ``type`` is or
    includes ``"array"``).

    Args:
        schema: Root schema node. Non-dict nodes (for example boolean
            schemas) are ignored wherever they appear.

    Returns:
        A dict keyed by path string. The root path is ``""``, each property
        adds ``".<name>"`` and array items add ``"[0]"``. Each value is a
        dict of the constraint keywords found on that node; nodes with none
        of the keywords get no entry.
    """
    constraint_keys = ("minimum", "maximum", "minLength", "maxLength", "pattern")
    ranges: Dict[str, Dict[str, Any]] = {}

    def _walk(subschema: Any, path: List[str]) -> None:
        # Boolean schemas (``true``/``false``) and other non-mapping nodes
        # carry no keywords; skip them instead of raising.
        if not isinstance(subschema, dict):
            return

        found = {k: subschema[k] for k in constraint_keys if k in subschema}
        if found:
            ranges.setdefault("".join(path), {}).update(found)

        # ``type`` may be a single name or a list of names
        # (e.g. ``["object", "null"]``); normalise to a list.
        declared = subschema.get("type")
        types = declared if isinstance(declared, list) else [declared]

        if "object" in types:
            props = subschema.get("properties")
            if isinstance(props, dict):
                for key, child in props.items():
                    _walk(child, path + [f".{key}"])

        if "array" in types:
            items = subschema.get("items")
            if isinstance(items, dict):
                _walk(items, path + ["[0]"])

    _walk(schema, [])
    return ranges