File size: 6,025 Bytes
3307548 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 |
from __future__ import annotations
import json
from pathlib import Path
import gradio as gr
from json_semval.pipeline import run_validation
def load_fixtures() -> tuple[str, str]:
    """Return the sample ``(schema, payload)`` pair as raw JSON text.

    Reads ``tests/fixtures/sample_schema.json`` and
    ``tests/fixtures/sample_bad.json`` relative to the current working
    directory. If either file cannot be read or decoded, a small built-in
    schema/payload pair is returned instead, so the caller always receives
    two strings.

    Returns:
        A ``(schema_text, payload_text)`` tuple.
    """
    fixtures_dir = Path("tests/fixtures")
    try:
        schema = (fixtures_dir / "sample_schema.json").read_text(encoding="utf-8")
        bad = (fixtures_dir / "sample_bad.json").read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        # Only "file missing/unreadable" and "not valid UTF-8" trigger the
        # fallback; any other exception is a programming error and should
        # surface instead of being masked by the built-in example.
        example_schema = '{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}'
        example_json = '{"name":"Alice"}'
        return example_schema, example_json
    return schema, bad
def infer(
    schema_text: str, json_text: str, backend: str, apply_minimal: bool
) -> tuple[str, str, str]:
    """Validate *json_text* against *schema_text* and return display strings.

    Both inputs are parsed as JSON and handed to ``run_validation`` (imported
    from ``json_semval.pipeline``) together with *backend* and
    *apply_minimal* (forwarded as ``apply_fixes``).

    Returns:
        ``(rule_errors, ml_predictions, corrected_json)``, each pretty-printed
        JSON text. If either input fails to parse, returns
        ``("[]", "Invalid input JSON: <error>", json_text)`` without running
        the validation.
    """
    try:
        parsed_schema, parsed_payload = json.loads(schema_text), json.loads(json_text)
    except Exception as exc:
        return "[]", f"Invalid input JSON: {exc}", json_text

    result = run_validation(
        parsed_schema, parsed_payload, apply_fixes=apply_minimal, backend=backend
    )

    def _pretty(key, default):
        # The report is read with .get(), so a missing section falls back to
        # the supplied default instead of raising.
        return json.dumps(result.get(key, default), indent=2)

    return (
        _pretty("rule_errors", []),
        _pretty("ml_predictions", []),
        _pretty("corrected_json", parsed_payload),
    )
def _fixture_or(path: str, fallback: str) -> str:
    """Return the UTF-8 text of *path*, or *fallback* if it cannot be read."""
    try:
        return Path(path).read_text(encoding="utf-8")
    except (OSError, UnicodeError):
        # Missing, unreadable or undecodable fixture: use the inline example
        # so importing this module never fails because of the fixtures.
        return fallback


# Maps the label shown in the "Load Example" dropdown to a
# (schema text, payload text) pair. Example 1 prefers the on-disk fixtures
# (resolved against the current working directory at import time) and falls
# back to an inline schema/payload when they are unavailable.
EXAMPLES: dict[str, tuple[str, str]] = {
    "Example 1: Fixtures (bad)": (
        _fixture_or(
            "tests/fixtures/sample_schema.json",
            '{"type":"object","properties":{"age":{"type":"integer"},"start_date":{"type":"string","format":"date"},"active":{"type":"boolean"},"status":{"type":"string","enum":["pending","approved","rejected"]}},"required":["age","start_date","active","status"]}',
        ),
        _fixture_or(
            "tests/fixtures/sample_bad.json",
            '{"age":"twenty five","active":"yes","start_date":"15 Jan 2024","status":"pendng"}',
        ),
    ),
    "Example 2: Minimal valid": (
        '{"type":"object","properties":{"name":{"type":"string"}},"required":["name"]}',
        '{"name":"Alice"}',
    ),
    "Example 3: Enum/date mix": (
        '{"type":"object","properties":{"status":{"type":"string","enum":["pending","approved","rejected"]},"d":{"type":"string","format":"date"}},"required":["status","d"]}',
        '{"status":"Pendng","d":"01/02/2024"}',
    ),
}
# Build the Gradio UI and bind it to `demo` (launched by the __main__ guard).
# NOTE(review): indentation was lost in this copy of the file, so which
# widgets sit inside which gr.Row() cannot be read off the text below —
# confirm the nesting against the original source before reformatting.
with gr.Blocks(title="JSON Semantic Validator") as demo:
# Page heading and one-line description, rendered as Markdown.
gr.Markdown(
"""
# JSON Semantic Validator
Hybrid rules + tiny ML to validate and auto-fix JSON against a schema.
"""
)
# Input editors: the JSON Schema and the JSON payload to validate, both as text.
with gr.Row():
schema_in = gr.Code(label="JSON Schema", language="json")
json_in = gr.Code(label="JSON Payload", language="json")
# Run options and actions.
with gr.Row():
# Backend choices offered to the user; "rules-only" is the default selection.
backend = gr.Dropdown(
["rules-only", "local", "onnx"], value="rules-only", label="Backend"
)
# NOTE(review): starts checked and editable although the default backend is
# "rules-only"; on_backend_change only adjusts it from the backend's change
# event. on_run forces apply_fixes=False for "rules-only" regardless, so the
# initial mismatch looks cosmetic — confirm.
apply_minimal = gr.Checkbox(value=True, label="Apply minimal fixes")
# Dropdown listing the EXAMPLES labels; selecting one fills both editors.
example_dd = gr.Dropdown(list(EXAMPLES.keys()), label="Load Example")
load_btn = gr.Button("Load Fixtures")
run_btn = gr.Button("Run Validation")
# Output panes, filled by on_run.
with gr.Row():
rule_errors_out = gr.Code(label="Rule Errors", language="json")
ml_preds_out = gr.Code(label="ML Predictions", language="json")
corrected_out = gr.Code(label="Corrected JSON", language="json")
# Markdown slot that receives on_run's validity summary line.
validity_md = gr.Markdown(visible=True)
def _backend_value(b: str) -> str:
    """Translate the dropdown's backend label into the value passed on.

    "rules-only" is sent as "local" (the caller turns fixes off in that
    case); every other label is returned unchanged.
    """
    return "local" if b == "rules-only" else b
def on_load() -> tuple[str, str]:
    """Click handler for "Load Fixtures": return the fixture schema and payload text."""
    schema_text, payload_text = load_fixtures()
    return schema_text, payload_text
def on_example_select(label: str) -> tuple[str, str]:
    """Return the (schema, payload) texts for the chosen example label.

    An unknown or empty selection yields two empty strings, which clears
    both editors.
    """
    return EXAMPLES.get(label) or ("", "")
def on_run(
    schema_text: str, json_text: str, b: str, do_fix: bool
) -> tuple[str, str, str, str]:
    """Run the validation and build the four strings shown in the output panes.

    Two validations are executed on the parsed inputs: a baseline with
    ``backend="rules-only"`` and fixes disabled, and a "hybrid" run using the
    selected backend (mapped through ``_backend_value``). Fixes are applied in
    the hybrid run only when *do_fix* is set and the selected backend is not
    "rules-only". The detail panes always show the hybrid run's report.

    Args:
        schema_text: JSON Schema as text.
        json_text: JSON payload as text.
        b: Backend label selected in the dropdown.
        do_fix: State of the "Apply minimal fixes" checkbox.

    Returns:
        ``(rule_errors, ml_predictions, corrected_json, validity_markdown)``.
        On failure the tuple is ``("[]", <message>, json_text, "")``, where the
        message starts with "Invalid input JSON:" for unparsable input and
        with "Validation failed:" for errors raised while validating or
        formatting the report.
    """
    # Parse first, on its own, so that only genuine input problems are
    # reported as invalid JSON.
    try:
        schema = json.loads(schema_text)
        payload = json.loads(json_text)
    except (ValueError, TypeError, RecursionError) as e:
        return "[]", f"Invalid input JSON: {e}", json_text, ""

    try:
        # Compute rules-only and hybrid validity to show a delta badge.
        rules_only = run_validation(
            schema, payload, apply_fixes=False, backend="rules-only"
        )
        hybrid = run_validation(
            schema,
            payload,
            apply_fixes=(False if b == "rules-only" else do_fix),
            backend=_backend_value(b),
        )
        r_ok = 1 if rules_only.get("valid") else 0
        h_ok = 1 if hybrid.get("valid") else 0
        delta = (h_ok - r_ok) * 100
        validity_text = f"**Validity** — Rules-only: {r_ok*100}% · Hybrid: {h_ok*100}% · Δ: {delta:+d}%"
        # Return details based on the hybrid run to keep UX consistent.
        rule_errors = json.dumps(hybrid.get("rule_errors", []), indent=2)
        ml_preds = json.dumps(hybrid.get("ml_predictions", []), indent=2)
        corrected = json.dumps(hybrid.get("corrected_json", payload), indent=2)
    except Exception as e:
        # UI boundary: surface the failure in the output pane instead of
        # raising, but do not mislabel it as a JSON parsing problem.
        return "[]", f"Validation failed: {e}", json_text, ""
    return rule_errors, ml_preds, corrected, validity_text
# Event wiring: both loaders overwrite the schema and payload editors.
load_btn.click(fn=on_load, outputs=[schema_in, json_in])
example_dd.change(fn=on_example_select, inputs=[example_dd], outputs=[schema_in, json_in])

# "Run Validation" reads both editors plus the run options and fills the
# three result panes and the validity summary.
run_inputs = [schema_in, json_in, backend, apply_minimal]
run_outputs = [rule_errors_out, ml_preds_out, corrected_out, validity_md]
run_btn.click(fn=on_run, inputs=run_inputs, outputs=run_outputs)
# Keep the "Apply minimal fixes" checkbox in step with the selected backend.
def on_backend_change(b: str, current: bool):
    """Return the checkbox update for the newly selected backend.

    "rules-only" unticks the checkbox and locks it; any other backend only
    re-enables it. *current* (the checkbox state) is received but not used.
    """
    if b != "rules-only":
        return gr.update(interactive=True)
    return gr.update(value=False, interactive=False)


backend.change(
    fn=on_backend_change,
    inputs=[backend, apply_minimal],
    outputs=apply_minimal,
)
# Script entry point: start the app only when this file is executed directly.
if __name__ == "__main__":
# NOTE(review): share=True asks Gradio for a publicly reachable share link in
# addition to the local server — confirm that exposure is intended.
demo.launch(share=True)
|