mumble-cleanup / src /cleanup /export /quantize.py
adikuma's picture
initial upload: cleanup code and 688-pair seed dataset
fd0b01f verified
# Dynamic int8 quantization of the ONNX model via ONNX Runtime (ort). We use
# dynamic (not static) quantization because we do not have a calibration
# dataset, and dynamic works well for decoder-style LMs.
from pathlib import Path
def quantize_int8(fp32_onnx: Path, out_dir: Path) -> Path:
    """Dynamically quantize the ONNX model at ``fp32_onnx`` to int8 weights.

    The quantized model is written to ``out_dir / "model.onnx"``; ``out_dir``
    (including any missing parents) is created if it does not exist. Two
    progress lines are printed to stdout.

    Args:
        fp32_onnx: Path to the source ONNX model file. Anything accepted by
            ``pathlib.Path`` works.
        out_dir: Directory that receives the quantized ``model.onnx``.

    Returns:
        Path of the quantized model file that was written.

    Raises:
        FileNotFoundError: If ``fp32_onnx`` does not point to an existing file.
    """
    fp32_onnx = Path(fp32_onnx)
    out_dir = Path(out_dir)

    # Fail fast: validate the input before importing onnxruntime and before
    # creating anything on disk, so a bad path leaves no empty output dir.
    if not fp32_onnx.is_file():
        raise FileNotFoundError(f"fp32 ONNX model not found: {fp32_onnx}")

    # Function-scope import: importing this module does not pull in
    # onnxruntime; it is only needed when quantization actually runs.
    from onnxruntime.quantization import QuantType, quantize_dynamic

    out_dir.mkdir(parents=True, exist_ok=True)
    int8_path = out_dir / "model.onnx"

    print(f"[quantize] {fp32_onnx} -> {int8_path}")
    quantize_dynamic(
        model_input=str(fp32_onnx),
        model_output=str(int8_path),
        weight_type=QuantType.QInt8,  # signed 8-bit weights
    )
    print(f"[quantize] wrote {int8_path} ({int8_path.stat().st_size / 1e6:.1f} MB)")
    return int8_path