#!/usr/bin/env bash
# End-to-end AWQ example for an OPT model. Runs four steps in order:
#   1. AWQ search, dumping the search results to awq_cache/
#   2. evaluation on wikitext with simulated (fake) quantization
#   3. export of real 4-bit quantized weights to quant_cache/
#   4. evaluation on wikitext of the exported quantized weights
#
# Requirements: the `awq` Python package must be importable and the model
# checkpoint must be available under /dataset/opt/$MODEL.

# Stop at the first failing step and reject unset variables: every step
# after the first consumes a file written by an earlier one, so continuing
# after a failure would only evaluate missing or stale artifacts.
set -eu

MODEL=opt-6.7b

# Paths shared by the steps below, defined once so they cannot drift apart.
model_path="/dataset/opt/${MODEL}"
awq_results="awq_cache/${MODEL}-w4-g128.pt"
quant_weights="quant_cache/${MODEL}-w4-g128-awq.pt"

# Make sure the dump targets have a directory to land in (a no-op when the
# directories already exist).
mkdir -p awq_cache quant_cache

# run AWQ search (optional; we provided the pre-computed results)
python -m awq.entry --model_path "${model_path}" \
  --w_bit 4 --q_group_size 128 \
  --run_awq --dump_awq "${awq_results}"

# evaluate the AWQ-quantized model (simulated pseudo quantization)
python -m awq.entry --model_path "${model_path}" \
  --tasks wikitext \
  --w_bit 4 --q_group_size 128 \
  --load_awq "${awq_results}" \
  --q_backend fake

# generate real quantized weights (w4)
python -m awq.entry --model_path "${model_path}" \
  --w_bit 4 --q_group_size 128 \
  --load_awq "${awq_results}" \
  --q_backend real --dump_quant "${quant_weights}"

# load and evaluate the real quantized model (smaller gpu memory usage)
python -m awq.entry --model_path "${model_path}" \
  --tasks wikitext \
  --w_bit 4 --q_group_size 128 \
  --load_quant "${quant_weights}"