File size: 3,928 Bytes
571e9a7 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 |
# model_name=mistralai/Mistral-7B-v0.1
# # Baseline: NF3, no scale-shift
# for blocksize in 64 128 256 512 1024 2048 channel
# do
# manual_quantize=minmax_3_${blocksize}_no_0_False_True
# restore_and_scale_GO=False
# python evaluate.py --model hf-outlier \
# --model_args pretrained=${model_name},manual_quantize=${manual_quantize},restore_and_scale_GO=${restore_and_scale_GO},trust_remote_code=True,dtype=float16 \
# --tasks wikitext \
# --device cuda:0 \
# --batch_size 4 \
# --output_path outputs/${model_name}/groupwise/nf3/minmax/manual_${manual_quantize}_restore_scale-${restore_and_scale_GO}_core \
# --trust_remote_code \
# done
# # Baseline: NF4, no scale-shift
# for blocksize in 64 128 256 512 1024 2048 4096 channel
# do
# manual_quantize=minmax_4_${blocksize}_no_0_False_True
# restore_and_scale_GO=False
# python evaluate.py --model hf-outlier \
# --model_args pretrained=${model_name},manual_quantize=${manual_quantize},restore_and_scale_GO=${restore_and_scale_GO},trust_remote_code=True,dtype=float16 \
# --tasks wikitext \
# --device cuda:0 \
# --batch_size 4 \
# --output_path outputs/${model_name}/groupwise/nf4/minmax/manual_${manual_quantize}_restore_scale-${restore_and_scale_GO}_core \
# --trust_remote_code \
# done
# model_name=allenai/OLMo-7B-0724-hf
model_name=mistralai/Mistral-7B-v0.1
# Baseline: INT4, no scale-shift
# for blocksize in tensor 1048576 262144 65536 16384
# do
# manual_quantize=minmax_4_${blocksize}_no_0_False_False
# restore_and_scale_GO=False
# python evaluate.py --model hf-outlier
# --model_args pretrained=${model_name},manual_quantize=${manual_quantize},restore_and_scale_GO=${restore_and_scale_GO},trust_remote_code=True,dtype=float16 \
# --tasks wikitext,winogrande,arc_challenge,arc_easy,piqa,sciq,hellaswag,lambada_openai \
# --device cuda:0 \
# --batch_size 4 \
# --output_path outputs/${model_name}/groupwise/int4/minmax/manual_${manual_quantize}_restore_scale-${restore_and_scale_GO}_core \
# --trust_remote_code \
# done
for blocksize in 65536
do
restore_and_scale_GO=1.0
for manual_quantize in clip_4_${blocksize}_z_9_False_False clip_4_${blocksize}_z_11_False_False clip_4_${blocksize}_tp_1e-6_False_False
do
python evaluate.py --model hf-outlier
--model_args pretrained=${model_name},manual_quantize=${manual_quantize},restore_and_scale_GO=${restore_and_scale_GO},trust_remote_code=True,dtype=float16 \
--tasks winogrande,arc_challenge,arc_easy,piqa,sciq,hellaswag,lambada_openai \
--device cuda:0 \
--batch_size 4 \
--output_path outputs/${model_name}/groupwise/int4/ours/manual_${manual_quantize}_restore_scale-${restore_and_scale_GO}_core \
--trust_remote_code \
done
done
# # # Ours: NF3, clip + restore
# for blocksize in tensor 1048576 262144 65536 16384
# do
# restore_and_scale_GO in 1.0
# for manual_quantize in clip_4_${blocksize}_z_12_False_False clip_4_${blocksize}_bp_5e-5_False_True clip_4_${blocksize}_bp_1e-6_False_False clip_4_${blocksize}_z_20_False_False
# do
# python evaluate.py --model hf-outlier
# --model_args pretrained=${model_name},manual_quantize=${manual_quantize},restore_and_scale_GO=${restore_and_scale_GO},trust_remote_code=True,dtype=float16 \
# --tasks wikitext,winogrande,arc_challenge,arc_easy,piqa,sciq,hellaswag,lambada_openai \
# --device cuda:0 \
# --batch_size 4 \
# --output_path outputs/${model_name}/groupwise/int4/ours/manual_${manual_quantize}_restore_scale-${restore_and_scale_GO}_core \
# --trust_remote_code \
# done
# done
# --tasks winogrande,arc_challenge,arc_easy,piqa,sciq,hellaswag,lambada_openai \ |