# File size: 5,758 Bytes
# Dataset selection, sequence windowing and evaluation-metric settings.
dataset:
  dataset_name: "sevirlr"
  img_height: 128
  img_width: 128
  in_len: 7
  out_len: 6
  # seq_len equals in_len + out_len (7 + 6).
  seq_len: 13
  plot_stride: 1
  # NOTE(review): unit not stated in this file (presumably minutes) — confirm.
  interval_real_time: 10
  sample_mode: "sequent"
  stride: 6
  layout: "NTHWC"
  # null start/end dates leave the range open; only the split date is given.
  start_date: null
  train_test_split_date: [2019, 6, 1]
  end_date: null
  val_ratio: 0.1
  # Quoted on purpose: mode selectors are strings, not integers.
  metrics_mode: "0"
  metrics_list: ['csi', 'pod', 'sucr', 'bias']
  threshold_list: [16, 74, 133, 160, 181, 219]
  aug_mode: "2"
# Input/output sequence lengths, frame size and tensor layout.
layout:
  in_len: 7
  out_len: 6
  # Anchors are declared on the next three keys, but no alias to them
  # (*in_step, *out_step, *in_out_diff) appears in the visible config.
  in_step: &in_step 1
  out_step: &out_step 1
  in_out_diff: &in_out_diff 1
  img_height: 128
  img_width: 128
  data_channels: 1
  # Nested key with the same name as its parent section; this is the
  # axis-order string, not a sub-mapping.
  layout: "NTHWC"
# Optimizer, learning-rate schedule, early stopping and checkpoint settings.
optim:
  # NOTE(review): total/micro = 4; presumably the remainder is covered by
  # gradient accumulation and/or multiple devices — confirm in the trainer.
  total_batch_size: 8
  micro_batch_size: 2
  seed: 0
  float32_matmul_precision: "high"
  method: "adamw"
  lr: 1.0e-3
  wd: 1.0e-5
  betas: [0.9, 0.999]
  gradient_clip_val: 1.0
  max_epochs: 2000
  loss_type: "l2"
  # scheduler
  warmup_percentage: 0.1
  lr_scheduler_mode: "cosine"
  min_lr_ratio: 1.0e-3
  warmup_min_lr_ratio: 0.1
  # early stopping
  monitor: "val/loss"
  # monitor: "valid_loss_epoch"
  # early_stop is off, so the mode/patience values below are inactive
  # unless it is switched on.
  early_stop: false
  early_stop_mode: "min"
  early_stop_patience: 100
  save_top_k: 3
# Logging, monitoring and output-dump switches.
logging:
  logging_prefix: "PreDiff"
  monitor_lr: true
  monitor_device: false
  # NOTE(review): -1 presumably disables gradient-norm tracking — confirm.
  track_grad_norm: -1
  use_wandb: false
  profiler: null
  save_npy: true
# Trainer-loop settings (validation cadence, precision, sanity checks).
trainer:
  check_val_every_n_epoch: 50
  log_step_ratio: 0.001
  precision: 32
  find_unused_parameters: false
  num_sanity_val_steps: 2
# Evaluation and example-visualisation settings.
eval:
  # Indices of the examples picked from each split.
  train_example_data_idx_list: [0]
  val_example_data_idx_list: [0, 16, 32, 48, 64, 72, 96, 108, 128]
  test_example_data_idx_list: [0, 16, 32, 48, 64, 72, 96, 108, 128]
  eval_example_only: true
  # Both the aligned and the unaligned variants are enabled.
  eval_aligned: true
  eval_unaligned: true
  num_samples_per_context: 1
  # NOTE(review): `fs` is presumably a plot font size — confirm.
  fs: 20
  label_offset: [-0.5, 0.5]
  label_avg_int: false
  fvd_features: 400
# Model configuration: diffusion process, alignment network,
# latent denoiser and VAE.
model:
  diffusion:
    # Matches layout: out_len x img_height x img_width x data_channels.
    data_shape: [6, 128, 128, 1]
    beta_schedule: "linear"
    use_ema: true
    log_every_t: 100
    clip_denoised: false
    # Written with an explicit decimal point (as `lr: 1.0e-3` is) so that
    # YAML 1.1 loaders such as PyYAML resolve them as floats, not strings.
    linear_start: 1.0e-4
    linear_end: 2.0e-2
    cosine_s: 8.0e-3
    given_betas: null
    original_elbo_weight: 0.0
    v_posterior: 0.0
    l_simple_weight: 1.0
    parameterization: "eps"
    learn_logvar: true
    logvar_init: 0.0
    # latent diffusion
    # Last entry (64) matches vae.latent_channels.
    latent_shape: [6, 16, 16, 64]
    cond_stage_model: "__is_first_stage__"
    num_timesteps_cond: null
    cond_stage_trainable: false
    cond_stage_forward: null
    scale_by_std: false
    scale_factor: 1.0
    # First entry (7) matches in_len.
    latent_cond_shape: [7, 16, 16, 64]
  align:
    alignment_type: "avg_x"
    guide_scale: 50.0
    model_type: "cuboid"
    # NOTE(review): nesting was rebuilt from a copy with stripped
    # indentation; confirm that model_args ends at `out_len` and that
    # model_ckpt_path is a direct child of `align`.
    model_args:
      # Same shape as diffusion.latent_shape.
      input_shape: [6, 16, 16, 64]
      out_channels: 1
      base_units: 128
      scale_alpha: 1.0
      depth: [1, 1]
      downsample: 2
      downsample_type: "patch_merge"
      block_attn_patterns: "axial"
      num_heads: 4
      attn_drop: 0.1
      proj_drop: 0.1
      ffn_drop: 0.1
      ffn_activation: "gelu"
      gated_ffn: false
      norm_layer: "layer_norm"
      use_inter_ffn: true
      hierarchical_pos_embed: false
      pos_embed_type: "t+h+w"
      padding_type: "zeros"
      checkpoint_level: 0
      use_relative_pos: true
      self_attn_use_final_proj: true
      # global vectors
      num_global_vectors: 0
      use_global_vector_ffn: true
      use_global_self_attn: false
      separate_global_qkv: false
      global_dim_ratio: 1
      # initialization
      attn_linear_init_mode: "0"
      ffn_linear_init_mode: "0"
      ffn2_linear_init_mode: "2"
      attn_proj_linear_init_mode: "2"
      conv_init_mode: "0"
      down_linear_init_mode: "0"
      global_proj_linear_init_mode: "2"
      norm_init_mode: "0"
      # timestep embedding for diffusion
      time_embed_channels_mult: 4
      time_embed_use_scale_shift_norm: false
      time_embed_dropout: 0.0
      # readout
      pool: "attention"
      readout_seq: true
      out_len: 6
    model_ckpt_path: "pretrained_sevirlr_alignment_avg_x_cuboid_v1.pt"
  latent_model:
    # Same shapes as diffusion.latent_cond_shape and diffusion.latent_shape.
    input_shape: [7, 16, 16, 64]
    target_shape: [6, 16, 16, 64]
    base_units: 256
    # block_units: null
    scale_alpha: 1.0
    num_heads: 4
    attn_drop: 0.1
    proj_drop: 0.1
    ffn_drop: 0.1
    # inter-attn downsample/upsample
    downsample: 2
    downsample_type: "patch_merge"
    upsample_type: "upsample"
    upsample_kernel_size: 3
    # cuboid attention
    depth: [4, 4]
    self_pattern: "axial"
    # global vectors
    num_global_vectors: 0
    use_dec_self_global: false
    dec_self_update_global: true
    use_dec_cross_global: false
    use_global_vector_ffn: false
    use_global_self_attn: true
    separate_global_qkv: true
    global_dim_ratio: 1
    # misc
    ffn_activation: "gelu"
    gated_ffn: false
    norm_layer: "layer_norm"
    padding_type: "zeros"
    pos_embed_type: "t+h+w"
    checkpoint_level: 0
    use_relative_pos: true
    self_attn_use_final_proj: true
    # initialization
    attn_linear_init_mode: "0"
    ffn_linear_init_mode: "0"
    ffn2_linear_init_mode: "2"
    attn_proj_linear_init_mode: "2"
    conv_init_mode: "0"
    down_up_linear_init_mode: "0"
    global_proj_linear_init_mode: "2"
    norm_init_mode: "0"
    # timestep embedding for diffusion
    time_embed_channels_mult: 4
    time_embed_use_scale_shift_norm: false
    time_embed_dropout: 0.0
    unet_res_connect: true
  vae:
    pretrained_ckpt_path: "pretrained_sevirlr_vae_8x8x64_v1_2.pt"
    data_channels: 1
    down_block_types: ['DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D']
    in_channels: 1
    # Four entries, so three downsamplings per the rule below:
    # 128 / 2^3 = 16, the spatial size used in the latent shapes.
    block_out_channels: [128, 256, 512, 512]  # downsample `len(block_out_channels) - 1` times
    act_fn: 'silu'
    latent_channels: 64
    up_block_types: ['UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D']
    norm_num_groups: 32
    layers_per_block: 2
    out_channels: 1