Qwen-Image-Edit-Angles

Running on Zero

App Files Files Community

Elea Zhong committed 12 days ago

Commit

6064267

1 Parent(s): 789676e

triplet loss experiments (prelim)

Browse files

Files changed (26) hide show

configs/base.yaml +6 -3
configs/regression/base.yaml +6 -0
configs/regression/modal-datadirs.yaml +0 -2
configs/regression/modal.yaml +5 -0
configs/regression/mse-dm.yaml +0 -11
configs/regression/mse-neg-mse.yaml +0 -3
configs/regression/mse-pixel-lpips.yaml +0 -4
configs/regression/mse-pixel-mse.yaml +0 -4
configs/regression/mse-triplet.yaml +0 -13
configs/regression/mse.yaml +0 -2
configs/regression/triplet/mse-triplet-a.yaml +9 -0
configs/regression/triplet/mse-triplet-b.yaml +9 -0
configs/regression/triplet/mse-triplet-c.yaml +9 -0
configs/regression/triplet/mse-triplet-d.yaml +9 -0
configs/regression/triplet/mse-triplet-e.yaml +9 -0
configs/regression/val_metrics.yaml +0 -9
qwenimage/datamodels.py +10 -2
qwenimage/foundation.py +81 -30
qwenimage/loss.py +9 -1
scripts/logit_normal_dist.ipynb +29 -10
scripts/save_regression_outputs.py +3 -0
scripts/save_regression_outputs_modal.py +119 -0
scripts/straightness.ipynb +0 -0
scripts/train.py +1 -0
scripts/train_multi copy.sh +30 -0
scripts/train_multi.sh +15 -38

configs/base.yaml CHANGED Viewed

@@ -11,12 +11,12 @@ gradient_accumulation_steps: 1
 train_batch_size: 1
 optim: "adamw"
 learning_rate: 1.0e-4
-num_workers: 4
 resume_from_checkpoint: null
 log_model_steps: null
 preprocessing_epoch_len: 64
 preprocessing_epoch_repetitions: 1
 # Logging
 record_training: true
@@ -33,5 +33,8 @@ sample_steps:
   every: 500
 global_step: 0
 save_steps: 1000
-log_batch_steps: 500
 seed: 67

 train_batch_size: 1
 optim: "adamw"
 learning_rate: 1.0e-4
+num_workers: 8
 resume_from_checkpoint: null
 log_model_steps: null
 preprocessing_epoch_len: 64
 preprocessing_epoch_repetitions: 1
+lora_rank: 16
 # Logging
 record_training: true
   every: 500
 global_step: 0
 save_steps: 1000
+log_batch_steps:
+  "on": [0,1,100]
+  every: 500
 seed: 67

configs/regression/base.yaml CHANGED Viewed

@@ -19,3 +19,9 @@ regression_data_dir: "/data/regression_output"
 regression_gen_steps: 50
 editing_data_dir: "/data/CrispEdit"
 editing_total_per: 1

 regression_gen_steps: 50
 editing_data_dir: "/data/CrispEdit"
 editing_total_per: 1
+validation_loss_terms:
+  mse: 1.0
+  pixel_mse: 1.0
+  pixel_lpips: 1.0

configs/regression/modal-datadirs.yaml DELETED Viewed

	@@ -1,2 +0,0 @@
1	- regression_data_dir: "/data/regression_data/regression_output"
2	- editing_data_dir: "/data/edit_data/CrispEdit"

configs/regression/modal.yaml ADDED Viewed

	@@ -0,0 +1,5 @@

+regression_data_dir: "/data/regression_data/regression_output_1024"
+editing_data_dir: "/data/edit_data/CrispEdit"
+lora_rank: 32
+vae_image_size: 1048576 # 1024 * 1024

configs/regression/mse-dm.yaml DELETED Viewed

@@ -1,11 +0,0 @@
-wandb_run_name: "reg-mse-dm"
-output_dir: "/data/checkpoints/reg-mse-dm"
-train_loss_terms:
-  mse: 1.0
-  distribution_matching: 1.0
-validation_loss_terms:
-  mse: 1.0
-  distribution_matching: 1.0

configs/regression/mse-neg-mse.yaml CHANGED Viewed

@@ -5,7 +5,4 @@ train_loss_terms:
   mse: 1.0
   negative_mse: 0.1
-validation_loss_terms:
-  mse: 1.0
-  negative_mse: 0.1


5	mse: 1.0
6	negative_mse: 0.1
7



8

configs/regression/mse-pixel-lpips.yaml CHANGED Viewed

@@ -4,7 +4,3 @@ output_dir: "/data/checkpoints/reg-mse-pixel-lpips"
 train_loss_terms:
   mse: 1.0
   pixel_lpips: 1.0
-validation_loss_terms:
-  mse: 1.0
-  pixel_lpips: 1.0

 train_loss_terms:
   mse: 1.0
   pixel_lpips: 1.0

configs/regression/mse-pixel-mse.yaml CHANGED Viewed

@@ -5,7 +5,3 @@ train_loss_terms:
   mse: 1.0
   pixel_mse: 1.0
-validation_loss_terms:
-  mse: 1.0
-  pixel_mse: 1.0


5	mse: 1.0
6	pixel_mse: 1.0
7

configs/regression/mse-triplet.yaml DELETED Viewed

@@ -1,13 +0,0 @@
-wandb_run_name: "reg-mse-triplet"
-output_dir: "/data/checkpoints/reg-mse-triplet"
-train_loss_terms:
-  mse: 1.0
-  triplet: 1.0
-validation_loss_terms:
-  mse: 1.0
-  triplet: 1.0
-triplet_margin: -500 # tune

configs/regression/mse.yaml CHANGED Viewed

@@ -4,6 +4,4 @@ output_dir: "/data/checkpoints/reg-mse"
 train_loss_terms:
   mse: 1.0
-validation_loss_terms:
-  mse: 1.0


4	train_loss_terms:
5	mse: 1.0
6


7

configs/regression/triplet/mse-triplet-a.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-a"
+output_dir: "/data/checkpoints/reg-mse-triplet-a"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: 0.0
+  triplet_min_abs_diff: 0.0

configs/regression/triplet/mse-triplet-b.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-b"
+output_dir: "/data/checkpoints/reg-mse-triplet-b"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: 0.0
+  triplet_min_abs_diff: 0.1

configs/regression/triplet/mse-triplet-c.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-c"
+output_dir: "/data/checkpoints/reg-mse-triplet-c"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: 0.1
+  triplet_min_abs_diff: 0.1

configs/regression/triplet/mse-triplet-d.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+# Triplet experiment "d": margin 0.5, min abs diff 0.1.
+# Run name and checkpoint dir must be unique per variant so runs do not
+# overwrite each other's checkpoints or share a W&B run name.
+wandb_run_name: "reg-mse-triplet-d"
+output_dir: "/data/checkpoints/reg-mse-triplet-d"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: 0.5
+  triplet_min_abs_diff: 0.1

configs/regression/triplet/mse-triplet-e.yaml ADDED Viewed

	@@ -0,0 +1,9 @@

+wandb_run_name: "reg-mse-triplet-e"
+output_dir: "/data/checkpoints/reg-mse-triplet-e"
+train_loss_terms:
+  mse: 1.0
+  triplet: 1.0
+  triplet_margin: 0.1
+  triplet_min_abs_diff: 0.25

configs/regression/val_metrics.yaml DELETED Viewed

@@ -1,9 +0,0 @@
-validation_loss_terms:
-  mse: 1.0
-  pixel_mse: 1.0
-  pixel_lpips: 1.0

qwenimage/datamodels.py CHANGED Viewed

@@ -50,12 +50,20 @@ class QwenLossTerms(BaseModel):
     triplet: LossTermSpecType = 0.0
     negative_mse: LossTermSpecType = 0.0
     distribution_matching: LossTermSpecType = 0.0
-    negative_exponential: LossTermSpecType = 0.0
     pixel_lpips: LossTermSpecType = 0.0
     pixel_mse: LossTermSpecType = 0.0
     adversarial: LossTermSpecType = 0.0
-    triplet_margin: float = 0.2
 class QwenConfig(ExperimentTrainerParameters):
     load_multi_view_lora: bool = False

     triplet: LossTermSpecType = 0.0
     negative_mse: LossTermSpecType = 0.0
     distribution_matching: LossTermSpecType = 0.0
+    pixel_triplet: LossTermSpecType = 0.0
     pixel_lpips: LossTermSpecType = 0.0
     pixel_mse: LossTermSpecType = 0.0
+    pixel_distribution_matching: LossTermSpecType = 0.0
     adversarial: LossTermSpecType = 0.0
+    teacher: LossTermSpecType = 0.0
+    triplet_margin: float = 0.0
+    triplet_min_abs_diff: float = 0.0
+    teacher_steps: int = 4
+    @property
+    def pixel_terms(self) -> tuple[str, ...]:
+        """Return the names of the loss terms that make up the "pixel" group.
+
+        The tuple is passed to ``LossAccumulator`` as the ``"pixel"`` entry of
+        ``term_groups`` so that a single ``has_group("pixel")`` check can cover
+        every listed term.
+        """
+        return ("pixel_lpips", "pixel_mse", "pixel_triplet", "pixel_distribution_matching",)
 class QwenConfig(ExperimentTrainerParameters):
     load_multi_view_lora: bool = False

qwenimage/foundation.py CHANGED Viewed

@@ -395,13 +395,14 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
         split = batch["split"]
         step = batch["step"]
         if split == "train":
-            loss_terms = self.config.train_loss_terms.model_dump()
         elif split == "validation":
-            loss_terms = self.config.validation_loss_terms.model_dump()
         loss_accumulator = LossAccumulator(
-            terms=loss_terms,
             step=step,
             split=split,
         )
         if loss_accumulator.has("mse"):
@@ -414,31 +415,42 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
             loss_accumulator.accum("mse", mse_loss)
         if loss_accumulator.has("triplet"):
-            # eps = 1e-6
-            margin = loss_terms["triplet_margin"]
-            # v_span = (v_gt_1d - v_neg_1d).pow(2).sum(dim=(-2,-1))
-            # triplet_weight = (v_gt_1d - v_neg_1d).pow(2).mean(dim=(-2,-1))
-            diffv_gt_pred = (v_gt_1d - v_pred_1d).pow(2).sum(dim=(-2,-1))
-            diffv_neg_pred = (v_neg_1d - v_pred_1d).pow(2).sum(dim=(-2,-1))
-            # diffv_gt_pred_reg = diffv_gt_pred # / (v_span + eps)
-            # diffv_neg_pred_reg = diffv_neg_pred # / (v_span + eps)
-            # texam(v_span, name="v_span")
-            # texam(triplet_weight, name="triplet_weight")
-            texam(diffv_gt_pred, name="diffv_gt_pred")
-            texam(diffv_neg_pred, name="diffv_neg_pred")
-            # texam(diffv_gt_pred_reg, name="diffv_gt_pred_reg")
-            # texam(diffv_neg_pred_reg, name="diffv_neg_pred_reg")
-            # texam(diffv_gt_pred_reg - diffv_neg_pred_reg, name="diffv_gt_pred_reg - diffv_neg_pred_reg")
-            triplet_loss = F.relu(diffv_gt_pred - diffv_neg_pred + margin).mean()
-            #
-            # triplet_loss = F.relu(diffv_gt_pred_reg - diffv_neg_pred_reg + margin).mean()
-            # triplet_loss = torch.mean(triplet_loss_batched * triplet_weight)
             loss_accumulator.accum("triplet", triplet_loss)
         if loss_accumulator.has("negative_mse"):
             neg_mse_loss = -F.mse_loss(v_pred_1d, v_neg_1d, reduction="mean")
@@ -453,7 +465,7 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
         if loss_accumulator.has("negative_exponential"):
             raise NotImplementedError()
-        if loss_accumulator.has("pixel_lpips") or loss_accumulator.has("pixel_mse"):
             x_0_pred = x_t_1d - t * v_pred_1d
             pixel_values_x0_gt = self.latents_to_pil(x_0_1d, h=h_f16, w=w_f16, with_grad=True).detach()
             pixel_values_x0_pred = self.latents_to_pil(x_0_pred, h=h_f16, w=w_f16, with_grad=True)
@@ -468,6 +480,14 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
             if loss_accumulator.has("pixel_mse"):
                 pixel_mse_loss = F.mse_loss(pixel_values_x0_pred, pixel_values_x0_gt, reduction="mean")
                 loss_accumulator.accum("pixel_mse", pixel_mse_loss)
         if loss_accumulator.has("adversarial"):
             raise NotImplementedError()
@@ -492,7 +512,7 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
                 v_gt_1d,
                 v_neg_1d,
                 v_pred_1d,
-                visualize_velocities=True,
             )
         return loss
@@ -513,7 +533,7 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
         v_gt_1d,
         v_neg_1d,
         v_pred_1d,
-        visualize_velocities=True,
     ):
         t_float = t.float().cpu().item()
         x_0_pred = x_t_1d - t * v_pred_1d
@@ -526,18 +546,49 @@ class QwenImageRegressionFoundation(QwenImageFoundation):
             "x_0_pred": self.latents_to_pil(x_0_pred, h=h_f16, w=w_f16),
             "x_0_neg": self.latents_to_pil(x_0_neg, h=h_f16, w=w_f16),
         }
-        if visualize_velocities:
             log_pils.update({
                 "v_gt_1d": self.latents_to_pil(v_gt_1d, h=h_f16, w=w_f16),
                 "v_pred_1d": self.latents_to_pil(v_pred_1d, h=h_f16, w=w_f16),
                 "v_neg_1d": self.latents_to_pil(v_neg_1d, h=h_f16, w=w_f16),
             })
         wand_logger.log({
             "train_images": log_pils,
         }, commit=False)
     def base_pipe(self, inputs: QwenInputs) -> list[Image]:
-        inputs.num_inference_steps = self.config.regression_base_pipe_steps  # override
         return super().base_pipe(inputs)

         split = batch["split"]
         step = batch["step"]
         if split == "train":
+            loss_terms = self.config.train_loss_terms
         elif split == "validation":
+            loss_terms = self.config.validation_loss_terms
         loss_accumulator = LossAccumulator(
+            terms=loss_terms.model_dump(),
             step=step,
             split=split,
+            term_groups={"pixel":loss_terms.pixel_terms}
         )
         if loss_accumulator.has("mse"):
             loss_accumulator.accum("mse", mse_loss)
         if loss_accumulator.has("triplet"):
+            # 1d, B,L,C
+            margin = loss_terms.triplet_margin
+            triplet_min_abs_diff = loss_terms.triplet_min_abs_diff
+            print(f"{triplet_min_abs_diff=}")
+            v_gt_neg_diff = (v_gt_1d - v_neg_1d).abs().mean(dim=2, keepdim=True)
+            zero_weight = torch.zeros_like(v_gt_neg_diff)
+            v_weight = torch.where(v_gt_neg_diff > triplet_min_abs_diff, v_gt_neg_diff, zero_weight)
+            ones = torch.ones_like(v_gt_neg_diff)
+            filtered_nums = torch.sum(torch.where(v_gt_neg_diff > triplet_min_abs_diff, ones, zero_weight))
+            wand_logger.log({
+                "filtered_nums": filtered_nums,
+            }, commit=False)
+            diffv_gt_pred = (v_gt_1d - v_pred_1d).pow(2)
+            diffv_neg_pred = (v_neg_1d - v_pred_1d).pow(2)
+            loss_unreduced = diffv_gt_pred - diffv_neg_pred
+            loss_weighted = (loss_unreduced * v_weight).sum(dim=2)
+            triplet_loss = F.relu(loss_weighted + margin).mean()
+            ones = torch.ones_like(loss_weighted)
+            zeros = torch.zeros_like(loss_weighted)
+            loss_nonzero_nums = torch.sum(torch.where((loss_weighted + margin)>0, ones, zeros))
+            wand_logger.log({
+                "loss_nonzero_nums": loss_nonzero_nums,
+            }, commit=False)
             loss_accumulator.accum("triplet", triplet_loss)
+            texam(v_gt_neg_diff, "v_gt_neg_diff")
+            texam(v_weight, "v_weight")
+            texam(diffv_gt_pred, "diffv_gt_pred")
+            texam(diffv_neg_pred, "diffv_neg_pred")
+            texam(loss_unreduced, "loss_unreduced")
+            texam(loss_weighted, "loss_weighted")
         if loss_accumulator.has("negative_mse"):
             neg_mse_loss = -F.mse_loss(v_pred_1d, v_neg_1d, reduction="mean")
         if loss_accumulator.has("negative_exponential"):
             raise NotImplementedError()
+        if loss_accumulator.has_group("pixel"):
             x_0_pred = x_t_1d - t * v_pred_1d
             pixel_values_x0_gt = self.latents_to_pil(x_0_1d, h=h_f16, w=w_f16, with_grad=True).detach()
             pixel_values_x0_pred = self.latents_to_pil(x_0_pred, h=h_f16, w=w_f16, with_grad=True)
             if loss_accumulator.has("pixel_mse"):
                 pixel_mse_loss = F.mse_loss(pixel_values_x0_pred, pixel_values_x0_gt, reduction="mean")
                 loss_accumulator.accum("pixel_mse", pixel_mse_loss)
+            if loss_accumulator.has("pixel_triplet"):
+                raise NotImplementedError()
+                loss_accumulator.accum("pixel_triplet", pixel_triplet_loss)
+            if loss_accumulator.has("pixel_distribution_matching"):
+                raise NotImplementedError()
+                loss_accumulator.accum("pixel_distribution_matching", pixel_distribution_matching_loss)
         if loss_accumulator.has("adversarial"):
             raise NotImplementedError()
                 v_gt_1d,
                 v_neg_1d,
                 v_pred_1d,
+                visualize_velocities=False,
             )
         return loss
         v_gt_1d,
         v_neg_1d,
         v_pred_1d,
+        visualize_velocities=False,
     ):
         t_float = t.float().cpu().item()
         x_0_pred = x_t_1d - t * v_pred_1d
             "x_0_pred": self.latents_to_pil(x_0_pred, h=h_f16, w=w_f16),
             "x_0_neg": self.latents_to_pil(x_0_neg, h=h_f16, w=w_f16),
         }
+        if visualize_velocities: # naively visualizing through vae (works with flux)
             log_pils.update({
                 "v_gt_1d": self.latents_to_pil(v_gt_1d, h=h_f16, w=w_f16),
                 "v_pred_1d": self.latents_to_pil(v_pred_1d, h=h_f16, w=w_f16),
                 "v_neg_1d": self.latents_to_pil(v_neg_1d, h=h_f16, w=w_f16),
             })
+        # create gt-neg difference maps
+        v_pred_2d = self.unpack_latents(v_pred_1d, h_f16, w_f16)
+        v_gt_2d = self.unpack_latents(v_gt_1d, h_f16, w_f16)
+        v_neg_2d = self.unpack_latents(v_neg_1d, h_f16, w_f16)
+        gt_neg_diff_map_2d = (v_gt_2d - v_neg_2d).pow(2).mean(dim=1, keepdim=True)
+        gt_pred_diff_map_2d = (v_gt_2d - v_pred_2d).pow(2).mean(dim=1, keepdim=True)
+        neg_pred_diff_map_2d = (v_neg_2d - v_pred_2d).pow(2).mean(dim=1, keepdim=True)
+        diff_max = torch.max(torch.stack([gt_neg_diff_map_2d, gt_pred_diff_map_2d, neg_pred_diff_map_2d]))
+        diff_min = torch.min(torch.stack([gt_neg_diff_map_2d, gt_pred_diff_map_2d, neg_pred_diff_map_2d]))
+        print(f"{diff_min}, {diff_max}")
+        # norms to 0-1
+        diff_span = diff_max - diff_min
+        gt_neg_diff_map_2d = (gt_neg_diff_map_2d - diff_min) / diff_span
+        gt_pred_diff_map_2d = (gt_pred_diff_map_2d - diff_min) / diff_span
+        neg_pred_diff_map_2d = (neg_pred_diff_map_2d - diff_min) / diff_span
+        log_pils.update({
+            "gt-neg":gt_neg_diff_map_2d.float().cpu(),
+            "gt-pred":gt_pred_diff_map_2d.float().cpu(),
+            "neg-pred":neg_pred_diff_map_2d.float().cpu(),
+        })
         wand_logger.log({
             "train_images": log_pils,
         }, commit=False)
     def base_pipe(self, inputs: QwenInputs) -> list[Image]:
+        """Apply config-driven overrides to ``inputs`` and delegate to the parent pipe.
+
+        Mutates ``inputs`` in place. Only ``inputs.image[0]`` is used: it is
+        resized and written back as a single-element list, so any further
+        entries in ``inputs.image`` are dropped.
+        """
+        # config overrides
+        inputs.num_inference_steps = self.config.regression_base_pipe_steps
+        inputs.latent_size_override = self.config.vae_image_size
+        inputs.vae_image_override = self.config.vae_image_size
+        image = inputs.image[0]
+        # NOTE(review): assumes a PIL-style ``.size`` that yields (width, height) - confirm.
+        w,h = image.size
+        # NOTE(review): calculate_dimensions is defined outside this view; presumably it
+        # returns a size whose area is close to vae_image_size for the given aspect
+        # ratio - confirm that passing h/w really yields (height, width) in this order.
+        h_r, w_r = calculate_dimensions(self.config.vae_image_size, h/w)
+        image = TF.resize(image, (h_r, w_r))
+        inputs.image = [image]
+        # Keep the requested output size in sync with the resized input image.
+        inputs.height = h_r
+        inputs.width = w_r
         return super().base_pipe(inputs)

qwenimage/loss.py CHANGED Viewed

@@ -12,9 +12,11 @@ class LossAccumulator:
         terms: dict[str, int|float|dict],
         step: int|None=None,
         split: str|None=None,
     ):
         self.terms = terms
         self.step = step
         if split is not None:
             self.split = split
             self.prefix = f"{self.split}_"
@@ -55,7 +57,13 @@ class LossAccumulator:
         warnings.warn(f"Unknown spec type {spec}; treat as disabled")
         return 0.0
     def has(self, name: str) -> bool:
         return self.resolve_weight(name) > 0

         terms: dict[str, int|float|dict],
         step: int|None=None,
         split: str|None=None,
+        term_groups: dict[str, tuple[str, ...]]|None = None,
     ):
         self.terms = terms
         self.step = step
+        self.term_groups = term_groups
         if split is not None:
             self.split = split
             self.prefix = f"{self.split}_"
         warnings.warn(f"Unknown spec type {spec}; treat as disabled")
         return 0.0
+    def has_group(self, name: str) -> bool:
+        """Return True when at least one term of the named group is enabled.
+
+        A term counts as enabled when ``resolve_weight`` gives it a weight
+        greater than zero. Unknown group names, and an accumulator created
+        without ``term_groups``, yield False.
+        """
+        # term_groups defaults to None in __init__; a membership test on None
+        # would raise TypeError, so treat "no groups configured" as "not present".
+        if not self.term_groups or name not in self.term_groups:
+            return False
+        return any(self.resolve_weight(tn) > 0 for tn in self.term_groups[name])
     def has(self, name: str) -> bool:
         return self.resolve_weight(name) > 0

scripts/logit_normal_dist.ipynb CHANGED Viewed

@@ -76,17 +76,17 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 9,
    "id": "aec3ae8f",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "[<matplotlib.lines.Line2D at 0x74338838e380>]"
       ]
      },
-     "execution_count": 9,
      "metadata": {},
      "output_type": "execute_result"
     },
@@ -117,7 +117,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 16,
    "id": "3bc68e7c",
    "metadata": {},
    "outputs": [
@@ -127,7 +127,7 @@
        "1.0986122886681098"
       ]
      },
-     "execution_count": 16,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -139,7 +139,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 17,
    "id": "facb782e",
    "metadata": {},
    "outputs": [
@@ -149,7 +149,7 @@
        "tensor([1.0000, 0.8808, 0.0000])"
       ]
      },
-     "execution_count": 17,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -166,11 +166,30 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
    "id": "f006f2fa",
    "metadata": {},
-   "outputs": [],
-   "source": []
   },
   {
    "cell_type": "code",

   },
   {
    "cell_type": "code",
+   "execution_count": 6,
    "id": "aec3ae8f",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
+       "[<matplotlib.lines.Line2D at 0x7ea4980d6b30>]"
       ]
      },
+     "execution_count": 6,
      "metadata": {},
      "output_type": "execute_result"
     },
   },
   {
    "cell_type": "code",
+   "execution_count": 7,
    "id": "3bc68e7c",
    "metadata": {},
    "outputs": [
        "1.0986122886681098"
       ]
      },
+     "execution_count": 7,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 8,
    "id": "facb782e",
    "metadata": {},
    "outputs": [
        "tensor([1.0000, 0.8808, 0.0000])"
       ]
      },
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
   },
   {
    "cell_type": "code",
+   "execution_count": 9,
    "id": "f006f2fa",
    "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "tensor([1.0000, 0.7484, 0.0000])"
+      ]
+     },
+     "execution_count": 9,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t = torch.tensor([1, 0.5, 0])\n",
+    "t_i = TimestepDistUtils.t_shift(\n",
+    "    mu=torch.tensor(1.09),\n",
+    "    sigma=1.0,\n",
+    "    t=t\n",
+    ")\n",
+    "t_i"
+   ]
   },
   {
    "cell_type": "code",

scripts/save_regression_outputs.py CHANGED Viewed

@@ -15,6 +15,7 @@ def main():
     parser.add_argument("--imsize", type=int, default=512)
     parser.add_argument("--indir", type=str, default="/data/CrispEdit")
     parser.add_argument("--outdir", type=str, default="/data/regression_output")
     args = parser.parse_args()
     total_per = 10
@@ -54,6 +55,8 @@ def main():
             image=[input_data["input_img"]],
             prompt=input_data["instruction"],
             vae_image_override=args.imsize * args.imsize,
         ))
         torch.save(output_dict, save_base_dir/f"{idx:06d}.pt")

     parser.add_argument("--imsize", type=int, default=512)
     parser.add_argument("--indir", type=str, default="/data/CrispEdit")
     parser.add_argument("--outdir", type=str, default="/data/regression_output")
+    parser.add_argument("--steps", type=int, default=50)
     args = parser.parse_args()
     total_per = 10
             image=[input_data["input_img"]],
             prompt=input_data["instruction"],
             vae_image_override=args.imsize * args.imsize,
+            latent_size_override=args.imsize * args.imsize,
+            num_inference_steps=args.steps,
         ))
         torch.save(output_dict, save_base_dir/f"{idx:06d}.pt")

scripts/save_regression_outputs_modal.py ADDED Viewed

	@@ -0,0 +1,119 @@

+import os
+import argparse
+from pathlib import Path
+import sys
+sys.path.append(str(Path(__file__).parent.parent))
+import fal
+import modal
+import torch
+import tqdm
+from datasets import concatenate_datasets, load_dataset, interleave_datasets
+from qwenimage.datamodels import QwenConfig
+from qwenimage.foundation import QwenImageFoundationSaveInterm
+# Requirement files used to build the Modal image. The relative paths are
+# resolved against the current working directory by os.path.abspath.
+REQUIREMENTS_PATH = os.path.abspath("requirements.txt")
+WAND_REQUIREMENTS_PATH = os.path.abspath("scripts/wand_requirements.txt")
+# Local Python packages added to the image via add_local_python_source below.
+local_modules = ["qwenimage","wandml","scripts"]
+# Edit categories; generate_regression_data loads "{edit_type}_{NNNNN}.parquet"
+# shards for each entry.
+EDIT_TYPES = [
+    "color",
+    "style",
+    "replace",
+    "remove",
+    "add",
+    "motion change",
+    "background change",
+]
+modalapp = modal.App("next-stroke")
+# Container image: Debian slim with Python 3.10, system packages, then both
+# requirement files, then the local source packages.
+modalapp.image = (
+    modal.Image.debian_slim(python_version="3.10")
+    .apt_install("git", "ffmpeg", "libsm6", "libxext6")
+    .pip_install_from_requirements(REQUIREMENTS_PATH)
+    .pip_install_from_requirements(WAND_REQUIREMENTS_PATH)
+    .add_local_python_source(*local_modules)
+)
+@modalapp.function(
+    gpu="H100",
+    max_containers=8,
+    timeout=1 * 60 * 60,
+    volumes={
+        "/data/wand_cache": modal.Volume.from_name("FLUX_MODELS"),
+        "/data/checkpoints": modal.Volume.from_name("training_checkpoints", create_if_missing=True),
+        "/root/.cache/torch/hub/checkpoints": modal.Volume.from_name("torch_hub_checkpoints", create_if_missing=True),
+        "/root/.cache/huggingface/hub":  modal.Volume.from_name("hf_cache", create_if_missing=True),
+        "/root/.cache/huggingface/datasets":  modal.Volume.from_name("hf_cache_datasets", create_if_missing=True),
+        "/data/regression_data": modal.Volume.from_name("regression_data"),
+        "/data/edit_data": modal.Volume.from_name("edit_data"),
+    },
+    secrets=[
+        modal.Secret.from_name("wand-modal-gcloud-keyfile"),
+        modal.Secret.from_name("elea-huggingface-secret"),
+    ],
+)
+def generate_regression_data(start_index=0, end_index=None, imsize=1024, indir="/data/edit_data/CrispEdit", outdir="/data/regression_data/regression_output_1024", total_per=10):
+    """Run the base pipe over a slice of the edit dataset and save each output.
+
+    For every entry of ``EDIT_TYPES`` the first ``total_per`` parquet shards
+    under ``indir`` are concatenated, and the per-type datasets are then
+    interleaved. Rows ``[start_index, end_index)`` of the result are processed
+    (``end_index=None`` means "to the end"), and each output is written to
+    ``outdir`` as ``{idx:06d}.pt``, where ``idx`` is the row's index in the
+    interleaved dataset.
+    """
+    # One concatenated dataset per edit type, built from its numbered shards.
+    per_type_datasets = [
+        concatenate_datasets([
+            load_dataset("parquet", data_files=f"{indir}/{edit_type}_{shard_n:05d}.parquet", split="train")
+            for shard_n in range(total_per)
+        ])
+        for edit_type in EDIT_TYPES
+    ]
+    merged_ds = interleave_datasets(per_type_datasets)
+
+    out_root = Path(outdir)
+    out_root.mkdir(exist_ok=True, parents=True)
+
+    # The same imsize * imsize value feeds the config and both per-call overrides.
+    pixel_area = imsize * imsize
+    foundation = QwenImageFoundationSaveInterm(QwenConfig(vae_image_size=pixel_area))
+
+    stop_index = len(merged_ds) if end_index is None else end_index
+    selected = merged_ds.select(range(start_index, stop_index))
+
+    for offset, sample in enumerate(tqdm.tqdm(selected)):
+        # File names use the index in the full interleaved dataset, not the slice offset.
+        idx = start_index + offset
+        model_inputs = foundation.INPUT_MODEL(
+            image=[sample["input_img"]],
+            prompt=sample["instruction"],
+            vae_image_override=pixel_area,
+            latent_size_override=pixel_area,
+        )
+        output_dict = foundation.base_pipe(model_inputs)
+        torch.save(output_dict, out_root/f"{idx:06d}.pt")
+def _split_range(start: int, end: int, num_workers: int) -> list[tuple[int, int]]:
+    """Split ``[start, end)`` into at most ``num_workers`` contiguous chunks.
+
+    Chunks never overlap, never extend past ``end`` and are never empty; the
+    last chunk may be shorter than the others.
+
+    Raises:
+        ValueError: if ``num_workers`` is smaller than 1.
+    """
+    if num_workers < 1:
+        raise ValueError(f"num_workers must be >= 1, got {num_workers}")
+    total = end - start
+    if total <= 0:
+        return []
+    # Ceiling division: num_workers chunks of this size always cover the range.
+    per_worker_load = -(-total // num_workers)
+    return [
+        (chunk_start, min(chunk_start + per_worker_load, end))
+        for chunk_start in range(start, end, per_worker_load)
+    ]
+@modalapp.local_entrypoint()
+def main(start:int, end:int, num_workers:int):
+    """Fan ``generate_regression_data`` out over ``[start, end)``.
+
+    The index range is split into contiguous per-worker chunks and one remote
+    call is made per chunk. When the range does not divide evenly, fewer than
+    ``num_workers`` calls may be issued rather than sending a worker an empty
+    or out-of-range slice.
+    """
+    loads = _split_range(start, end, num_workers)
+    worker_load_starts = [chunk_start for chunk_start, _ in loads]
+    worker_load_ends = [chunk_end for _, chunk_end in loads]
+    print(f"loads: {loads}")
+    outputs = list(generate_regression_data.map(worker_load_starts, worker_load_ends))
+    print(outputs)

scripts/straightness.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff

scripts/train.py CHANGED Viewed

@@ -70,6 +70,7 @@ modalapp.image = (
         "/root/.cache/torch/hub/checkpoints": modal.Volume.from_name("torch_hub_checkpoints", create_if_missing=True),
         "/root/.cache/huggingface/hub":  modal.Volume.from_name("hf_cache", create_if_missing=True),
         "/data/regression_data": modal.Volume.from_name("regression_data"),
         "/data/edit_data": modal.Volume.from_name("edit_data"),

         "/root/.cache/torch/hub/checkpoints": modal.Volume.from_name("torch_hub_checkpoints", create_if_missing=True),
         "/root/.cache/huggingface/hub":  modal.Volume.from_name("hf_cache", create_if_missing=True),
+        "/root/.cache/huggingface/datasets":  modal.Volume.from_name("hf_cache_datasets", create_if_missing=True),
         "/data/regression_data": modal.Volume.from_name("regression_data"),
         "/data/edit_data": modal.Volume.from_name("edit_data"),

scripts/train_multi copy.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+#!/bin/bash
+nohup python scripts/train.py configs/base.yaml --where modal \
+    --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/dm/mse-dm-a.yaml \
+    --update configs/compare/5k_steps.yaml \
+    > logs/mse-dm-a.log 2>&1 &
+nohup python scripts/train.py configs/base.yaml --where modal \
+    --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/dm/mse-dm-b.yaml \
+    --update configs/compare/5k_steps.yaml \
+    > logs/mse-dm-b.log 2>&1 &
+nohup python scripts/train.py configs/base.yaml --where modal \
+    --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/dm/mse-dm-c.yaml \
+    --update configs/compare/5k_steps.yaml \
+    > logs/mse-dm-c.log 2>&1 &
+nohup python scripts/train.py configs/base.yaml --where modal \
+    --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/dm/mse-dm-d.yaml \
+    --update configs/compare/5k_steps.yaml \
+    > logs/mse-dm-d.log 2>&1 &

scripts/train_multi.sh CHANGED Viewed

@@ -1,61 +1,38 @@
 #!/bin/bash
 # nohup python scripts/train.py configs/base.yaml --where modal \
 #     --update configs/regression/base.yaml \
-#     --update configs/regression/modal-datadirs.yaml \
 #     --update configs/regression/mse.yaml \
-#     --update configs/regression/val_metrics.yaml \
 #     --update configs/compare/5k_steps.yaml \
-#     --update configs/optim/cosine.yaml \
-#     --update configs/regression/lo_mse.yaml \
 #     > logs/mse.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
-    --update configs/regression/modal-datadirs.yaml \
-    --update configs/regression/mse-triplet.yaml \
-    --update configs/regression/val_metrics.yaml \
     --update configs/compare/5k_steps.yaml \
-    --update configs/optim/cosine.yaml \
-    --update configs/regression/lo_mse.yaml \
-    > logs/mse-triplet.log 2>&1 &
-# nohup python scripts/train.py configs/base.yaml --where modal \
-#     --update configs/regression/base.yaml \
-#     --update configs/regression/modal-datadirs.yaml \
-#     --update configs/regression/mse-neg-mse.yaml \
-#     --update configs/regression/val_metrics.yaml \
-#     --update configs/compare/5k_steps.yaml \
-#     --update configs/optim/cosine.yaml \
-#     --update configs/regression/lo_mse.yaml \
-#     > logs/mse-neg-mse.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
-    --update configs/regression/modal-datadirs.yaml \
-    --update configs/regression/mse-pixel-mse.yaml \
-    --update configs/regression/val_metrics.yaml \
     --update configs/compare/5k_steps.yaml \
-    --update configs/optim/cosine.yaml \
-    --update configs/regression/lo_mse.yaml \
-    > logs/mse-pixel-mse.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
-    --update configs/regression/modal-datadirs.yaml \
-    --update configs/regression/mse-pixel-lpips.yaml \
-    --update configs/regression/val_metrics.yaml \
     --update configs/compare/5k_steps.yaml \
-    --update configs/optim/cosine.yaml \
-    --update configs/regression/lo_mse.yaml \
-    > logs/mse-pixel-lpips.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
-    --update configs/regression/modal-datadirs.yaml \
-    --update configs/regression/mse-dm.yaml \
-    --update configs/regression/val_metrics.yaml \
     --update configs/compare/5k_steps.yaml \
-    --update configs/optim/cosine.yaml \
-    --update configs/regression/lo_mse.yaml \
-    > logs/mse-pixel-lpips.log 2>&1 &

 #!/bin/bash
 # nohup python scripts/train.py configs/base.yaml --where modal \
 #     --update configs/regression/base.yaml \
+#     --update configs/regression/modal.yaml \
 #     --update configs/regression/mse.yaml \
 #     --update configs/compare/5k_steps.yaml \
 #     > logs/mse.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/triplet/mse-triplet-b.yaml \
     --update configs/compare/5k_steps.yaml \
+    > logs/mse-triplet-b.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/triplet/mse-triplet-c.yaml \
     --update configs/compare/5k_steps.yaml \
+    > logs/mse-triplet-c.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/triplet/mse-triplet-d.yaml \
     --update configs/compare/5k_steps.yaml \
+    > logs/mse-triplet-d.log 2>&1 &
 nohup python scripts/train.py configs/base.yaml --where modal \
     --update configs/regression/base.yaml \
+    --update configs/regression/modal.yaml \
+    --update configs/regression/triplet/mse-triplet-e.yaml \
     --update configs/compare/5k_steps.yaml \
+    > logs/mse-triplet-e.log 2>&1 &