Upload config.py with huggingface_hub
Browse files
config.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Text-Conditional Diffusion Configuration
|
| 2 |
+
IMAGE_SIZE = 64 # Image resolution; presumably the side length in pixels of square inputs - TODO confirm
|
| 3 |
+
BATCH_SIZE = 128 # Total batch size: 32 per GPU across 4 GPUs
|
| 4 |
+
NUM_EPOCHS = 100 # Number of training epochs; kept long for better text conditioning
|
| 5 |
+
LEARNING_RATE = 1e-4 # Learning rate; optimizer and schedule are defined elsewhere - TODO confirm whether this is the base or peak rate
|
| 6 |
+
TIMESTEPS = 1000 # Presumably the number of diffusion timesteps - TODO confirm against the noise schedule
|
| 7 |
+
|
| 8 |
+
# Model architecture
|
| 9 |
+
CHANNELS = 256 # Model channel width, set to the maximum capacity for best feature learning (presumably the base width - TODO confirm)
|
| 10 |
+
TIME_DIM = 128 # Presumably the timestep-embedding dimension - TODO confirm against the model code
|
| 11 |
+
TEXT_DIM = 512 # CLIP embedding dimension; must match the embedding size of CLIP_MODEL (512 for clip-vit-base-patch32)
|
| 12 |
+
|
| 13 |
+
# Text encoder
|
| 14 |
+
CLIP_MODEL = "openai/clip-vit-base-patch32" # Identifier of the CLIP checkpoint used as the text encoder
|
| 15 |
+
FREEZE_CLIP = True # Keep the CLIP weights frozen (not updated) during training
|
| 16 |
+
|
| 17 |
+
# Classifier-free guidance
|
| 18 |
+
CFG_DROP_PROB = 0.15 # Drop probability for classifier-free guidance (presumably applied to the text condition during training); increased to 15% for stronger conditioning contrast
|
| 19 |
+
CFG_GUIDANCE_SCALE = 5.0 # Default classifier-free guidance scale (set on the higher side)
|
| 20 |
+
|
| 21 |
+
# Data
|
| 22 |
+
DATASET_NAME = "Xenova/quickdraw-small" # Dataset identifier; presumably a Hugging Face Hub dataset repo - TODO confirm against the loader
|
| 23 |
+
MAX_SAMPLES = None # None uses all samples; set to a number to limit how many are used
|
| 24 |
+
NUM_CLASSES_FILTER = 5 # Presumably the number of classes kept after filtering; a small set for a clear proof-of-concept
|
| 25 |
+
NUM_CLASSES = None # Placeholder; will be set after loading the dataset
|