Upload 2 files

local git upload fails so uploading
- app.py +107 -103
- inference.py +27 -12
app.py
CHANGED
@@ -34,98 +34,79 @@ model = GenerativeInferenceModel()
 # Define example images and their parameters with updated values from the research
 examples = [
     {
-        "image": os.path.join("stimuli", "
-        "name": "
-        "wiki": "https://en.wikipedia.org/wiki/
-        "papers": [
-            "[Gestalt Psychology](https://en.wikipedia.org/wiki/Gestalt_psychology)",
-            "[Neural Mechanisms](https://doi.org/10.1016/j.tics.2003.08.003)"
-        ],
-        "method": "ReverseDiffusion",
-        "reverse_diff": {
-            "model": "resnet50_robust",
-            "layer": "layer4", # last layer
-            "initial_noise": 0.1,
-            "diffusion_noise": 0.003, # Corrected parameter name
-            "step_size": 0.5, # Step size (learning rate parameter)
-            "iterations": 50, # Number of iterations
-            "epsilon": 0.5
-        }
-    },
-    {
-        "image": os.path.join("stimuli", "face_vase.png"),
-        "name": "Rubin's Face-Vase (Object Prior)",
-        "wiki": "https://en.wikipedia.org/wiki/Rubin_vase",
+        "image": os.path.join("stimuli", "Neon_Color_Circle.jpg"),
+        "name": "Neon Color Spreading",
+        "wiki": "https://en.wikipedia.org/wiki/Neon_color_spreading",
         "papers": [
-            "[
-            "[
+            "[Color Assimilation](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Perceptual Filling-in](https://doi.org/10.1016/j.tics.2003.08.003)"
         ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
-            "layer": "
-            "initial_noise": 0.
-            "diffusion_noise": 0.
-            "step_size": 1.0,
-            "iterations":
-            "epsilon":
+            "layer": "layer3",
+            "initial_noise": 0.8,
+            "diffusion_noise": 0.003,
+            "step_size": 1.0,
+            "iterations": 101,
+            "epsilon": 20.0
         }
     },
     {
-        "image": os.path.join("stimuli", "
-        "name": "
-        "wiki": "https://en.wikipedia.org/wiki/
+        "image": os.path.join("stimuli", "Kanizsa_square.jpg"),
+        "name": "Kanizsa Square",
+        "wiki": "https://en.wikipedia.org/wiki/Kanizsa_triangle",
         "papers": [
-            "[Gestalt
-            "[
+            "[Gestalt Psychology](https://en.wikipedia.org/wiki/Gestalt_psychology)",
+            "[Neural Mechanisms](https://doi.org/10.1016/j.tics.2003.08.003)"
         ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
-            "layer": "
-            "initial_noise": 0.
-            "diffusion_noise": 0.005,
-            "step_size": 0.
-            "iterations":
-            "epsilon": 0
+            "layer": "all",
+            "initial_noise": 0.0,
+            "diffusion_noise": 0.005,
+            "step_size": 0.64,
+            "iterations": 100,
+            "epsilon": 5.0
         }
     },
     {
-        "image": os.path.join("stimuli", "
-        "name": "
-        "wiki": "https://en.wikipedia.org/wiki/
+        "image": os.path.join("stimuli", "CornsweetBlock.png"),
+        "name": "Cornsweet Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Cornsweet_illusion",
         "papers": [
-            "[
-            "[
+            "[Brightness Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Edge Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
         ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
             "layer": "layer3",
             "initial_noise": 0.5,
-            "diffusion_noise": 0.
-            "step_size":
-            "iterations":
-            "epsilon":
+            "diffusion_noise": 0.005,
+            "step_size": 0.8,
+            "iterations": 51,
+            "epsilon": 20.0
         }
     },
     {
-        "image": os.path.join("stimuli", "
-        "name": "
-        "wiki": "https://en.wikipedia.org/wiki/
+        "image": os.path.join("stimuli", "face_vase.png"),
+        "name": "Rubin's Face-Vase (Object Prior)",
+        "wiki": "https://en.wikipedia.org/wiki/Rubin_vase",
         "papers": [
-            "[
-            "[
+            "[Figure-Ground Perception](https://en.wikipedia.org/wiki/Figure-ground_(perception))",
+            "[Bistable Perception](https://doi.org/10.1016/j.tics.2003.08.003)"
         ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
-            "layer": "
+            "layer": "layer4",
             "initial_noise": 0.5,
-            "diffusion_noise": 0.
-            "step_size": 0.
-            "iterations":
-            "epsilon": 0
+            "diffusion_noise": 0.01,
+            "step_size": 0.2,
+            "iterations": 301,
+            "epsilon": 40.0
         }
     },
     {

@@ -136,34 +117,34 @@ examples = [
             "[Color Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
             "[Context Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
         ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
             "layer": "layer3",
-            "initial_noise": 0.
-            "diffusion_noise": 0.
-            "step_size":
-            "iterations":
-            "epsilon":
+            "initial_noise": 0.1,
+            "diffusion_noise": 0.003,
+            "step_size": 0.5,
+            "iterations": 101,
+            "epsilon": 20.0
         }
     },
     {
-        "image": os.path.join("stimuli", "
-        "name": "
-        "wiki": "https://en.wikipedia.org/wiki/
+        "image": os.path.join("stimuli", "EhresteinSingleColor.png"),
+        "name": "Ehrenstein Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Ehrenstein_illusion",
         "papers": [
-            "[
-            "[
+            "[Subjective Contours](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Neural Processing](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
             "layer": "layer3",
             "initial_noise": 0.5,
-            "diffusion_noise": 0.005,
-            "step_size": 0.8,
-            "iterations":
-            "epsilon": 0
+            "diffusion_noise": 0.005,
+            "step_size": 0.8,
+            "iterations": 101,
+            "epsilon": 20.0
         }
     },
     {

@@ -174,15 +155,34 @@ examples = [
             "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
             "[Visual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
         ],
-        "method": "
+        "method": "Prior-Guided Drift Diffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.0,
+            "diffusion_noise": 0.005,
+            "step_size": 0.4,
+            "iterations": 101,
+            "epsilon": 4.0
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "figure_ground.png"),
+        "name": "Figure-Ground Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Figure-ground_(perception)",
+        "papers": [
+            "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
+            "[Perceptual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "Prior-Guided Drift Diffusion",
         "reverse_diff": {
             "model": "resnet50_robust",
             "layer": "layer3",
             "initial_noise": 0.1,
-            "diffusion_noise": 0.
-            "step_size": 0.
-            "iterations":
-            "epsilon": 0
+            "diffusion_noise": 0.003,
+            "step_size": 0.5,
+            "iterations": 101,
+            "epsilon": 3.0
         }
     }
 ]

@@ -190,14 +190,18 @@ examples = [
 @GPU
 def run_inference(image, model_type, inference_type, eps_value, num_iterations,
                   initial_noise=0.05, diffusion_noise=0.3, step_size=0.8, model_layer="layer3"):
+    # Check if image is provided
+    if image is None:
+        return None, "Please upload an image before running inference."
+
     # Convert eps to float
     eps = float(eps_value)

     # Load inference configuration based on the selected type
     config = get_inference_configs(inference_type=inference_type, eps=eps, n_itr=int(num_iterations))

-    # Handle
-    if inference_type == "
+    # Handle Prior-Guided Drift Diffusion specific parameters
+    if inference_type == "Prior-Guided Drift Diffusion":
         config['initial_inference_noise_ratio'] = float(initial_noise)
         config['diffusion_noise_ratio'] = float(diffusion_noise)
         config['step_size'] = float(step_size) # Added step size parameter

@@ -247,6 +251,13 @@ with gr.Blocks(title="Generative Inference Demo") as demo:
     gr.Markdown("# Generative Inference Demo")
     gr.Markdown("This demo showcases how neural networks can perceive visual illusions through generative inference.")

+    gr.Markdown("""
+    **How to use this demo:**
+    - **Load pre-configured examples**: Click on any visual illusion below and hit "Load Parameters" to automatically set up the optimal parameters for that illusion
+    - **Upload your own images**: Use the image upload area to test your own images with different parameter settings
+    - **Experiment with parameters**: Adjust the inference method, iterations, noise levels, and other parameters to see how they affect the generative inference process
+    """)
+
     # Main processing interface
     with gr.Row():
         with gr.Column(scale=1):

@@ -261,14 +272,14 @@ with gr.Blocks(title="Generative Inference Demo") as demo:
             )

             inference_type = gr.Dropdown(
-                choices=["
-                value="
+                choices=["Prior-Guided Drift Diffusion", "IncreaseConfidence"],
+                value="Prior-Guided Drift Diffusion",
                 label="Inference Method"
             )

             with gr.Row():
                 eps_slider = gr.Slider(minimum=0.01, maximum=3.0, value=0.5, step=0.01, label="Epsilon (Perturbation Size)")
-                iterations_slider = gr.Slider(minimum=1, maximum=
+                iterations_slider = gr.Slider(minimum=1, maximum=600, value=50, step=1, label="Number of Iterations") # Updated max to 600

             with gr.Row():
                 initial_noise_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01,

@@ -321,11 +332,7 @@ with gr.Blocks(title="Generative Inference Demo") as demo:
                 gr.Markdown(f"### {ex['name']}")
                 gr.Markdown(f"[Read more on Wikipedia]({ex['wiki']})")

-                gr.Markdown("**
-                papers_list = "\n".join([f"- {paper}" for paper in ex["papers"]])
-                gr.Markdown(papers_list)
-
-                gr.Markdown("**Research Parameters:**")
+                gr.Markdown("**Generative Inference Parameters:**")
                 params_md = f"""
                 - **Method**: {ex['method']}
                 - **Model Layer**: {ex['reverse_diff']['layer']}

@@ -356,15 +363,13 @@ with gr.Blocks(title="Generative Inference Demo") as demo:
     gr.Markdown("""
     ## About Generative Inference

-    Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo primarily uses the
+    Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo primarily uses the Prior-Guided Drift Diffusion method.

-    ###
-
-    This approach reveals different aspects of visual processing and is inspired by diffusion models.
+    ### Prior-Guided Drift Diffusion
+    Moving away from a noisy representation of the input images

     ### IncreaseConfidence
-
-    features that the network associates with its preferred interpretation.
+    Moving away from the least likely class identified at iteration 0 (fast perception)

     ### Parameters:
     - **Initial Noise Ratio**: Controls the amount of noise added to the image at the beginning

@@ -374,8 +379,7 @@ with gr.Blocks(title="Generative Inference Demo") as demo:
     - **Model Layer**: Select a specific layer of the ResNet50 model to extract features from
     - **Epsilon**: Controls the size of perturbation during optimization

-
-    like edges and textures, while later layers represent higher-level features and object parts.
+    **Generative Inference was developed by [Tahereh Toosi](https://toosi.github.io).**
     """)

 # Launch the demo
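
For orientation, a minimal sketch (not part of the uploaded files) of how one `examples` entry maps onto the `run_inference` signature shown above. `load_image` is a hypothetical helper standing in for whatever the Gradio image component actually delivers, and the two-value unpacking assumes the return shape suggested by the early-exit branch (`return None, "..."`):

    # Hypothetical wiring; names other than run_inference and examples are assumptions.
    ex = examples[0]                      # e.g. the Neon Color Spreading entry
    rd = ex["reverse_diff"]
    result, message = run_inference(
        image=load_image(ex["image"]),    # hypothetical loader for the stimulus file
        model_type=rd["model"],
        inference_type=ex["method"],      # "Prior-Guided Drift Diffusion"
        eps_value=rd["epsilon"],
        num_iterations=rd["iterations"],
        initial_noise=rd["initial_noise"],
        diffusion_noise=rd["diffusion_noise"],
        step_size=rd["step_size"],
        model_layer=rd["layer"],
    )
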
inference.py
CHANGED
@@ -213,11 +213,26 @@ class InferStep:
         scaled_grad = grad / (grad_norm + 1e-10)
         return scaled_grad * self.step_size

+def get_iterations_to_show(n_itr):
+    """Generate a dynamic list of iterations to show based on total iterations."""
+    if n_itr <= 50:
+        return [1, 5, 10, 20, 30, 40, 50, n_itr]
+    elif n_itr <= 100:
+        return [1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, n_itr]
+    elif n_itr <= 200:
+        return [1, 5, 10, 20, 30, 40, 50, 75, 100, 125, 150, 175, 200, n_itr]
+    elif n_itr <= 500:
+        return [1, 5, 10, 20, 30, 40, 50, 75, 100, 150, 200, 250, 300, 350, 400, 450, 500, n_itr]
+    else:
+        # For very large iterations, show more evenly distributed points
+        return [1, 5, 10, 20, 30, 40, 50, 75, 100, 150, 200, 250, 300, 350, 400, 450, 500,
+                int(n_itr*0.6), int(n_itr*0.7), int(n_itr*0.8), int(n_itr*0.9), n_itr]
+
 def get_inference_configs(inference_type='IncreaseConfidence', eps=0.5, n_itr=50, step_size=1.0):
     """Generate inference configuration with customizable parameters.

     Args:
-        inference_type (str): Type of inference ('IncreaseConfidence' or '
+        inference_type (str): Type of inference ('IncreaseConfidence' or 'Prior-Guided Drift Diffusion')
         eps (float): Maximum perturbation size
         n_itr (int): Number of iterations
         step_size (float): Step size for each iteration

@@ -234,7 +249,7 @@ def get_inference_configs(inference_type='IncreaseConfidence', eps=0.5, n_itr=50, step_size=1.0):
         'top_layer': 'all', # Use all layers of the model
         'inference_normalization': False, # Apply normalization during inference
         'recognition_normalization': False, # Apply normalization during recognition
-        'iterations_to_show':
+        'iterations_to_show': get_iterations_to_show(n_itr), # Dynamic iterations to visualize
         'misc_info': {'keep_grads': False} # Additional configuration
     }

@@ -242,7 +257,7 @@ def get_inference_configs(inference_type='IncreaseConfidence', eps=0.5, n_itr=50, step_size=1.0):
     if inference_type == 'IncreaseConfidence':
         config['loss_function'] = 'CE' # Cross Entropy

-    elif inference_type == '
+    elif inference_type == 'Prior-Guided Drift Diffusion':
         config['loss_function'] = 'MSE' # Mean Square Error
         config['initial_inference_noise_ratio'] = 0.05 # Initial noise for diffusion
         config['diffusion_noise_ratio'] = 0.01 # Add noise during diffusion

@@ -723,11 +738,11 @@ class GenerativeInferenceModel:
         x = image_tensor.clone().detach().requires_grad_(True)
         all_steps = [image_tensor[0].detach().cpu()]

-        # For
+        # For Prior-Guided Drift Diffusion, extract selected layer and initialize with noisy features
         noisy_features = None
         layer_model = None
-        if config['loss_infer'] == '
-            print(f"Setting up
+        if config['loss_infer'] == 'Prior-Guided Drift Diffusion':
+            print(f"Setting up Prior-Guided Drift Diffusion with layer {config['top_layer']} and noise {config['initial_inference_noise_ratio']}...")

             # Extract model up to the specified layer
             try:

@@ -774,7 +789,7 @@ class GenerativeInferenceModel:
             # Compute noisy features - simplified to match original code
             noisy_features = layer_model(noisy_image_tensor)

-            print(f"Noisy features computed for
+            print(f"Noisy features computed for Prior-Guided Drift Diffusion target with shape: {noisy_features.shape if hasattr(noisy_features, 'shape') else 'unknown'}")

         # Main inference loop
         print(f"Starting inference loop with {config['n_itr']} iterations for {config['loss_infer']}...")

@@ -783,9 +798,9 @@ class GenerativeInferenceModel:
             # Reset gradients
             x.grad = None

-            # Forward pass - use layer_model for
-            if config['loss_infer'] == '
-                # Use the extracted layer model for
+            # Forward pass - use layer_model for Prior-Guided Drift Diffusion, full model otherwise
+            if config['loss_infer'] == 'Prior-Guided Drift Diffusion' and layer_model is not None:
+                # Use the extracted layer model for Prior-Guided Drift Diffusion
                 # In original code, normalization is handled at transform time, not during forward pass
                 output = layer_model(x)
             else:

@@ -795,14 +810,14 @@ class GenerativeInferenceModel:

             # Calculate loss and gradients based on inference type
             try:
-                if config['loss_infer'] == '
+                if config['loss_infer'] == 'Prior-Guided Drift Diffusion':
                     # Use MSE loss to match the noisy features
                     assert config['loss_function'] == 'MSE', "Reverse Diffusion loss function must be MSE"
                     if noisy_features is not None:
                         loss = F.mse_loss(output, noisy_features)
                         grad = torch.autograd.grad(loss, x)[0] # Removed retain_graph=True to match original
                     else:
-                        raise ValueError("Noisy features not computed for
+                        raise ValueError("Noisy features not computed for Prior-Guided Drift Diffusion")

                 else: # Default 'IncreaseConfidence' approach
                     # Get the least confident classes