Spaces: Running on Zero
Added new illusions and edited the format
app.py CHANGED
@@ -22,28 +22,185 @@ args = parser.parse_args()
 os.makedirs("models", exist_ok=True)
 os.makedirs("stimuli", exist_ok=True)
 
-# Check if running on Hugging Face Spaces
+# Check if running on Hugging Face Spaces
 if "SPACE_ID" in os.environ:
-    default_port = int(os.environ.get("PORT", 7860))
+    default_port = int(os.environ.get("PORT", 7860))
 else:
     default_port = 8861  # Local default port
 
 # Initialize model
 model = GenerativeInferenceModel()
 
+# Define example images and their parameters with updated values from the research
+examples = [
+    {
+        "image": os.path.join("stimuli", "Kanizsa_square.jpg"),
+        "name": "Kanizsa Square",
+        "wiki": "https://en.wikipedia.org/wiki/Kanizsa_triangle",
+        "papers": [
+            "[Gestalt Psychology](https://en.wikipedia.org/wiki/Gestalt_psychology)",
+            "[Neural Mechanisms](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer4",  # last layer
+            "initial_noise": 0.1,
+            "diffusion_noise": 0.003,  # Corrected parameter name
+            "step_size": 0.5,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 0.5
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "face_vase.png"),
+        "name": "Rubin's Face-Vase (Object Prior)",
+        "wiki": "https://en.wikipedia.org/wiki/Rubin_vase",
+        "papers": [
+            "[Figure-Ground Perception](https://en.wikipedia.org/wiki/Figure-ground_(perception))",
+            "[Bistable Perception](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer4",  # last layer
+            "initial_noise": 0.7,
+            "diffusion_noise": 0.005,  # Corrected parameter name
+            "step_size": 1.0,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 1.0
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "figure_ground.png"),
+        "name": "Figure-Ground Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Figure-ground_(perception)",
+        "papers": [
+            "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
+            "[Perceptual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.5,
+            "diffusion_noise": 0.005,  # Corrected parameter name
+            "step_size": 0.8,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 0.8
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "Neon_Color_Circle.jpg"),
+        "name": "Neon Color Spreading",
+        "wiki": "https://en.wikipedia.org/wiki/Neon_color_spreading",
+        "papers": [
+            "[Color Assimilation](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Perceptual Filling-in](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.5,
+            "diffusion_noise": 0.003,  # Corrected parameter name
+            "step_size": 1.0,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 1.0
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "EhresteinSingleColor.png"),
+        "name": "Ehrenstein Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Ehrenstein_illusion",
+        "papers": [
+            "[Subjective Contours](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Neural Processing](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.5,
+            "diffusion_noise": 0.005,  # Corrected parameter name
+            "step_size": 0.8,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 0.8
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "Confetti_illusion.png"),
+        "name": "Confetti Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Optical_illusion",
+        "papers": [
+            "[Color Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Context Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.7,
+            "diffusion_noise": 0.01,  # Corrected parameter name
+            "step_size": 1.0,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 1.0
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "CornsweetBlock.png"),
+        "name": "Cornsweet Illusion",
+        "wiki": "https://en.wikipedia.org/wiki/Cornsweet_illusion",
+        "papers": [
+            "[Brightness Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
+            "[Edge Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.5,
+            "diffusion_noise": 0.005,  # Corrected parameter name
+            "step_size": 0.8,  # Step size (learning rate parameter)
+            "iterations": 50,  # Number of iterations
+            "epsilon": 0.8
+        }
+    },
+    {
+        "image": os.path.join("stimuli", "GroupingByContinuity.png"),
+        "name": "Grouping by Continuity",
+        "wiki": "https://en.wikipedia.org/wiki/Principles_of_grouping",
+        "papers": [
+            "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
+            "[Visual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
+        ],
+        "method": "ReverseDiffusion",
+        "reverse_diff": {
+            "model": "resnet50_robust",
+            "layer": "layer3",
+            "initial_noise": 0.1,
+            "diffusion_noise": 0.005,  # Corrected parameter name
+            "step_size": 0.4,  # Step size (learning rate parameter)
+            "iterations": 100,  # Number of iterations
+            "epsilon": 0.4
+        }
+    }
+]
+
 @GPU
 def run_inference(image, model_type, inference_type, eps_value, num_iterations,
-
+                  initial_noise=0.05, diffusion_noise=0.3, step_size=0.8, model_layer="layer3"):
     # Convert eps to float
     eps = float(eps_value)
 
     # Load inference configuration based on the selected type
-    config = get_inference_configs(inference_type=inference_type, eps=eps, n_itr=int(num_iterations)
+    config = get_inference_configs(inference_type=inference_type, eps=eps, n_itr=int(num_iterations))
 
     # Handle ReverseDiffusion specific parameters
     if inference_type == "ReverseDiffusion":
         config['initial_inference_noise_ratio'] = float(initial_noise)
-        config['diffusion_noise_ratio'] = float(
+        config['diffusion_noise_ratio'] = float(diffusion_noise)
+        config['step_size'] = float(step_size)  # Added step size parameter
         config['top_layer'] = model_layer
 
     # Run generative inference
@@ -71,148 +228,162 @@ def run_inference(image, model_type, inference_type, eps_value, num_iterations,
     # Return the final inferred image and the animation frames directly
     return final_image, frames
 
+# Helper function to apply example parameters
+def apply_example(example):
+    return [
+        example["image"],
+        "resnet50_robust",  # Model type
+        example["method"],  # Inference type
+        example["reverse_diff"]["epsilon"],  # Epsilon value
+        example["reverse_diff"]["iterations"],  # Number of iterations
+        example["reverse_diff"]["initial_noise"],  # Initial noise
+        example["reverse_diff"]["diffusion_noise"],  # Diffusion noise value (corrected)
+        example["reverse_diff"]["step_size"],  # Step size (added)
+        example["reverse_diff"]["layer"]  # Model layer
+    ]
+
 # Define the interface
 with gr.Blocks(title="Generative Inference Demo") as demo:
     gr.Markdown("# Generative Inference Demo")
     gr.Markdown("This demo showcases how neural networks can perceive visual illusions through generative inference.")
 
+    # Main processing interface
     with gr.Row():
        with gr.Column(scale=1):
            # Inputs
-            image_input = gr.Image(label="
+            image_input = gr.Image(label="Input Image", type="pil")
 
            with gr.Row():
                model_choice = gr.Dropdown(
-                    choices=["
-                    value="
+                    choices=["resnet50_robust", "standard_resnet50"],
+                    value="resnet50_robust",
                    label="Model"
                )
 
                inference_type = gr.Dropdown(
-                    choices=["
-                    value="
+                    choices=["ReverseDiffusion", "IncreaseConfidence"],
+                    value="ReverseDiffusion",
                    label="Inference Method"
                )
 
            with gr.Row():
-                eps_slider = gr.Slider(minimum=0.
-                iterations_slider = gr.Slider(minimum=1, maximum=
-                step_size_slider = gr.Slider(minimum=0.0, maximum=10.0, value=1.0, step=0.1, label="Step Size")
+                eps_slider = gr.Slider(minimum=0.01, maximum=3.0, value=0.5, step=0.01, label="Epsilon (Perturbation Size)")
+                iterations_slider = gr.Slider(minimum=1, maximum=50, value=50, step=1, label="Number of Iterations")  # Default 50
 
-
-
-                initial_noise_slider = gr.Slider(minimum=0.0, maximum=0.5, value=0.05, step=0.01,
+            with gr.Row():
+                initial_noise_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01,
                                                  label="Initial Noise Ratio")
-
-
+                diffusion_noise_slider = gr.Slider(minimum=0.0, maximum=0.05, value=0.01, step=0.001,
+                                                   label="Diffusion Noise Ratio")  # Corrected name
 
-            with gr.Row(
+            with gr.Row():
+                step_size_slider = gr.Slider(minimum=0.01, maximum=2.0, value=0.5, step=0.01,
+                                             label="Step Size")  # Added step size slider
                layer_choice = gr.Dropdown(
                    choices=["all", "conv1", "bn1", "relu", "maxpool", "layer1", "layer2", "layer3", "layer4", "avgpool"],
                    value="all",
                    label="Model Layer"
                )
 
-
-            def toggle_params(inference):
-                if inference == "ReverseDiffusion":
-                    return gr.update(visible=True), gr.update(visible=True)
-                else:
-                    return gr.update(visible=False), gr.update(visible=False)
-
-            inference_type.change(toggle_params, [inference_type], [diffusion_params, layer_params])
-
-            run_button = gr.Button("Run Inference")
+            run_button = gr.Button("Run Inference", variant="primary")
 
        with gr.Column(scale=2):
            # Outputs
            output_image = gr.Image(label="Final Inferred Image")
-            output_frames = gr.Gallery(label="Inference Steps", columns=
-
-    # Set up example images with default parameters for all inputs
-    examples = [
-        # IncreaseConfidence examples
-        [os.path.join("stimuli", "Kanizsa_square.jpg"), "robust_resnet50", "IncreaseConfidence",
-         0.5, 50, 1.0, 0.05, 0.01, "all"],
-        [os.path.join("stimuli", "face_vase.png"), "robust_resnet50", "IncreaseConfidence",
-         0.5, 50, 1.0, 0.05, 0.01, "all"],
-        [os.path.join("stimuli", "figure_ground.png"), "robust_resnet50", "IncreaseConfidence",
-         0.7, 100, 1.0, 0.05, 0.01, "all"],
-
-        # ReverseDiffusion examples with different layers and noise values
-        [os.path.join("stimuli", "Neon_Color_Circle.jpg"), "robust_resnet50", "ReverseDiffusion",
-         0.3, 80, 0.8, 0.05, 0.01, "all"],
-        [os.path.join("stimuli", "Kanizsa_square.jpg"), "robust_resnet50", "ReverseDiffusion",
-         0.5, 50, 0.8, 0.1, 0.02, "layer4"],  # Using layer4 (high-level features)
-        [os.path.join("stimuli", "face_vase.png"), "robust_resnet50", "ReverseDiffusion",
-         0.4, 60, 0.8, 0.15, 0.03, "layer1"]  # Using layer1 (lower-level features)
-    ]
+            output_frames = gr.Gallery(label="Inference Steps", columns=5, rows=2)
 
-
-
-
-        initial_noise_slider, step_noise_slider, layer_choice
-    ])
+    # Examples section with integrated explanations
+    gr.Markdown("## Visual Illusion Examples")
+    gr.Markdown("Select an illusion to load its parameters and see how generative inference reveals perceptual effects")
 
-    #
+    # For each example, create a row with the image and explanation side by side
+    for i, ex in enumerate(examples):
+        with gr.Row():
+            # Left column for the image
+            with gr.Column(scale=1):
+                # Display the example image
+                example_img = gr.Image(value=ex["image"], type="filepath", label=f"{ex['name']}")
+                load_btn = gr.Button(f"Load Parameters", variant="primary")
+
+                # Set up the load button to apply this example's parameters
+                load_btn.click(
+                    fn=lambda ex=ex: apply_example(ex),
+                    outputs=[
+                        image_input, model_choice, inference_type,
+                        eps_slider, iterations_slider,
+                        initial_noise_slider, diffusion_noise_slider,
+                        step_size_slider, layer_choice
+                    ]
+                )
+
+            # Right column for the explanation
+            with gr.Column(scale=2):
+                gr.Markdown(f"### {ex['name']}")
+                gr.Markdown(f"[Read more on Wikipedia]({ex['wiki']})")
+
+                gr.Markdown("**Previous Explanations:**")
+                papers_list = "\n".join([f"- {paper}" for paper in ex["papers"]])
+                gr.Markdown(papers_list)
+
+                gr.Markdown("**Research Parameters:**")
+                params_md = f"""
+                - **Method**: {ex['method']}
+                - **Model Layer**: {ex['reverse_diff']['layer']}
+                - **Initial Noise**: {ex['reverse_diff']['initial_noise']}
+                - **Diffusion Noise**: {ex['reverse_diff']['diffusion_noise']}
+                - **Step Size**: {ex['reverse_diff']['step_size']}
+                - **Iterations**: {ex['reverse_diff']['iterations']}
+                - **Epsilon**: {ex['reverse_diff']['epsilon']}
+                """
+                gr.Markdown(params_md)
+
+        if i < len(examples) - 1:  # Don't add separator after the last example
+            gr.Markdown("---")
+
+    # Set up event handler for the main inference
    run_button.click(
        fn=run_inference,
        inputs=[
            image_input, model_choice, inference_type,
-            eps_slider, iterations_slider,
-            initial_noise_slider,
+            eps_slider, iterations_slider,
+            initial_noise_slider, diffusion_noise_slider,
+            step_size_slider, layer_choice
        ],
        outputs=[output_image, output_frames]
    )
 
-    #
+    # About section
    gr.Markdown("""
    ## About Generative Inference
 
-    Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo
+    Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo primarily uses the ReverseDiffusion method.
 
-    ###
-    Optimizes the input to increase the network's confidence in its least confident predictions. This reveals how the
-    network perceives contours, figure-ground separation, and other visual phenomena similar to human perception.
-
-    ### 2. ReverseDiffusion
+    ### ReverseDiffusion
    Starts with a noisy version of the image and guides the optimization to match features of the noisy image.
-    This approach
+    This approach reveals different aspects of visual processing and is inspired by diffusion models.
+
+    ### IncreaseConfidence
+    Optimizes the network's activations to increase confidence in classification, leading to enhanced
+    features that the network associates with its preferred interpretation.
 
-
+    ### Parameters:
    - **Initial Noise Ratio**: Controls the amount of noise added to the image at the beginning
-    - **
-    - **
-
-
-
-    - `relu`: First ReLU activation
-    - `maxpool`: Max pooling layer
-    - `layer1`: First residual block
-    - `layer2`: Second residual block
-    - `layer3`: Third residual block
-    - `layer4`: Fourth residual block
-    - `avgpool`: Average pooling layer
+    - **Diffusion Noise Ratio**: Controls the amount of noise added at each optimization step
+    - **Step Size**: Learning rate for the optimization process
+    - **Number of Iterations**: How many optimization steps to perform
+    - **Model Layer**: Select a specific layer of the ResNet50 model to extract features from
+    - **Epsilon**: Controls the size of perturbation during optimization
 
    Different layers capture different levels of abstraction - earlier layers represent low-level features
    like edges and textures, while later layers represent higher-level features and object parts.
-
-    This demo allows you to:
-    1. Upload your own images or select from example images
-    2. Choose between inference methods (IncreaseConfidence or ReverseDiffusion)
-    3. Select between robust or standard ResNet50 models
-    4. Adjust parameters like perturbation size (epsilon) and number of iterations
-    5. For ReverseDiffusion, fine-tune noise levels and select specific model layers
-    6. Visualize how the perception emerges over time
    """)
 
-# Launch the demo
+# Launch the demo
 if __name__ == "__main__":
    print(f"Starting server on port {args.port}")
-    # Simplified launch parameters
    demo.launch(
-        server_name="0.0.0.0",
-        server_port=args.port,
+        server_name="0.0.0.0",
+        server_port=args.port,
        share=False,
        debug=True
-    )
+    )
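The diff above only wires UI values into `config`; the optimization itself happens inside `GenerativeInferenceModel`, which this commit does not touch. For readers who want the gist of the ReverseDiffusion method described in the About text (start from a noisy copy of the image, then iteratively pull the image toward the features of that noisy target while re-injecting a little noise each step), here is a minimal sketch under stated assumptions: the function name, the plain torchvision ResNet50, and the L-infinity projection are all illustrative guesses, not the Space's actual implementation.

```python
# Hypothetical sketch of the ReverseDiffusion loop described in the About section;
# the Space's real method lives in GenerativeInferenceModel (not shown in this diff).
import torch
import torchvision.models as models

def reverse_diffusion_inference(image, initial_noise=0.1, diffusion_noise=0.005,
                                step_size=0.5, epsilon=0.5, iterations=50):
    r = models.resnet50(weights=None).eval()
    # Feature extractor up to layer3 (analogous to the UI's "Model Layer" choice).
    features = torch.nn.Sequential(r.conv1, r.bn1, r.relu, r.maxpool,
                                   r.layer1, r.layer2, r.layer3)

    # Start from a noisy copy of the input; its features are the optimization target.
    noisy = (image + initial_noise * torch.randn_like(image)).clamp(0, 1)
    with torch.no_grad():
        target = features(noisy)

    x, frames = noisy.clone(), []
    for _ in range(iterations):
        x = x.detach().requires_grad_(True)
        loss = (features(x) - target).pow(2).mean()  # match features of the noisy image
        grad, = torch.autograd.grad(loss, x)
        with torch.no_grad():
            x = x - step_size * grad.sign()                   # gradient step toward the target features
            x = x + diffusion_noise * torch.randn_like(x)     # small per-step "diffusion" noise
            x = image + (x - image).clamp(-epsilon, epsilon)  # epsilon bounds the total perturbation
            x = x.clamp(0, 1)                                 # keep a valid image
        frames.append(x.detach().clone())
    return x.detach(), frames

# Example: final, frames = reverse_diffusion_inference(torch.rand(1, 3, 224, 224))
```

Separately, note the `fn=lambda ex=ex: apply_example(ex)` pattern in the examples loop: binding `ex` as a default argument captures each iteration's value, avoiding Python's late-binding pitfall where every Load button would otherwise apply the last example's parameters.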
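The About text gives only a one-line description of the IncreaseConfidence method. Under the same caveats as the sketch above (hypothetical names, plain ResNet50 in place of the robust checkpoint, an assumed L-infinity projection), the idea might look like this:

```python
# Hypothetical sketch of the IncreaseConfidence idea described in the About text;
# the actual method is inside GenerativeInferenceModel and is not shown in this diff.
import torch
import torch.nn.functional as F
import torchvision.models as models

def increase_confidence_inference(image, step_size=1.0, epsilon=0.5, iterations=50):
    net = models.resnet50(weights=None).eval()
    x = image.clone()
    for _ in range(iterations):
        x = x.detach().requires_grad_(True)
        logits = net(x)
        # Nudge the image so the network's current top prediction becomes more confident.
        loss = F.cross_entropy(logits, logits.argmax(dim=1))
        grad, = torch.autograd.grad(loss, x)
        with torch.no_grad():
            x = x - step_size * grad.sign()                   # descend the loss -> raise confidence
            x = image + (x - image).clamp(-epsilon, epsilon)  # stay within the epsilon ball
            x = x.clamp(0, 1)                                 # keep a valid image
    return x.detach()
```

Run this way, the enhanced features the network "hallucinates" (illusory contours, figure-ground boundaries) become visible directly in the optimized image, which is the effect the demo's gallery of frames is meant to show.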