import gradio as gr
import torch
import numpy as np
from PIL import Image
try:
from spaces import GPU
except ImportError:
# Define a no-op decorator if running locally
def GPU(func):
return func
import os
import argparse
from inference import GenerativeInferenceModel, get_inference_configs
# Determine the default port (Hugging Face Spaces provides one via the environment)
if "SPACE_ID" in os.environ:
    default_port = int(os.environ.get("PORT", 7860))
else:
    default_port = 8861  # Local default port
# Parse command line arguments
parser = argparse.ArgumentParser(description='Run Generative Inference Demo')
parser.add_argument('--port', type=int, default=default_port, help='Port to run the server on')
args = parser.parse_args()
# Create model and stimuli directories if they don't exist
os.makedirs("models", exist_ok=True)
os.makedirs("stimuli", exist_ok=True)
# Initialize model
model = GenerativeInferenceModel()
# Example images and their parameters, with values from the research
examples = [
{
"image": os.path.join("stimuli", "Kanizsa_square.jpg"),
"name": "Kanizsa Square",
"wiki": "https://en.wikipedia.org/wiki/Kanizsa_triangle",
"papers": [
"[Gestalt Psychology](https://en.wikipedia.org/wiki/Gestalt_psychology)",
"[Neural Mechanisms](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer4", # last layer
"initial_noise": 0.1,
"diffusion_noise": 0.003, # Corrected parameter name
"step_size": 0.5, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 0.5
}
},
{
"image": os.path.join("stimuli", "face_vase.png"),
"name": "Rubin's Face-Vase (Object Prior)",
"wiki": "https://en.wikipedia.org/wiki/Rubin_vase",
"papers": [
"[Figure-Ground Perception](https://en.wikipedia.org/wiki/Figure-ground_(perception))",
"[Bistable Perception](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer4", # last layer
"initial_noise": 0.7,
"diffusion_noise": 0.005, # Corrected parameter name
"step_size": 1.0, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 1.0
}
},
{
"image": os.path.join("stimuli", "figure_ground.png"),
"name": "Figure-Ground Illusion",
"wiki": "https://en.wikipedia.org/wiki/Figure-ground_(perception)",
"papers": [
"[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
"[Perceptual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer3",
"initial_noise": 0.5,
"diffusion_noise": 0.005, # Corrected parameter name
"step_size": 0.8, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 0.8
}
},
{
"image": os.path.join("stimuli", "Neon_Color_Circle.jpg"),
"name": "Neon Color Spreading",
"wiki": "https://en.wikipedia.org/wiki/Neon_color_spreading",
"papers": [
"[Color Assimilation](https://doi.org/10.1016/j.visres.2000.200.1)",
"[Perceptual Filling-in](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer3",
"initial_noise": 0.5,
"diffusion_noise": 0.003, # Corrected parameter name
"step_size": 1.0, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 1.0
}
},
{
"image": os.path.join("stimuli", "EhresteinSingleColor.png"),
"name": "Ehrenstein Illusion",
"wiki": "https://en.wikipedia.org/wiki/Ehrenstein_illusion",
"papers": [
"[Subjective Contours](https://doi.org/10.1016/j.visres.2000.200.1)",
"[Neural Processing](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer3",
"initial_noise": 0.5,
"diffusion_noise": 0.005, # Corrected parameter name
"step_size": 0.8, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 0.8
}
},
{
"image": os.path.join("stimuli", "Confetti_illusion.png"),
"name": "Confetti Illusion",
"wiki": "https://en.wikipedia.org/wiki/Optical_illusion",
"papers": [
"[Color Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
"[Context Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer3",
"initial_noise": 0.7,
"diffusion_noise": 0.01, # Corrected parameter name
"step_size": 1.0, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 1.0
}
},
{
"image": os.path.join("stimuli", "CornsweetBlock.png"),
"name": "Cornsweet Illusion",
"wiki": "https://en.wikipedia.org/wiki/Cornsweet_illusion",
"papers": [
"[Brightness Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
"[Edge Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer3",
"initial_noise": 0.5,
"diffusion_noise": 0.005, # Corrected parameter name
"step_size": 0.8, # Step size (learning rate parameter)
"iterations": 50, # Number of iterations
"epsilon": 0.8
}
},
{
"image": os.path.join("stimuli", "GroupingByContinuity.png"),
"name": "Grouping by Continuity",
"wiki": "https://en.wikipedia.org/wiki/Principles_of_grouping",
"papers": [
"[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
"[Visual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
],
"method": "ReverseDiffusion",
"reverse_diff": {
"model": "resnet50_robust",
"layer": "layer3",
"initial_noise": 0.1,
"diffusion_noise": 0.005, # Corrected parameter name
"step_size": 0.4, # Step size (learning rate parameter)
"iterations": 100, # Number of iterations
"epsilon": 0.4
}
}
]
@GPU
def run_inference(image, model_type, inference_type, eps_value, num_iterations,
                  initial_noise=0.05, diffusion_noise=0.01, step_size=0.8, model_layer="layer3"):
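    """Run generative inference on `image`; return the final PIL image and the per-step frames."""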
# Convert eps to float
eps = float(eps_value)
# Load inference configuration based on the selected type
config = get_inference_configs(inference_type=inference_type, eps=eps, n_itr=int(num_iterations))
# Handle ReverseDiffusion specific parameters
if inference_type == "ReverseDiffusion":
config['initial_inference_noise_ratio'] = float(initial_noise)
config['diffusion_noise_ratio'] = float(diffusion_noise)
        config['step_size'] = float(step_size)
config['top_layer'] = model_layer
# Run generative inference
result = model.inference(image, model_type, config)
# Extract results based on return type
if isinstance(result, tuple):
# Old format returning (output_image, all_steps)
output_image, all_steps = result
else:
# New format returning dictionary
output_image = result['final_image']
all_steps = result['steps']
# Create animation frames
frames = []
    for step_image in all_steps:
# Convert tensor to PIL image
step_pil = Image.fromarray((step_image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
frames.append(step_pil)
# Convert the final output image to PIL
final_image = Image.fromarray((output_image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
# Return the final inferred image and the animation frames directly
return final_image, frames
# Helper function to apply example parameters
def apply_example(example):
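    """Map an example's stored parameters onto the demo's input components (in input order)."""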
return [
example["image"],
"resnet50_robust", # Model type
example["method"], # Inference type
example["reverse_diff"]["epsilon"], # Epsilon value
example["reverse_diff"]["iterations"], # Number of iterations
example["reverse_diff"]["initial_noise"], # Initial noise
example["reverse_diff"]["diffusion_noise"], # Diffusion noise value (corrected)
example["reverse_diff"]["step_size"], # Step size (added)
example["reverse_diff"]["layer"] # Model layer
]
# Define the interface
with gr.Blocks(title="Generative Inference Demo") as demo:
gr.Markdown("# Generative Inference Demo")
gr.Markdown("This demo showcases how neural networks can perceive visual illusions through generative inference.")
# Main processing interface
with gr.Row():
with gr.Column(scale=1):
# Inputs
image_input = gr.Image(label="Input Image", type="pil")
with gr.Row():
model_choice = gr.Dropdown(
choices=["resnet50_robust", "standard_resnet50"],
value="resnet50_robust",
label="Model"
)
inference_type = gr.Dropdown(
choices=["ReverseDiffusion", "IncreaseConfidence"],
value="ReverseDiffusion",
label="Inference Method"
)
with gr.Row():
eps_slider = gr.Slider(minimum=0.01, maximum=3.0, value=0.5, step=0.01, label="Epsilon (Perturbation Size)")
                iterations_slider = gr.Slider(minimum=1, maximum=100, value=50, step=1, label="Number of Iterations")
with gr.Row():
initial_noise_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01,
label="Initial Noise Ratio")
                diffusion_noise_slider = gr.Slider(minimum=0.0, maximum=0.05, value=0.01, step=0.001,
                                                   label="Diffusion Noise Ratio")
with gr.Row():
                step_size_slider = gr.Slider(minimum=0.01, maximum=2.0, value=0.5, step=0.01,
                                             label="Step Size")
layer_choice = gr.Dropdown(
choices=["all", "conv1", "bn1", "relu", "maxpool", "layer1", "layer2", "layer3", "layer4", "avgpool"],
value="all",
label="Model Layer"
)
run_button = gr.Button("Run Inference", variant="primary")
with gr.Column(scale=2):
# Outputs
output_image = gr.Image(label="Final Inferred Image")
output_frames = gr.Gallery(label="Inference Steps", columns=5, rows=2)
# Examples section with integrated explanations
gr.Markdown("## Visual Illusion Examples")
gr.Markdown("Select an illusion to load its parameters and see how generative inference reveals perceptual effects")
# For each example, create a row with the image and explanation side by side
for i, ex in enumerate(examples):
with gr.Row():
# Left column for the image
with gr.Column(scale=1):
# Display the example image
                example_img = gr.Image(value=ex["image"], type="filepath", label=ex["name"])
                load_btn = gr.Button("Load Parameters", variant="primary")
# Set up the load button to apply this example's parameters
load_btn.click(
fn=lambda ex=ex: apply_example(ex),
outputs=[
image_input, model_choice, inference_type,
eps_slider, iterations_slider,
initial_noise_slider, diffusion_noise_slider,
step_size_slider, layer_choice
]
)
# Right column for the explanation
with gr.Column(scale=2):
gr.Markdown(f"### {ex['name']}")
gr.Markdown(f"[Read more on Wikipedia]({ex['wiki']})")
gr.Markdown("**Previous Explanations:**")
papers_list = "\n".join([f"- {paper}" for paper in ex["papers"]])
gr.Markdown(papers_list)
gr.Markdown("**Research Parameters:**")
params_md = f"""
- **Method**: {ex['method']}
- **Model Layer**: {ex['reverse_diff']['layer']}
- **Initial Noise**: {ex['reverse_diff']['initial_noise']}
- **Diffusion Noise**: {ex['reverse_diff']['diffusion_noise']}
- **Step Size**: {ex['reverse_diff']['step_size']}
- **Iterations**: {ex['reverse_diff']['iterations']}
- **Epsilon**: {ex['reverse_diff']['epsilon']}
"""
gr.Markdown(params_md)
if i < len(examples) - 1: # Don't add separator after the last example
gr.Markdown("---")
# Set up event handler for the main inference
run_button.click(
fn=run_inference,
inputs=[
image_input, model_choice, inference_type,
eps_slider, iterations_slider,
initial_noise_slider, diffusion_noise_slider,
step_size_slider, layer_choice
],
outputs=[output_image, output_frames]
)
# About section
gr.Markdown("""
## About Generative Inference
Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo primarily uses the ReverseDiffusion method.
### ReverseDiffusion
Starts from a noisy version of the input image and iteratively optimizes it to match that image's
features, injecting a small amount of noise at each step. This approach is inspired by diffusion
models and reveals different aspects of visual processing.
### IncreaseConfidence
Optimizes the input to increase the network's confidence in its classification, enhancing the
features the network associates with its preferred interpretation.
### Parameters:
- **Initial Noise Ratio**: Controls the amount of noise added to the image at the beginning
- **Diffusion Noise Ratio**: Controls the amount of noise added at each optimization step
- **Step Size**: Learning rate for the optimization process
- **Number of Iterations**: How many optimization steps to perform
- **Model Layer**: Select a specific layer of the ResNet50 model to extract features from
- **Epsilon**: Controls the size of the perturbation during optimization

Different layers capture different levels of abstraction: earlier layers represent low-level features
like edges and textures, while later layers represent higher-level features and object parts.
""")
# Launch the demo
if __name__ == "__main__":
print(f"Starting server on port {args.port}")
demo.launch(
server_name="0.0.0.0",
server_port=args.port,
share=False,
debug=True
)