import os
import argparse

import gradio as gr
import torch
import numpy as np
from PIL import Image

try:
    from spaces import GPU
except ImportError:
    # Define a no-op decorator if running locally (outside Hugging Face Spaces)
    def GPU(func):
        return func

from inference import GenerativeInferenceModel, get_inference_configs

# Pick a default port: Hugging Face Spaces sets SPACE_ID and provides PORT;
# fall back to 8861 for local runs
if "SPACE_ID" in os.environ:
    default_port = int(os.environ.get("PORT", 7860))
else:
    default_port = 8861

# Parse command line arguments
parser = argparse.ArgumentParser(description='Run Generative Inference Demo')
parser.add_argument('--port', type=int, default=default_port,
                    help='Port to run the server on')
args = parser.parse_args()

# Create model directories if they don't exist
os.makedirs("models", exist_ok=True)
os.makedirs("stimuli", exist_ok=True)

# Initialize model
model = GenerativeInferenceModel()

# Example images and their parameters, using the values from the research.
# All examples use the robust ResNet-50 with the ReverseDiffusion method;
# "step_size" is the learning rate of the optimization.
examples = [
    {
        "image": os.path.join("stimuli", "Kanizsa_square.jpg"),
        "name": "Kanizsa Square",
        "wiki": "https://en.wikipedia.org/wiki/Kanizsa_triangle",
        "papers": [
            "[Gestalt Psychology](https://en.wikipedia.org/wiki/Gestalt_psychology)",
            "[Neural Mechanisms](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer4",  # last layer
            "initial_noise": 0.1, "diffusion_noise": 0.003,
            "step_size": 0.5, "iterations": 50, "epsilon": 0.5
        }
    },
    {
        "image": os.path.join("stimuli", "face_vase.png"),
        "name": "Rubin's Face-Vase (Object Prior)",
        "wiki": "https://en.wikipedia.org/wiki/Rubin_vase",
        "papers": [
            "[Figure-Ground Perception](https://en.wikipedia.org/wiki/Figure-ground_(perception))",
            "[Bistable Perception](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer4",  # last layer
            "initial_noise": 0.7, "diffusion_noise": 0.005,
            "step_size": 1.0, "iterations": 50, "epsilon": 1.0
        }
    },
    {
        "image": os.path.join("stimuli", "figure_ground.png"),
        "name": "Figure-Ground Illusion",
        "wiki": "https://en.wikipedia.org/wiki/Figure-ground_(perception)",
        "papers": [
            "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
            "[Perceptual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer3",
            "initial_noise": 0.5, "diffusion_noise": 0.005,
            "step_size": 0.8, "iterations": 50, "epsilon": 0.8
        }
    },
    {
        "image": os.path.join("stimuli", "Neon_Color_Circle.jpg"),
        "name": "Neon Color Spreading",
        "wiki": "https://en.wikipedia.org/wiki/Neon_color_spreading",
        "papers": [
            "[Color Assimilation](https://doi.org/10.1016/j.visres.2000.200.1)",
            "[Perceptual Filling-in](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer3",
            "initial_noise": 0.5, "diffusion_noise": 0.003,
            "step_size": 1.0, "iterations": 50, "epsilon": 1.0
        }
    },
    {
        "image": os.path.join("stimuli", "EhresteinSingleColor.png"),
        "name": "Ehrenstein Illusion",
        "wiki": "https://en.wikipedia.org/wiki/Ehrenstein_illusion",
        "papers": [
            "[Subjective Contours](https://doi.org/10.1016/j.visres.2000.200.1)",
            "[Neural Processing](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer3",
            "initial_noise": 0.5, "diffusion_noise": 0.005,
            "step_size": 0.8, "iterations": 50, "epsilon": 0.8
        }
    },
    {
        "image": os.path.join("stimuli", "Confetti_illusion.png"),
        "name": "Confetti Illusion",
        "wiki": "https://en.wikipedia.org/wiki/Optical_illusion",
        "papers": [
            "[Color Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
            "[Context Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer3",
            "initial_noise": 0.7, "diffusion_noise": 0.01,
            "step_size": 1.0, "iterations": 50, "epsilon": 1.0
        }
    },
    {
        "image": os.path.join("stimuli", "CornsweetBlock.png"),
        "name": "Cornsweet Illusion",
        "wiki": "https://en.wikipedia.org/wiki/Cornsweet_illusion",
        "papers": [
            "[Brightness Perception](https://doi.org/10.1016/j.visres.2000.200.1)",
            "[Edge Effects](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer3",
            "initial_noise": 0.5, "diffusion_noise": 0.005,
            "step_size": 0.8, "iterations": 50, "epsilon": 0.8
        }
    },
    {
        "image": os.path.join("stimuli", "GroupingByContinuity.png"),
        "name": "Grouping by Continuity",
        "wiki": "https://en.wikipedia.org/wiki/Principles_of_grouping",
        "papers": [
            "[Gestalt Principles](https://en.wikipedia.org/wiki/Gestalt_psychology)",
            "[Visual Organization](https://doi.org/10.1016/j.tics.2003.08.003)"
        ],
        "method": "ReverseDiffusion",
        "reverse_diff": {
            "model": "resnet50_robust", "layer": "layer3",
            "initial_noise": 0.1, "diffusion_noise": 0.005,
            "step_size": 0.4, "iterations": 100, "epsilon": 0.4
        }
    }
]
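
# Convenience check (added here; not part of the original pipeline): warn at
# startup if an example stimulus is missing from stimuli/, so a broken asset
# surfaces in the console rather than as an empty Gradio thumbnail.
for _ex in examples:
    if not os.path.exists(_ex["image"]):
        print(f"Warning: example stimulus not found: {_ex['image']}")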

@GPU
def run_inference(image, model_type, inference_type, eps_value, num_iterations,
                  initial_noise=0.05, diffusion_noise=0.01, step_size=0.8,
                  model_layer="layer3"):
    # Keyword defaults mirror the UI slider defaults below
    eps = float(eps_value)

    # Load the inference configuration for the selected method
    config = get_inference_configs(inference_type=inference_type, eps=eps,
                                   n_itr=int(num_iterations))

    # ReverseDiffusion-specific parameters
    if inference_type == "ReverseDiffusion":
        config['initial_inference_noise_ratio'] = float(initial_noise)
        config['diffusion_noise_ratio'] = float(diffusion_noise)
        config['step_size'] = float(step_size)
        config['top_layer'] = model_layer

    # Run generative inference
    result = model.inference(image, model_type, config)

    # Extract results; support both return formats
    if isinstance(result, tuple):
        # Old format: (output_image, all_steps)
        output_image, all_steps = result
    else:
        # New format: dictionary
        output_image = result['final_image']
        all_steps = result['steps']

    # Convert each intermediate step tensor to a PIL image for the animation
    frames = []
    for step_image in all_steps:
        step_pil = Image.fromarray(
            (step_image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
        frames.append(step_pil)

    # Convert the final output image to PIL
    final_image = Image.fromarray(
        (output_image.permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))

    # Return the final inferred image and the animation frames
    return final_image, frames
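
# Assumptions baked into run_inference (inferred from how its outputs are
# used, not from the inference module's documentation): model.inference
# returns images as float tensors in (C, H, W) layout with values in [0, 1].
#
# Hypothetical headless usage, with the Kanizsa Square parameters from the
# examples list above (kept as a comment so it does not run on import):
#
#   img = Image.open(os.path.join("stimuli", "Kanizsa_square.jpg"))
#   final, frames = run_inference(img, "resnet50_robust", "ReverseDiffusion",
#                                 eps_value=0.5, num_iterations=50,
#                                 initial_noise=0.1, diffusion_noise=0.003,
#                                 step_size=0.5, model_layer="layer4")
#   final.save("kanizsa_inferred.png")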

# Apply an example's stored parameters to the UI controls
def apply_example(example):
    return [
        example["image"],
        "resnet50_robust",                         # model type
        example["method"],                         # inference type
        example["reverse_diff"]["epsilon"],
        example["reverse_diff"]["iterations"],
        example["reverse_diff"]["initial_noise"],
        example["reverse_diff"]["diffusion_noise"],
        example["reverse_diff"]["step_size"],
        example["reverse_diff"]["layer"]
    ]

# Define the interface
with gr.Blocks(title="Generative Inference Demo") as demo:
    gr.Markdown("# Generative Inference Demo")
    gr.Markdown("This demo showcases how neural networks can perceive visual illusions through generative inference.")

    # Main processing interface
    with gr.Row():
        with gr.Column(scale=1):
            # Inputs
            image_input = gr.Image(label="Input Image", type="pil")
            with gr.Row():
                model_choice = gr.Dropdown(
                    choices=["resnet50_robust", "standard_resnet50"],
                    value="resnet50_robust",
                    label="Model"
                )
                inference_type = gr.Dropdown(
                    choices=["ReverseDiffusion", "IncreaseConfidence"],
                    value="ReverseDiffusion",
                    label="Inference Method"
                )
            with gr.Row():
                eps_slider = gr.Slider(minimum=0.01, maximum=3.0, value=0.5, step=0.01,
                                       label="Epsilon (Perturbation Size)")
                # Maximum is 100 so the "Grouping by Continuity" example
                # (100 iterations) fits within the slider range
                iterations_slider = gr.Slider(minimum=1, maximum=100, value=50, step=1,
                                              label="Number of Iterations")
            with gr.Row():
                initial_noise_slider = gr.Slider(minimum=0.0, maximum=1.0, value=0.05, step=0.01,
                                                 label="Initial Noise Ratio")
                diffusion_noise_slider = gr.Slider(minimum=0.0, maximum=0.05, value=0.01, step=0.001,
                                                   label="Diffusion Noise Ratio")
            with gr.Row():
                step_size_slider = gr.Slider(minimum=0.01, maximum=2.0, value=0.5, step=0.01,
                                             label="Step Size")
                layer_choice = gr.Dropdown(
                    choices=["all", "conv1", "bn1", "relu", "maxpool",
                             "layer1", "layer2", "layer3", "layer4", "avgpool"],
                    value="all",
                    label="Model Layer"
                )
            run_button = gr.Button("Run Inference", variant="primary")

        with gr.Column(scale=2):
            # Outputs
            output_image = gr.Image(label="Final Inferred Image")
            output_frames = gr.Gallery(label="Inference Steps", columns=5, rows=2)

    # Examples section with integrated explanations
    gr.Markdown("## Visual Illusion Examples")
    gr.Markdown("Select an illusion to load its parameters and see how generative inference reveals perceptual effects.")

    # One row per example: image on the left, explanation on the right
    for i, ex in enumerate(examples):
        with gr.Row():
            with gr.Column(scale=1):
                # Display the example image
                example_img = gr.Image(value=ex["image"], type="filepath", label=ex["name"])
                load_btn = gr.Button("Load Parameters", variant="primary")

                # Apply this example's parameters when clicked
                # (ex=ex binds the current example to the lambda)
                load_btn.click(
                    fn=lambda ex=ex: apply_example(ex),
                    outputs=[
                        image_input, model_choice, inference_type,
                        eps_slider, iterations_slider,
                        initial_noise_slider, diffusion_noise_slider,
                        step_size_slider, layer_choice
                    ]
                )

            with gr.Column(scale=2):
                gr.Markdown(f"### {ex['name']}")
                gr.Markdown(f"[Read more on Wikipedia]({ex['wiki']})")
                gr.Markdown("**Previous Explanations:**")
                papers_list = "\n".join([f"- {paper}" for paper in ex["papers"]])
                gr.Markdown(papers_list)
                gr.Markdown("**Research Parameters:**")
                params_md = f"""
- **Method**: {ex['method']}
- **Model Layer**: {ex['reverse_diff']['layer']}
- **Initial Noise**: {ex['reverse_diff']['initial_noise']}
- **Diffusion Noise**: {ex['reverse_diff']['diffusion_noise']}
- **Step Size**: {ex['reverse_diff']['step_size']}
- **Iterations**: {ex['reverse_diff']['iterations']}
- **Epsilon**: {ex['reverse_diff']['epsilon']}
"""
                gr.Markdown(params_md)

        if i < len(examples) - 1:  # no separator after the last example
            gr.Markdown("---")
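
    # Note: the value order returned by apply_example must match the component
    # order in the outputs list of each "Load Parameters" button above, and the
    # inputs list passed to run_inference below follows the same order.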
    # Set up event handler for the main inference
    run_button.click(
        fn=run_inference,
        inputs=[
            image_input, model_choice, inference_type,
            eps_slider, iterations_slider,
            initial_noise_slider, diffusion_noise_slider,
            step_size_slider, layer_choice
        ],
        outputs=[output_image, output_frames]
    )

    # About section
    gr.Markdown("""
## About Generative Inference

Generative inference is a technique that reveals how neural networks perceive visual stimuli. This demo primarily uses the ReverseDiffusion method.

### ReverseDiffusion
Starts with a noisy version of the image and guides the optimization to match features of the noisy image. This approach reveals different aspects of visual processing and is inspired by diffusion models.

### IncreaseConfidence
Optimizes the network's activations to increase confidence in classification, leading to enhanced features that the network associates with its preferred interpretation.

### Parameters
- **Initial Noise Ratio**: Controls the amount of noise added to the image at the beginning
- **Diffusion Noise Ratio**: Controls the amount of noise added at each optimization step
- **Step Size**: Learning rate for the optimization process
- **Number of Iterations**: How many optimization steps to perform
- **Model Layer**: Select a specific layer of the ResNet50 model to extract features from
- **Epsilon**: Controls the size of perturbation during optimization

Different layers capture different levels of abstraction: earlier layers represent low-level features like edges and textures, while later layers represent higher-level features and object parts.
""")

# Launch the demo
if __name__ == "__main__":
    print(f"Starting server on port {args.port}")
    demo.launch(
        server_name="0.0.0.0",
        server_port=args.port,
        share=False,
        debug=True
    )
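
# Local usage sketch (assuming this script is saved as app.py; the filename
# is not fixed anywhere in this file):
#   python app.py --port 8861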