""" Example script showing how to use the SAM2 Video Background Remover API. This script demonstrates various use cases: 1. Simple single object tracking 2. Multiple object tracking 3. Refined segmentation with background points 4. Batch processing multiple videos """ from gradio_client import Client import json from pathlib import Path def example_1_simple_tracking(): """ Example 1: Track a single object (e.g., person, ball, car) """ print("=" * 60) print("Example 1: Simple Single Object Tracking") print("=" * 60) # Connect to your Space client = Client("furbola/chaskick") # Simple annotation: click on the center of your object in the first frame annotations = [ { "frame_idx": 0, # First frame "object_id": 1, # First object "points": [[320, 240]], # x, y coordinates of the object center "labels": [1] # 1 = this is a foreground point } ] # Process the video result = client.predict( video_file="./input_video.mp4", annotations_json=json.dumps(annotations), remove_background=True, max_frames=None, # Process all frames api_name="/segment_video_api" ) print(f"✅ Output saved to: {result}") def example_2_multi_object_tracking(): """ Example 2: Track multiple objects simultaneously Useful for: tracking player + ball, multiple people, etc. """ print("\n" + "=" * 60) print("Example 2: Multi-Object Tracking") print("=" * 60) client = Client("furbola/chaskick") annotations = [ # Object 1: Player { "frame_idx": 0, "object_id": 1, "points": [[320, 240]], "labels": [1] }, # Object 2: Ball { "frame_idx": 0, "object_id": 2, "points": [[500, 300]], "labels": [1] }, # Object 3: Another player { "frame_idx": 0, "object_id": 3, "points": [[150, 200]], "labels": [1] } ] result = client.predict( video_file="./soccer_match.mp4", annotations_json=json.dumps(annotations), remove_background=True, max_frames=300, # Limit to 300 frames for speed api_name="/segment_video_api" ) print(f"✅ Tracked 3 objects! Output: {result}") def example_3_refined_segmentation(): """ Example 3: Use both foreground AND background points for better accuracy Useful when: object is complex, background is similar color, etc. 
""" print("\n" + "=" * 60) print("Example 3: Refined Segmentation with Negative Points") print("=" * 60) client = Client("furbola/chaskick") annotations = [ { "frame_idx": 0, "object_id": 1, "points": [ [320, 240], # ✅ Point ON the person's body [350, 250], # ✅ Another point on the person [280, 220], # ✅ Third point for better coverage [100, 100], # ❌ Point on the BACKGROUND to exclude [600, 400] # ❌ Another background point ], "labels": [ 1, # foreground 1, # foreground 1, # foreground 0, # background (exclude this area) 0 # background (exclude this area) ] } ] result = client.predict( video_file="./person_video.mp4", annotations_json=json.dumps(annotations), remove_background=True, max_frames=None, api_name="/segment_video_api" ) print(f"✅ Refined segmentation complete: {result}") def example_4_temporal_annotations(): """ Example 4: Add annotations on multiple frames Useful when: object changes appearance, camera cuts, occlusions """ print("\n" + "=" * 60) print("Example 4: Multi-Frame Annotations") print("=" * 60) client = Client("furbola/chaskick") annotations = [ # Annotate frame 0 { "frame_idx": 0, "object_id": 1, "points": [[320, 240]], "labels": [1] }, # Annotate frame 50 (object might have moved or changed) { "frame_idx": 50, "object_id": 1, "points": [[450, 300]], "labels": [1] }, # Annotate frame 100 (after a camera cut or scene change) { "frame_idx": 100, "object_id": 1, "points": [[200, 180]], "labels": [1] } ] result = client.predict( video_file="./long_video.mp4", annotations_json=json.dumps(annotations), remove_background=True, max_frames=None, api_name="/segment_video_api" ) print(f"✅ Multi-frame tracking complete: {result}") def example_5_batch_processing(): """ Example 5: Process multiple videos in batch """ print("\n" + "=" * 60) print("Example 5: Batch Processing Multiple Videos") print("=" * 60) client = Client("furbola/chaskick") # List of videos to process videos = [ {"path": "./video1.mp4", "point": [320, 240]}, {"path": "./video2.mp4", "point": [400, 300]}, {"path": "./video3.mp4", "point": [250, 200]}, ] results = [] for i, video in enumerate(videos, 1): print(f"\nProcessing video {i}/{len(videos)}: {video['path']}") annotations = [{ "frame_idx": 0, "object_id": 1, "points": [video['point']], "labels": [1] }] try: result = client.predict( video_file=video['path'], annotations_json=json.dumps(annotations), remove_background=True, max_frames=200, # Limit frames for faster batch processing api_name="/segment_video_api" ) results.append({"input": video['path'], "output": result, "status": "✅"}) print(f" ✅ Success: {result}") except Exception as e: results.append({"input": video['path'], "output": None, "status": f"❌ {str(e)}"}) print(f" ❌ Failed: {e}") print("\n" + "=" * 60) print("Batch Processing Summary:") print("=" * 60) for r in results: print(f"{r['status']} {r['input']} -> {r['output']}") def example_6_highlight_mode(): """ Example 6: Highlight objects instead of removing background Useful for: visualization, debugging, object detection demos """ print("\n" + "=" * 60) print("Example 6: Highlight Mode (Keep Background)") print("=" * 60) client = Client("furbola/chaskick") annotations = [{ "frame_idx": 0, "object_id": 1, "points": [[320, 240]], "labels": [1] }] result = client.predict( video_file="./input_video.mp4", annotations_json=json.dumps(annotations), remove_background=False, # Keep background, just highlight the object max_frames=None, api_name="/segment_video_api" ) print(f"✅ Object highlighted: {result}") def example_7_find_coordinates(): """ Example 
def example_7_find_coordinates():
    """
    Example 7: Helper to find coordinates in a video

    Opens the first frame so you can identify x,y coordinates
    """
    print("\n" + "=" * 60)
    print("Example 7: Find Coordinates Helper")
    print("=" * 60)

    import cv2

    video_path = "./input_video.mp4"

    # Read the first frame
    cap = cv2.VideoCapture(video_path)
    ret, frame = cap.read()
    cap.release()

    if ret:
        # Save the first frame
        cv2.imwrite("first_frame.jpg", frame)
        print("✅ Saved first frame to: first_frame.jpg")
        print(f"   Video size: {frame.shape[1]}x{frame.shape[0]} (width x height)")
        print("   Open this image, note the x,y coordinates of your object,")
        print("   then use those coordinates in your annotation!")
    else:
        print("❌ Could not read video")


# ============================================================================
# UTILITY FUNCTIONS
# ============================================================================

def create_annotation(frame_idx, object_id, points, labels=None):
    """
    Helper function to create annotation objects.

    Args:
        frame_idx: Frame number (0 = first frame)
        object_id: Unique object ID (1, 2, 3, ...)
        points: List of [x, y] coordinates, e.g., [[320, 240]]
        labels: List of labels (1=foreground, 0=background). Defaults to all 1s.

    Returns:
        Dictionary with the annotation
    """
    if labels is None:
        labels = [1] * len(points)

    return {
        "frame_idx": frame_idx,
        "object_id": object_id,
        "points": points,
        "labels": labels
    }


def load_annotations_from_file(json_file):
    """Load annotations from a JSON file."""
    with open(json_file, 'r') as f:
        return json.load(f)


def save_annotations_to_file(annotations, json_file):
    """Save annotations to a JSON file."""
    with open(json_file, 'w') as f:
        json.dump(annotations, f, indent=2)


# ============================================================================
# MAIN
# ============================================================================

if __name__ == "__main__":
    print("""
    ╔════════════════════════════════════════════════════════════╗
    ║        SAM2 Video Background Remover - API Examples        ║
    ║      Uncomment an example in the code below to run it      ║
    ╚════════════════════════════════════════════════════════════╝
    """)

    # Uncomment the examples you want to run:

    # example_1_simple_tracking()
    # example_2_multi_object_tracking()
    # example_3_refined_segmentation()
    # example_4_temporal_annotations()
    # example_5_batch_processing()
    # example_6_highlight_mode()
    # example_7_find_coordinates()

    print("\n✅ Done! Check the output files.")
    print("\n🎉 Your Space: https://huggingface.co/spaces/furbola/chaskick")
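

# A minimal sketch (not part of the original script): how the utility helpers
# above compose. The file name and coordinates are placeholders, and the
# function is defined but never called, so running this module is unaffected.
def demo_annotation_helpers():
    """Round-trip annotations through a JSON file using the helpers above."""
    annotations = [
        create_annotation(frame_idx=0, object_id=1, points=[[320, 240]]),
        # Second object with one foreground and one background point:
        create_annotation(frame_idx=0, object_id=2,
                          points=[[500, 300], [100, 100]], labels=[1, 0]),
    ]
    save_annotations_to_file(annotations, "annotations.json")
    assert load_annotations_from_file("annotations.json") == annotations
    print(json.dumps(annotations, indent=2))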