Spaces:

vllab
/

controlnet-hands

Runtime error

App Files Files Community

Vincent-luo committed on Apr 29, 2023

Commit

eb4334e

1 Parent(s): 8527cc7

Update app.py

Browse files

Files changed (1) hide show

app.py +64 -3

app.py CHANGED Viewed

@@ -6,12 +6,71 @@ from PIL import Image
 from argparse import Namespace
 import gradio as gr
 from diffusers import (
     FlaxControlNetModel,
     FlaxStableDiffusionControlNetPipeline,
 )
 args = Namespace(
     pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5",
     revision="non-ema",
@@ -53,7 +112,8 @@ def infer(prompt, negative_prompt, image):
     prompt_ids = pipeline.prepare_text_inputs(prompts)
     prompt_ids = shard(prompt_ids)
-    validation_image = Image.fromarray(image).convert("RGB")
     processed_image = pipeline.prepare_image_inputs(num_samples * [validation_image])
     processed_image = shard(processed_image)
@@ -73,7 +133,8 @@ def infer(prompt, negative_prompt, image):
     images = images.reshape((images.shape[0] * images.shape[1],) + images.shape[-3:])
-    return images[0]
 with gr.Blocks(theme='gradio/soft') as demo:
@@ -84,7 +145,7 @@ with gr.Blocks(theme='gradio/soft') as demo:
         prompt_input = gr.Textbox(label="Prompt")
         negative_prompt = gr.Textbox(label="Negative Prompt")
         input_image = gr.Image(label="Input Image")
-        output_image = gr.Image(label="Output Image")
         submit_btn = gr.Button(value = "Submit")
         inputs = [prompt_input, negative_prompt, input_image]
         submit_btn.click(fn=infer, inputs=inputs, outputs=[output_image])

 from argparse import Namespace
 import gradio as gr
+import numpy as np
+import mediapipe as mp
+from mediapipe import solutions
+from mediapipe.framework.formats import landmark_pb2
+from mediapipe.tasks import python
+from mediapipe.tasks.python import vision
+import cv2
 from diffusers import (
     FlaxControlNetModel,
     FlaxStableDiffusionControlNetPipeline,
 )
+# mediapipe annotation
+# Styling constants for annotation text.
+# NOTE(review): none of these four names is referenced by the functions
+# visible in this diff -- confirm whether they are still needed.
+MARGIN = 10  # pixels
+FONT_SIZE = 1
+FONT_THICKNESS = 1
+HANDEDNESS_TEXT_COLOR = (88, 205, 54) # vibrant green
+def draw_landmarks_on_image(rgb_image, detection_result):
+  """Draw the detected hand skeletons on a black canvas.
+
+  Args:
+    rgb_image: numpy array; only its shape and dtype are used (through
+      np.zeros_like) to size the output canvas.
+    detection_result: object exposing `hand_landmarks`, a sequence holding
+      one iterable of landmarks (each with x, y, z attributes) per hand.
+
+  Returns:
+    A new numpy array shaped like `rgb_image`. It is all zeros except where
+    landmarks and connections were drawn, and stays all zeros when
+    `hand_landmarks` is empty.
+  """
+  # Start from zeros rather than from a copy of the input, so the result
+  # contains only the drawn annotation and none of the original pixels.
+  annotated_image = np.zeros_like(rgb_image)
+  # Loop through the detected hands to visualize.
+  for hand_landmarks in detection_result.hand_landmarks:
+    # Repack the landmarks into a NormalizedLandmarkList proto, which is the
+    # object handed to draw_landmarks below.
+    hand_landmarks_proto = landmark_pb2.NormalizedLandmarkList()
+    hand_landmarks_proto.landmark.extend([
+      landmark_pb2.NormalizedLandmark(x=landmark.x, y=landmark.y, z=landmark.z)
+      for landmark in hand_landmarks
+    ])
+    solutions.drawing_utils.draw_landmarks(
+      annotated_image,
+      hand_landmarks_proto,
+      solutions.hands.HAND_CONNECTIONS,
+      solutions.drawing_styles.get_default_hand_landmarks_style(),
+      solutions.drawing_styles.get_default_hand_connections_style())
+  return annotated_image
+def generate_annotation(img):
+    """Detect hands in `img` and return the landmark annotation image.
+
+    img(input): numpy array, wrapped as an SRGB mp.Image
+       (assumes an HxWx3 uint8 RGB array -- TODO confirm against caller)
+    annotated_image(output): numpy array returned by draw_landmarks_on_image
+    """
+    # STEP 2: Create a HandLandmarker object.
+    base_options = python.BaseOptions(model_asset_path='hand_landmarker.task')
+    options = vision.HandLandmarkerOptions(base_options=base_options,
+                                        num_hands=2)
+    # A landmarker is built on every call, so use it as a context manager:
+    # it is closed as soon as detection finishes instead of being leaked.
+    with vision.HandLandmarker.create_from_options(options) as detector:
+        # STEP 3: Load the input image.
+        image = mp.Image(
+            image_format=mp.ImageFormat.SRGB, data=img)
+        # STEP 4: Detect hand landmarks from the input image.
+        detection_result = detector.detect(image)
+    # STEP 5: Process the classification result. In this case, visualize it.
+    annotated_image = draw_landmarks_on_image(image.numpy_view(), detection_result)
+    return annotated_image
 args = Namespace(
     pretrained_model_name_or_path="runwayml/stable-diffusion-v1-5",
     revision="non-ema",
     prompt_ids = pipeline.prepare_text_inputs(prompts)
     prompt_ids = shard(prompt_ids)
+    annotated_image = generate_annotation(image)
+    validation_image = Image.fromarray(annotated_image).convert("RGB")
     processed_image = pipeline.prepare_image_inputs(num_samples * [validation_image])
     processed_image = shard(processed_image)
     images = images.reshape((images.shape[0] * images.shape[1],) + images.shape[-3:])
+    results = [i for i in images]
+    return [annotated_image] + results
 with gr.Blocks(theme='gradio/soft') as demo:
         prompt_input = gr.Textbox(label="Prompt")
         negative_prompt = gr.Textbox(label="Negative Prompt")
         input_image = gr.Image(label="Input Image")
+        output_image = gr.Gallery(label='Output Image', show_label=False, elem_id="gallery").style(grid=3, height='auto')
         submit_btn = gr.Button(value = "Submit")
         inputs = [prompt_input, negative_prompt, input_image]
         submit_btn.click(fn=infer, inputs=inputs, outputs=[output_image])