Spaces:

JackAILab
/

ConsistentID

Running on Zero

App Files Files Community

JackAILab committed 16 days ago

Commit

4979db1

verified ·

1 Parent(s): 0d19657

Update app.py

Browse files

Files changed (1) hide show

app.py +92 -75

app.py CHANGED Viewed

@@ -19,7 +19,7 @@ from models.BiSeNet.model import BiSeNet
 # zero = torch.Tensor([0]).cuda()
 # print(zero.device) # <-- 'cpu' 🤔
 # device = zero.device # "cuda"
-device = "cuda"
 # Gets the absolute path of the current script
 script_directory = os.path.dirname(os.path.realpath(__file__))
@@ -34,14 +34,14 @@ pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
     torch_dtype=torch.float16,
     safety_checker=None, # use_safetensors=True,
     # variant="fp16"
-).to(device)
 ### Load other pretrained models
 ## BiSenet
 bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
 bise_net = BiSeNet(n_classes = 19)
 bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
-bise_net.cuda()
 import sys
 sys.path.append("./models/LLaVA")
@@ -77,85 +77,102 @@ pipe.FacialEncoder.to(device)
-@spaces.GPU
 def process(selected_template_images,costum_image,prompt
         ,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps,seed_set):
-    if model_selected_tab==0:
-        select_images = load_image(Image.open(selected_template_images))
-    else:
-        select_images = load_image(Image.fromarray(costum_image))
-    if prompt_selected_tab==0:
-        prompt = prompt_selected
-        negative_prompt = ""
-        need_safetycheck = False
-    else:
-        need_safetycheck = True
-    # hyper-parameter
-    num_steps = 50
-    seed_set = torch.randint(0, 1000, (1,)).item()
-    # merge_steps = 30
-    @torch.inference_mode()
-    def Enhance_prompt(prompt,select_images):
-        llva_prompt = f'Please ignore the image. Enhance the following text prompt for me. You can associate more details with the character\'s gesture, environment, and decent clothing:"{prompt}".'
-        # args = type('Args', (), {
-        #     "model_path": llva_model_path,
-        #     "model_base": None,
-        #     "model_name": get_model_name_from_path(llva_model_path),
-        #     "query": llva_prompt,
-        #     "conv_mode": None,
-        #     "image_file": select_images,
-        #     "sep": ",",
-        #     "temperature": 0,
-        #     "top_p": None,
-        #     "num_beams": 1,
-        #     "max_new_tokens": 512
-        # })()
-        # Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
-        return Enhanced_prompt
-    if prompt == "":
-        prompt = "A man, in a forest"
-        prompt = "A man, with backpack, in a raining tropical forest, adventuring, holding a flashlight, in mist, seeking animals"
-        prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
-    else:
-        # prompt=Enhance_prompt(prompt,Image.new('RGB', (200, 200), color = 'white'))
-        print(prompt)
-        pass
-    if negative_prompt == "":
-        negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
-    #Extend Prompt
-    prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
-    negtive_prompt_group="((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
-    negative_prompt = negative_prompt + negtive_prompt_group
-    # seed = torch.randint(0, 1000, (1,)).item()
-    generator = torch.Generator(device=device).manual_seed(seed_set)
-    images = pipe(
-        prompt=prompt,
-        width=width,
-        height=height,
-        input_id_images=select_images,
-        negative_prompt=negative_prompt,
-        num_images_per_prompt=1,
-        num_inference_steps=num_steps,
-        start_merge_step=merge_steps,
-        generator=generator,
-        retouching=retouching,
-        need_safetycheck=need_safetycheck,
-    ).images[0]
-    current_date = datetime.today()
-    return np.array(images)
 # Gets the templates
 script_directory = os.path.dirname(os.path.realpath(__file__))

 # zero = torch.Tensor([0]).cuda()
 # print(zero.device) # <-- 'cpu' 🤔
 # device = zero.device # "cuda"
+device = "cpu"
 # Gets the absolute path of the current script
 script_directory = os.path.dirname(os.path.realpath(__file__))
     torch_dtype=torch.float16,
     safety_checker=None, # use_safetensors=True,
     # variant="fp16"
+)
 ### Load other pretrained models
 ## BiSenet
 bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
 bise_net = BiSeNet(n_classes = 19)
 bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
+# bise_net.cuda()
 import sys
 sys.path.append("./models/LLaVA")
+@spaces.GPU(duration=120)
 def process(selected_template_images,costum_image,prompt
         ,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps,seed_set):
+    inference_device = "cuda"
+    pipe.to(inference_device)
+    pipe.image_encoder.to(inference_device)
+    pipe.image_proj_model.to(inference_device)
+    pipe.FacialEncoder.to(inference_device)
+    pipe.bise_net.to(inference_device)
+    try:
+        if model_selected_tab==0:
+            select_images = load_image(Image.open(selected_template_images))
+        else:
+            select_images = load_image(Image.fromarray(costum_image))
+        if prompt_selected_tab==0:
+            prompt = prompt_selected
+            negative_prompt = ""
+            need_safetycheck = False
+        else:
+            need_safetycheck = True
+        # hyper-parameter
+        num_steps = 50
+        seed_set = torch.randint(0, 1000, (1,)).item()
+        # merge_steps = 30
+        @torch.inference_mode()
+        def Enhance_prompt(prompt,select_images):
+            llva_prompt = f'Please ignore the image. Enhance the following text prompt for me. You can associate more details with the character\'s gesture, environment, and decent clothing:"{prompt}".'
+            # args = type('Args', (), {
+            #     "model_path": llva_model_path,
+            #     "model_base": None,
+            #     "model_name": get_model_name_from_path(llva_model_path),
+            #     "query": llva_prompt,
+            #     "conv_mode": None,
+            #     "image_file": select_images,
+            #     "sep": ",",
+            #     "temperature": 0,
+            #     "top_p": None,
+            #     "num_beams": 1,
+            #     "max_new_tokens": 512
+            # })()
+            # Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
+            return Enhanced_prompt
+        if prompt == "":
+            prompt = "A man, in a forest"
+            prompt = "A man, with backpack, in a raining tropical forest, adventuring, holding a flashlight, in mist, seeking animals"
+            prompt = "A person, in a sowm, wearing santa hat and a scarf, with a cottage behind"
+        else:
+            # prompt=Enhance_prompt(prompt,Image.new('RGB', (200, 200), color = 'white'))
+            print(prompt)
+            pass
+        if negative_prompt == "":
+            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"
+        #Extend Prompt
+        prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"
+        negtive_prompt_group="((cross-eye)),((cross-eyed)),(((NFSW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
+        negative_prompt = negative_prompt + negtive_prompt_group
+        # seed = torch.randint(0, 1000, (1,)).item()
+        generator = torch.Generator(device=device).manual_seed(seed_set)
+        images = pipe(
+            prompt=prompt,
+            width=width,
+            height=height,
+            input_id_images=select_images,
+            negative_prompt=negative_prompt,
+            num_images_per_prompt=1,
+            num_inference_steps=num_steps,
+            start_merge_step=merge_steps,
+            generator=generator,
+            retouching=retouching,
+            need_safetycheck=need_safetycheck,
+        ).images[0]
+        current_date = datetime.today()
+        return np.array(images)
+    finally:
+        # Move the models back to the CPU and clear the CUDA cache once inference finishes
+        pipe.to("cpu")
+        pipe.image_encoder.to("cpu")
+        pipe.image_proj_model.to("cpu")
+        pipe.FacialEncoder.to("cpu")
+        pipe.bise_net.to("cpu")
+        torch.cuda.empty_cache()
 # Gets the templates
 script_directory = os.path.dirname(os.path.realpath(__file__))