Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -19,7 +19,7 @@ from models.BiSeNet.model import BiSeNet
|
|
| 19 |
# zero = torch.Tensor([0]).cuda()
|
| 20 |
# print(zero.device) # <-- 'cpu' 🤔
|
| 21 |
# device = zero.device # "cuda"
|
| 22 |
-
device = "
|
| 23 |
|
| 24 |
# Gets the absolute path of the current script
|
| 25 |
script_directory = os.path.dirname(os.path.realpath(__file__))
|
|
@@ -34,14 +34,14 @@ pipe = ConsistentIDStableDiffusionPipeline.from_pretrained(
|
|
| 34 |
torch_dtype=torch.float16,
|
| 35 |
safety_checker=None, # use_safetensors=True,
|
| 36 |
# variant="fp16"
|
| 37 |
-
)
|
| 38 |
|
| 39 |
### Load other pretrained models
|
| 40 |
## BiSenet
|
| 41 |
bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
|
| 42 |
bise_net = BiSeNet(n_classes = 19)
|
| 43 |
bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
|
| 44 |
-
bise_net.cuda()
|
| 45 |
|
| 46 |
import sys
|
| 47 |
sys.path.append("./models/LLaVA")
|
|
@@ -77,85 +77,102 @@ pipe.FacialEncoder.to(device)
|
|
| 77 |
|
| 78 |
|
| 79 |
|
| 80 |
-
@spaces.GPU
|
| 81 |
def process(selected_template_images,costum_image,prompt
|
| 82 |
,negative_prompt,prompt_selected,retouching,model_selected_tab,prompt_selected_tab,width,height,merge_steps,seed_set):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 83 |
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
seed_set = torch.randint(0, 1000, (1,)).item()
|
| 99 |
-
# merge_steps = 30
|
| 100 |
|
| 101 |
-
|
| 102 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 103 |
|
| 104 |
-
|
| 105 |
-
# args = type('Args', (), {
|
| 106 |
-
# "model_path": llva_model_path,
|
| 107 |
-
# "model_base": None,
|
| 108 |
-
# "model_name": get_model_name_from_path(llva_model_path),
|
| 109 |
-
# "query": llva_prompt,
|
| 110 |
-
# "conv_mode": None,
|
| 111 |
-
# "image_file": select_images,
|
| 112 |
-
# "sep": ",",
|
| 113 |
-
# "temperature": 0,
|
| 114 |
-
# "top_p": None,
|
| 115 |
-
# "num_beams": 1,
|
| 116 |
-
# "max_new_tokens": 512
|
| 117 |
-
# })()
|
| 118 |
-
# Enhanced_prompt = eval_model(args, llva_tokenizer, llva_model, llva_image_processor)
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
|
| 137 |
-
|
| 138 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 139 |
|
| 140 |
-
|
| 141 |
-
|
| 142 |
-
|
| 143 |
-
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
num_inference_steps=num_steps,
|
| 151 |
-
start_merge_step=merge_steps,
|
| 152 |
-
generator=generator,
|
| 153 |
-
retouching=retouching,
|
| 154 |
-
need_safetycheck=need_safetycheck,
|
| 155 |
-
).images[0]
|
| 156 |
-
|
| 157 |
-
current_date = datetime.today()
|
| 158 |
-
return np.array(images)
|
| 159 |
|
| 160 |
# Gets the templates
|
| 161 |
script_directory = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
| 19 |
# zero = torch.Tensor([0]).cuda()
|
| 20 |
# print(zero.device) # <-- 'cpu' 🤔
|
| 21 |
# device = zero.device # "cuda"
|
| 22 |
+
device = "cpu"
|
| 23 |
|
| 24 |
# Gets the absolute path of the current script
|
| 25 |
script_directory = os.path.dirname(os.path.realpath(__file__))
|
|
|
|
| 34 |
torch_dtype=torch.float16,
|
| 35 |
safety_checker=None, # use_safetensors=True,
|
| 36 |
# variant="fp16"
|
| 37 |
+
)
|
| 38 |
|
| 39 |
### Load other pretrained models
|
| 40 |
## BiSenet
|
| 41 |
bise_net_cp_path = hf_hub_download(repo_id="JackAILab/ConsistentID", filename="face_parsing.pth", local_dir="./checkpoints")
|
| 42 |
bise_net = BiSeNet(n_classes = 19)
|
| 43 |
bise_net.load_state_dict(torch.load(bise_net_cp_path, map_location="cpu")) # device fail
|
| 44 |
+
# bise_net.cuda()
|
| 45 |
|
| 46 |
import sys
|
| 47 |
sys.path.append("./models/LLaVA")
|
|
|
|
| 77 |
|
| 78 |
|
| 79 |
|
| 80 |
+
def _move_pipeline_to(target_device):
    """Move ``pipe`` and the sub-modules this app relocates explicitly to *target_device*."""
    pipe.to(target_device)
    # These sub-modules are moved individually as well as through ``pipe.to``.
    # NOTE(review): presumably ``pipe.to`` does not cover them -- confirm.
    pipe.image_encoder.to(target_device)
    pipe.image_proj_model.to(target_device)
    pipe.FacialEncoder.to(target_device)
    pipe.bise_net.to(target_device)


@spaces.GPU(duration=120)
def process(
    selected_template_images,
    costum_image,
    prompt,
    negative_prompt,
    prompt_selected,
    retouching,
    model_selected_tab,
    prompt_selected_tab,
    width,
    height,
    merge_steps,
    seed_set,
):
    """Run the pipeline once and return the generated image as a NumPy array.

    Args:
        selected_template_images: Source opened with ``Image.open`` when
            ``model_selected_tab == 0``.
        costum_image: Array converted with ``Image.fromarray`` when
            ``model_selected_tab != 0``.
        prompt: Free-form prompt; replaced by ``prompt_selected`` when
            ``prompt_selected_tab == 0``. An empty prompt falls back to a
            built-in default.
        negative_prompt: Free-form negative prompt; cleared when
            ``prompt_selected_tab == 0``. An empty value falls back to a
            built-in default. A fixed group of extra terms is always appended.
        prompt_selected: Preset prompt used when ``prompt_selected_tab == 0``.
        retouching: Forwarded to the pipeline unchanged.
        model_selected_tab: ``0`` selects the template image, anything else
            the custom image.
        prompt_selected_tab: ``0`` selects the preset prompt and disables the
            safety check; anything else enables it.
        width: Forwarded to the pipeline unchanged.
        height: Forwarded to the pipeline unchanged.
        merge_steps: Forwarded to the pipeline as ``start_merge_step``.
        seed_set: Currently ignored -- it is overwritten with a random seed
            (kept from the original behavior).

    Returns:
        ``np.array`` of the first image produced by the pipeline.
    """
    try:
        # Moving to the GPU happens inside the ``try`` so that a failure
        # part-way through still triggers the move back to CPU below.
        _move_pipeline_to("cuda")

        if model_selected_tab == 0:
            select_images = load_image(Image.open(selected_template_images))
        else:
            select_images = load_image(Image.fromarray(costum_image))

        if prompt_selected_tab == 0:
            prompt = prompt_selected
            negative_prompt = ""
            need_safetycheck = False
        else:
            need_safetycheck = True

        # Hyper-parameters
        num_steps = 50
        # NOTE(review): the caller-supplied seed is discarded here, as in the
        # original code -- confirm whether this is intentional.
        seed_set = torch.randint(0, 1000, (1,)).item()

        # NOTE: LLaVA-based prompt enhancement is disabled; the prompt is used
        # as given.
        if prompt == "":
            prompt = "A person, in the snow, wearing santa hat and a scarf, with a cottage behind"
        else:
            print(prompt)

        if negative_prompt == "":
            negative_prompt = "monochrome, lowres, bad anatomy, worst quality, low quality, blurry"

        # Extend prompt
        prompt = "cinematic photo," + prompt + ", 50mm photograph, half-length portrait, film, bokeh, professional, 4k, highly detailed"

        negtive_prompt_group = "((cross-eye)),((cross-eyed)),(((NSFW))),(nipple),((((ugly)))), (((duplicate))), ((morbid)), ((mutilated)), [out of frame], extra fingers, mutated hands, ((poorly drawn hands)), ((poorly drawn face)), (((mutation))), (((deformed))), ((ugly)), blurry, ((bad anatomy)), (((bad proportions))), ((extra limbs)), cloned face, (((disfigured))). out of frame, ugly, extra limbs, (bad anatomy), gross proportions, (malformed limbs), ((missing arms)), ((missing legs)), (((extra arms))), (((extra legs))), mutated hands, (fused fingers), (too many fingers), (((long neck)))"
        # Join with a separator so the last user term and the first group
        # term stay distinct (previously they fused, e.g. "blurry((cross-eye))").
        negative_prompt = negative_prompt + ", " + negtive_prompt_group

        generator = torch.Generator(device=device).manual_seed(seed_set)

        image = pipe(
            prompt=prompt,
            width=width,
            height=height,
            input_id_images=select_images,
            negative_prompt=negative_prompt,
            num_images_per_prompt=1,
            num_inference_steps=num_steps,
            start_merge_step=merge_steps,
            generator=generator,
            retouching=retouching,
            need_safetycheck=need_safetycheck,
        ).images[0]

        return np.array(image)
    finally:
        # Release the GPU once inference has finished (or failed).
        _move_pipeline_to("cpu")
        torch.cuda.empty_cache()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 176 |
|
| 177 |
# Gets the templates
|
| 178 |
script_directory = os.path.dirname(os.path.realpath(__file__))
|