# NOTE: this capture began with Hugging Face Spaces page chrome ("Spaces: Paused"); it is not part of the program.
import base64
import io
import json
import logging
import os
import shlex
import subprocess
import sys
import tempfile
import time
from functools import partial
from typing import Optional

import boto3
import gradio as gr
import numpy as np
import rembg
import spaces
import torch
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import StreamingResponse
from PIL import Image
from pydantic import BaseModel
# Install the bundled torchmcubes wheel at startup, before the `tsr` imports
# below (presumably `tsr` needs torchmcubes at import time — TODO confirm).
# `sys.executable -m pip` guarantees the wheel lands in the interpreter that is
# running this app instead of whichever `pip` happens to be first on PATH.
_TORCHMCUBES_WHEEL = "wheel/torchmcubes-0.1.0-cp310-cp310-linux_x86_64.whl"
_wheel_install = subprocess.run(
    [sys.executable, "-m", "pip", "install", _TORCHMCUBES_WHEEL]
)
if _wheel_install.returncode != 0:
    # Best effort: the package may already be installed, so only report the
    # failure instead of aborting startup.
    logging.warning(
        "pip install of %s exited with code %d",
        _TORCHMCUBES_WHEEL,
        _wheel_install.returncode,
    )

from tsr.system import TSR
from tsr.utils import remove_background, resize_foreground, to_gradio_3d_orientation
# Markdown text rendered at the top of the Gradio page.
HEADER = """FRAME AI"""

# Chunk size handed to the TripoSR renderer.
_RENDER_CHUNK_SIZE = 131072

torch.cuda.empty_cache()

# Use the first CUDA device when one is available, otherwise the CPU.
device = "cuda:0" if torch.cuda.is_available() else "cpu"

# Load the pretrained TripoSR weights once at import time and move the model
# to the selected device; every request reuses this instance.
model = TSR.from_pretrained(
    "stabilityai/TripoSR",
    config_name="config.yaml",
    weight_name="model.ckpt",
)
model.renderer.set_chunk_size(_RENDER_CHUNK_SIZE)
model.to(device)

# Shared rembg session passed to remove_background() during preprocessing.
rembg_session = rembg.new_session()
# AWS credentials are read from the environment variables ACCESS and SECRET.
ACCESS = os.getenv("ACCESS")
SECRET = os.getenv("SECRET")

# Both Bedrock clients share the same credentials and region.
_aws_client_options = {
    "aws_access_key_id": ACCESS,
    "aws_secret_access_key": SECRET,
    "region_name": "us-east-1",
}
bedrock = boto3.client(service_name="bedrock", **_aws_client_options)
bedrock_runtime = boto3.client(service_name="bedrock-runtime", **_aws_client_options)
# Disabled variant of generate_image_from_text that called modelId
# 'stability.stable-diffusion-xl-v1'; kept commented out.
# def generate_image_from_text(pos_prompt):
#     # bedrock_runtime = boto3.client(region_name = 'us-east-1', service_name='bedrock-runtime')
#     parameters = {'text_prompts': [{'text': pos_prompt , 'weight':1},
#                                    {'text': """Blurry, out of frame, out of focus, Detailed, dull, duplicate, bad quality, low resolution, cropped""", 'weight': -1}],
#                   'cfg_scale': 7, 'seed': 0, 'samples': 1}
#     request_body = json.dumps(parameters)
#     response = bedrock_runtime.invoke_model(body=request_body,modelId = 'stability.stable-diffusion-xl-v1')
#     response_body = json.loads(response.get('body').read())
#     base64_image_data = base64.b64decode(response_body['artifacts'][0]['base64'])
#     return Image.open(io.BytesIO(base64_image_data))
def gen_pos_prompt(text):
    """Expand a short object description into a positive image-generation prompt.

    The description is embedded in a fixed instruction and sent to the
    'amazon.titan-text-express-v1' model through ``bedrock_runtime``.

    Args:
        text: Object description, inserted after ``Input:`` in the instruction.

    Returns:
        The ``outputText`` of the first result in the model response.
    """
    instruction = f'''Your task is to create a positive prompt for image generation.
Objective: Generate images that prioritize structural integrity and accurate shapes. The focus should be on the correct form and basic contours of objects, with minimal concern for colors.
Guidelines:
Complex Objects (e.g., animals, vehicles): For these, the image should resemble a toy object, emphasizing the correct shape and structure while minimizing details and color complexity.
Example Input: A sports bike
Example Positive Prompt: Simple sports bike with accurate shape and structure, minimal details, digital painting, concept art style, basic contours, soft lighting, clean lines, neutral or muted colors, toy-like appearance, low contrast.
Example Input: A lion
Example Positive Prompt: Toy-like depiction of a lion with a focus on structural accuracy, minimal details, digital painting, concept art style, basic contours, soft lighting, clean lines, neutral or muted colors, simplified features, low contrast.
Simple Objects (e.g., a tennis ball): For these, the prompt should specify a realistic depiction, focusing on the accurate shape and structure.
Example Input: A tennis ball
Example Positive Prompt: Realistic depiction of a tennis ball with accurate shape and texture, digital painting, clean lines, minimal additional details, soft lighting, neutral or muted colors, focus on structural integrity.
Prompt Structure:
Subject: Clearly describe the object and its essential shape and structure.
Medium: Specify the art style (e.g., digital painting, concept art).
Style: Include relevant style terms (e.g., simplified, toy-like for complex objects; realistic for simple objects).
Resolution: Mention resolution if necessary (e.g., basic resolution).
Lighting: Indicate the type of lighting (e.g., soft lighting).
Color: Use neutral or muted colors with minimal emphasis on color details.
Additional Details: Keep additional details minimal or specify if not desired.
Input: {text}
Positive Prompt:
'''
    # Low temperature / topP keep the rewritten prompt close to deterministic.
    generation_config = {'temperature': 0.1, 'topP': 0.01, 'maxTokenCount': 512}
    payload = json.dumps({
        'inputText': instruction,
        'textGenerationConfig': generation_config,
    })
    response = bedrock_runtime.invoke_model(
        body=payload,
        modelId='amazon.titan-text-express-v1',
    )
    completion = json.loads(response.get('body').read())
    return completion['results'][0]['outputText']
def generate_image_from_text(pos_prompt, seed):
    """Generate a 512x512 image for a text prompt with Amazon Titan Image Generator.

    The raw prompt is first rewritten by ``gen_pos_prompt``; the rewritten
    prompt is printed and then sent, together with a fixed negative prompt, to
    the 'amazon.titan-image-generator-v1' model through ``bedrock_runtime``.

    Args:
        pos_prompt: Plain-text description of the object to draw.
        seed: Generation seed; any value ``int()`` accepts. ``None`` (what an
            emptied numeric input field yields) is treated as 0.

    Returns:
        A PIL image decoded from the first base64 image in the response.
    """
    new_prompt = gen_pos_prompt(pos_prompt)
    print(new_prompt)
    neg_prompt = '''Complex textures, intricate patterns, realistic lighting, high contrast, reflections, fuzzy surface, photographic quality, vibrant colors, detailed background, shadows, disfigured, deformed, ugly, multiple, duplicate.'''
    # A cleared number field arrives as None, which int() cannot convert.
    seed_value = 0 if seed is None else int(seed)
    parameters = {
        'taskType': 'TEXT_IMAGE',
        'textToImageParams': {
            'text': new_prompt,
            'negativeText': neg_prompt,
        },
        'imageGenerationConfig': {
            "cfgScale": 8,
            "seed": seed_value,
            "width": 512,
            "height": 512,
            "numberOfImages": 1,
        },
    }
    request_body = json.dumps(parameters)
    response = bedrock_runtime.invoke_model(
        body=request_body,
        modelId='amazon.titan-image-generator-v1',
    )
    response_body = json.loads(response.get('body').read())
    base64_image_data = base64.b64decode(response_body['images'][0])
    return Image.open(io.BytesIO(base64_image_data))
def check_input_image(input_image):
    """Validate the value submitted from the UI before generation starts.

    The submit handler in this file passes the text-prompt box to this check.
    An empty text box yields a blank string rather than ``None``, so blank
    strings are rejected as well as ``None``.

    Args:
        input_image: The submitted value (an image, or the text prompt).

    Raises:
        gr.Error: If the value is ``None`` or a blank string.
    """
    if input_image is None:
        raise gr.Error("No image uploaded!")
    if isinstance(input_image, str) and not input_image.strip():
        raise gr.Error("No text prompt entered!")
def preprocess(input_image, do_remove_background, foreground_ratio):
    """Prepare an image for the 3D model.

    Args:
        input_image: Source image; must provide ``convert`` and ``mode``.
        do_remove_background: When truthy, the image is converted to RGB, its
            background is removed with the shared rembg session, the
            foreground is resized and the result is composited onto gray.
        foreground_ratio: Ratio forwarded to ``resize_foreground``.

    Returns:
        The processed image. Without background removal, an RGBA input is
        composited onto gray and any other input is returned unchanged.
    """

    def composite_on_gray(rgba_image):
        # Blend the colour channels over a 0.5 gray using the alpha channel.
        pixels = np.array(rgba_image).astype(np.float32) / 255.0
        rgb, alpha = pixels[:, :, :3], pixels[:, :, 3:4]
        blended = rgb * alpha + (1 - alpha) * 0.5
        return Image.fromarray((blended * 255.0).astype(np.uint8))

    if do_remove_background:
        foreground = remove_background(input_image.convert("RGB"), rembg_session)
        foreground = resize_foreground(foreground, foreground_ratio)
        return composite_on_gray(foreground)

    if input_image.mode == "RGBA":
        return composite_on_gray(input_image)
    return input_image
def _reserve_temp_path(suffix):
    """Create an empty temporary file ending in *suffix*, close it, return its path."""
    # delete=False keeps the file on disk after the handle is closed, so the
    # path stays valid for the exporter and for Gradio to serve afterwards.
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handle:
        return handle.name


def generate(image, mc_resolution, formats=("obj", "glb")):
    """Reconstruct a mesh from an image and export it as .obj and .glb files.

    Args:
        image: Preprocessed input image passed to the TripoSR model.
        mc_resolution: Resolution forwarded to ``model.extract_mesh``.
        formats: Accepted for backward compatibility; it is not read, and both
            formats are always exported.

    Returns:
        A tuple ``(obj_path, glb_path)`` of temporary file paths.
    """
    scene_codes = model(image, device=device)
    mesh = model.extract_mesh(scene_codes, resolution=mc_resolution)[0]
    mesh = to_gradio_3d_orientation(mesh)

    # Export the .glb first: the scale flip below mutates the mesh in place.
    mesh_path_glb = _reserve_temp_path(".glb")
    mesh.export(mesh_path_glb)

    mesh_path_obj = _reserve_temp_path(".obj")
    mesh.apply_scale([-1, 1, 1])  # Otherwise the visualized .obj will be flipped
    mesh.export(mesh_path_obj)
    return mesh_path_obj, mesh_path_glb
def run_example(text_prompt, seed, do_remove_background, foreground_ratio, mc_resolution):
    """Run the full text-to-3D pipeline for one request.

    Args:
        text_prompt: Description used to generate the source image.
        seed: Seed forwarded to image generation.
        do_remove_background: Whether preprocessing removes the background.
        foreground_ratio: Foreground ratio forwarded to preprocessing.
        mc_resolution: Resolution forwarded to mesh extraction.

    Returns:
        A tuple ``(preprocessed_image, obj_path, glb_path)``.
    """
    # Text -> image -> cleaned image -> mesh files.
    generated = generate_image_from_text(text_prompt, seed)
    preprocessed = preprocess(generated, do_remove_background, foreground_ratio)
    obj_path, glb_path = generate(preprocessed, mc_resolution, ["obj", "glb"])
    return preprocessed, obj_path, glb_path
# Gradio UI: a text prompt and generation settings on the left, the resulting
# 3D model (OBJ and GLB tabs) on the right.
# NOTE(review): the source this was recovered from had lost its indentation;
# the nesting of the `with` blocks below is reconstructed — confirm against
# the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown(HEADER)
    with gr.Row(variant="panel"):
        # Input column: prompt, seed and preprocessing / meshing settings.
        with gr.Column():
            with gr.Row():
                text_prompt = gr.Textbox(
                    label="Text Prompt",
                    placeholder="Enter a text prompt for image generation"
                )
                # Not wired to any handler in this file.
                input_image = gr.Image(
                    label="Generated Image",
                    image_mode="RGBA",
                    sources="upload",
                    type="pil",
                    elem_id="content_image",
                    visible=False # Hidden since we generate the image from text
                )
                # Seed forwarded to image generation; defaults to 0.
                seed = gr.Number(value=0)
                # Receives the preprocessed image from run_example but is not shown.
                processed_image = gr.Image(label="Processed Image", interactive=False, visible=False)
            with gr.Row():
                with gr.Group():
                    do_remove_background = gr.Checkbox(
                        label="Remove Background", value=True
                    )
                    foreground_ratio = gr.Slider(
                        label="Foreground Ratio",
                        minimum=0.5,
                        maximum=1.0,
                        value=0.85,
                        step=0.05,
                    )
                    mc_resolution = gr.Slider(
                        label="Marching Cubes Resolution",
                        minimum=32,
                        maximum=320,
                        value=256,
                        step=32
                    )
            with gr.Row():
                submit = gr.Button("Generate", elem_id="generate", variant="primary")
        # Output column: one tab per exported mesh format.
        with gr.Column():
            with gr.Tab("OBJ"):
                output_model_obj = gr.Model3D(
                    label="Output Model (OBJ Format)",
                    interactive=False,
                )
                gr.Markdown("Note: Downloaded object will be flipped in case of .obj export. Export .glb instead or manually flip it before usage.")
            with gr.Tab("GLB"):
                output_model_glb = gr.Model3D(
                    label="Output Model (GLB Format)",
                    interactive=False,
                )
                gr.Markdown("Note: The model shown here has a darker appearance. Download to get correct results.")
    # with gr.Row(variant="panel"):
    #     gr.Examples(
    #         examples=[
    #             os.path.join("examples", img_name) for img_name in sorted(os.listdir("examples"))
    #         ],
    #         inputs=[text_prompt],
    #         outputs=[processed_image, output_model_obj, output_model_glb],
    #         cache_examples=True,
    #         fn=partial(run_example, do_remove_background=True, foreground_ratio=0.85, mc_resolution=256),
    #         label="Examples",
    #         examples_per_page=20
    #     )
    # Validate first (check_input_image receives the text prompt here); only
    # when that succeeds run the whole text -> image -> mesh pipeline.
    submit.click(fn=check_input_image, inputs=[text_prompt]).success(
        fn=run_example,
        inputs=[text_prompt, seed, do_remove_background, foreground_ratio, mc_resolution],
        outputs=[processed_image, output_model_obj, output_model_glb],
        # outputs=[output_model_obj, output_model_glb],
    )
# Allow at most 10 queued requests.
demo.queue(max_size=10)
# Login credentials are read from the USERNAME and PASSWORD environment variables.
demo.launch(auth=(os.getenv('USERNAME'), os.getenv('PASSWORD')))