
Hello, Team

#9
by DragonsmileHero - opened

Hello, I am trying to run this on Google Colab. I have been trying continuously for 8 hours and, despite my attempts at fixing it, I have failed. Your support is needed; looking forward to your reply.

Best, Rahul

The model is supposed to be good, but it is too bad. I was finally able to run it, and the results are shocking: after spending 40 hours, the model gives results that look like a child's editing. Why did you upload the model, claiming it is better than other models? It is just a waste of time.

deepgen org

We have updated the environment configuration on GitHub; the issue might be related to the environment. Also, which checkpoint are you using? Please refer to https://github.com/deepgenteam/deepgen/blob/main/INFERENCE.md and download model.pt to try.
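For reference, a minimal sketch of what this reply suggests: download the final checkpoint and load it directly with torch.load. The repo id is an assumption taken from the script below and the filename comes from this reply; check INFERENCE.md for the authoritative paths.

import torch
from huggingface_hub import hf_hub_download

# Assumed repo id and filename; see INFERENCE.md for the exact locations.
ckpt_path = hf_hub_download(repo_id="deepgenteam/DeepGen-1.0", filename="model.pt")
state_dict = torch.load(ckpt_path, map_location="cpu")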

Alex11556666 changed discussion status to closed

"""
DEEPGEN 1.0 - COMPLETE FIXED VERSION
Single file solution - Just run this script!

Usage:

  1. First run: Installs dependencies (then restart runtime)
  2. Second run: Downloads checkpoints and runs inference
    """

import sys
import types
import subprocess
import os
from pathlib import Path

# ============================================================================
# TRITON PATCH
# ============================================================================

def apply_triton_patch():
    """Stub out the removed triton.ops module so downstream imports do not fail."""
    if 'triton.ops' not in sys.modules:
        mock_ops = types.ModuleType('triton.ops')
        mock_matmul_perf = types.ModuleType('triton.ops.matmul_perf_model')
        mock_matmul_perf.early_config_prune = lambda *args, **kwargs: None
        mock_matmul_perf.estimate_matmul_time = lambda *args, **kwargs: 1.0
        mock_ops.matmul_perf_model = mock_matmul_perf
        sys.modules['triton.ops'] = mock_ops
        sys.modules['triton.ops.matmul_perf_model'] = mock_matmul_perf

apply_triton_patch()

# ============================================================================
# DEPENDENCY CHECK & INSTALL
# ============================================================================

def check_versions():
    """Return True if compatible transformers and huggingface_hub versions are installed."""
    import importlib
    # packaging ships with pip/transformers; compare parsed versions, not strings
    from packaging.version import parse as parse_version
    try:
        transformers = importlib.import_module('transformers')
        if parse_version(transformers.__version__) < parse_version("4.49.0"):
            return False
    except ImportError:
        return False
    try:
        hf_hub = importlib.import_module('huggingface_hub')
        if parse_version(hf_hub.__version__) < parse_version("0.23.0"):
            return False
    except ImportError:
        return False
    return True

if not check_versions():
    print("πŸ”§ Installing dependencies...")
    # Use the current interpreter's pip so packages land in this runtime
    subprocess.run([sys.executable, "-m", "pip", "uninstall", "-y", "transformers", "huggingface_hub"],
                   check=False, capture_output=True)
    subprocess.run([
        sys.executable, "-m", "pip", "install", "-q",
        "transformers==4.49.0",
        "huggingface_hub",
        "accelerate", "diffusers", "timm", "einops",
        "mmengine", "xtuner", "pillow", "numpy",
        "peft", "sentencepiece",
        "qwen-vl-utils", "triton==3.1.0"
    ], check=True)
    print("\n" + "="*60)
    print("βœ… Dependencies installed!")
    print("πŸ”„ RESTART RUNTIME NOW, then run this script again")
    print("="*60)
    sys.exit(0)

# ============================================================================
# SETUP DEEPGEN
# ============================================================================

import torch
from PIL import Image
import numpy as np
from einops import rearrange
import zipfile

def setup_deepgen():
    print("\nπŸš€ Setting up DeepGen 1.0...")

    # 1. Clone repo
    repo_dir = Path("/content/deepgen")
    if not repo_dir.exists():
        print("πŸ“¦ Cloning repository...")
        subprocess.run(["git", "clone", "https://github.com/deepgenteam/deepgen.git", str(repo_dir)], check=True)

    if str(repo_dir) not in sys.path:
        sys.path.insert(0, str(repo_dir))
        sys.path.insert(0, str(repo_dir / "src"))

    # 2. Download checkpoints
    checkpoint_dir = Path("/content/checkpoints")
    deepgen_ckpt = checkpoint_dir / "DeepGen_CKPT"
    sft_checkpoint = deepgen_ckpt / "SFT" / "iter_400000.pth"

    if not sft_checkpoint.exists():
        print("πŸ“₯ Downloading checkpoints...")
        from huggingface_hub import snapshot_download
        snapshot_download(
            repo_id="deepgenteam/DeepGen-1.0",
            local_dir=checkpoint_dir,
            repo_type="model"
        )

        # Merge and extract zip if needed
        zip_parts = list(checkpoint_dir.glob("DeepGen_CKPT.zip.part-*"))
        if zip_parts:
            print("πŸ”— Merging zip parts...")
            zip_path = checkpoint_dir / "DeepGen_CKPT.zip"
            with open(zip_path, 'wb') as outfile:
                for part in sorted(zip_parts):
                    with open(part, 'rb') as infile:
                        outfile.write(infile.read())

            print("πŸ“‚ Extracting...")
            with zipfile.ZipFile(zip_path, 'r') as zip_ref:
                zip_ref.extractall(checkpoint_dir)

    print(f"βœ“ Checkpoint: {sft_checkpoint}")

    # 3. Load config
    from mmengine.config import Config
    from xtuner.registry import BUILDER

    config_path = repo_dir / "configs/models/deepgen_scb.py"
    cfg = Config.fromfile(str(config_path))

    # 4. Apply patches
    print("πŸ”§ Applying patches...")

    # Patch model class: expose the language model under the attribute name the code expects
    try:
        from src.models.sd3_kontext.qwen2_5_vl_sd3_hf_dynamic_fusion import Qwen2p5VLStableDiffusion3HF
        Qwen2p5VLStableDiffusion3HF.llm = property(lambda self: self.lmm)
    except Exception:
        pass

    # Patch config
    def patch_config(obj):
        if isinstance(obj, dict):
            if 'attn_implementation' in obj:
                if 'flash' in str(obj['attn_implementation']).lower():
                    obj['attn_implementation'] = 'eager'

            for k in list(obj.keys()):
                v = obj[k]
                if isinstance(v, str) and "model_zoo/" in v:
                    if "Qwen2.5-VL-3B-Instruct" in v:
                        obj[k] = "Qwen/Qwen2.5-VL-3B-Instruct"
                        obj['attn_implementation'] = 'eager'
                    elif "UniPic" in v or "SD3.5M" in v:
                        obj[k] = "stabilityai/stable-diffusion-3.5-medium"
                    else:
                        obj[k] = v.replace("model_zoo/", "deepgenteam/")

                if isinstance(v, (dict, list)):
                    patch_config(v)
        elif isinstance(obj, list):
            for item in obj:
                patch_config(item)

    patch_config(cfg.model)

    # 5. Build and load model
    print("πŸ—οΈ  Building model...")
    model = BUILDER.build(cfg.model)

    print("⚑ Loading weights...")
    state_dict = torch.load(str(sft_checkpoint), map_location='cpu', weights_only=False)
    model.load_state_dict(state_dict, strict=False)

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    model = model.to(device)
    model.eval()

    print(f"βœ… Model loaded on {device}\n")
    return model, device

# ============================================================================
# INFERENCE FUNCTIONS
# ============================================================================

def text_to_image(model, device, prompt, output_path="output.png", height=512, width=512, steps=50, cfg_scale=4.0, seed=42):
    """Generate image from text"""
    print(f"🎨 Text-to-Image: '{prompt}'")

    torch.manual_seed(seed)
    data = {'instruction': prompt}
    neg_prompt = "blurry, low quality, distorted, artifacts, noise"

    with torch.no_grad():
        output = model.generate(
            data=data,
            cfg_scale=cfg_scale,
            num_inference_steps=steps,
            cfg_prompt=neg_prompt,
            height=height,
            width=width
        )

    # Convert from CHW in [-1, 1] to an HWC uint8 array
    output = rearrange(output.squeeze(0), 'c h w -> h w c')
    output = ((output + 1) / 2 * 255).clamp(0, 255).cpu().numpy().astype(np.uint8)
    result = Image.fromarray(output)
    result.save(output_path)
    print(f"βœ… Saved: {output_path}\n")
    return result

def image_to_image(model, device, src_img_path, prompt, output_path="edited.png", height=512, width=512, steps=50, cfg_scale=4.0, seed=42):
    """Edit image with instruction"""
    print(f"✏️ Image-to-Image: '{prompt}'")

    torch.manual_seed(seed)

    # Load and preprocess source image
    img = Image.open(src_img_path).convert("RGB").resize((512, 512))
    pixels = torch.from_numpy(np.array(img)).float() / 255.0
    pixels = 2 * pixels - 1
    pixels = rearrange(pixels, 'h w c -> c h w').unsqueeze(0).to(device)

    data = {'image_pixel_src': pixels, 'instruction': prompt}
    neg_prompt = "blurry, low quality, distorted, artifacts, noise"

    with torch.no_grad():
        output = model.generate(
            data=data,
            cfg_scale=cfg_scale,
            num_inference_steps=steps,
            cfg_prompt=neg_prompt,
            height=height,
            width=width
        )

    output = rearrange(output.squeeze(0), 'c h w -> h w c')
    output = ((output + 1) / 2 * 255).clamp(0, 255).cpu().numpy().astype(np.uint8)
    result = Image.fromarray(output)
    result.save(output_path)
    print(f"βœ… Saved: {output_path}\n")
    return result

# ============================================================================
# MAIN EXECUTION
# ============================================================================

if name == "main":
# Setup model
model, device = setup_deepgen()

# Example 1: Text-to-Image
print("="*60)
print("EXAMPLE 1: TEXT-TO-IMAGE")
print("="*60)
text_to_image(
    model, device,
    prompt="a photo of a blue pizza and a yellow baseball glove",
    output_path="/content/text2img_output.png",
    seed=42
)

# Example 2: Image-to-Image (create a test image first if none exists)
print("="*60)
print("EXAMPLE 2: IMAGE-TO-IMAGE")
print("="*60)

# Create a simple test image
test_img_path = "/content/test_input.png"
if not Path(test_img_path).exists():
    test_img = Image.new('RGB', (512, 512), color='white')
    from PIL import ImageDraw
    draw = ImageDraw.Draw(test_img)
    draw.rectangle([100, 100, 400, 400], fill='lightblue', outline='black', width=3)
    test_img.save(test_img_path)
    print(f"πŸ“ Created test image: {test_img_path}")

image_to_image(
    model, device,
    src_img_path=test_img_path,
    prompt="make it look like a sunset painting",
    output_path="/content/img2img_output.png",
    seed=42
)

print("="*60)
print("✨ ALL DONE! Check your outputs:")
print("   - /content/text2img_output.png")
print("   - /content/img2img_output.png")
print("="*60)

I am using this script, but I think it is not working. Can you help, and tell me which library it is using? There is no proper documentation. You created a good model, but if it does not have proper documentation, what is the point of creating such a great model?

The issue may be related to model loading. The final checkpoint model.pt can be loaded directly using torch.load, while .pth checkpoints are handled via guess_load_checkpoint(). Please refer to https://github.com/deepgenteam/deepgen/blob/main/INFERENCE.md; we have already uploaded the inference code. Thanks.
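For reference, a minimal sketch of the two loading paths described above. The checkpoint paths are placeholders, and the import path for guess_load_checkpoint is assumed to be xtuner.model.utils (xtuner is already installed by the script above); see INFERENCE.md for the authoritative usage.

import torch
from xtuner.model.utils import guess_load_checkpoint  # assumed import path

# Final checkpoint: a plain state dict, loaded directly (path is a placeholder).
state_dict = torch.load("model.pt", map_location="cpu")

# Training-style .pth checkpoint: let xtuner unwrap the wrapper keys first.
state_dict = guess_load_checkpoint("iter_400000.pth")

model.load_state_dict(state_dict, strict=False)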
