rahul7star commited on
Commit
6eefcb5
·
verified ·
1 Parent(s): fa257f5

Update app_quant.py

Browse files
Files changed (1) hide show
  1. app_quant.py +176 -131
app_quant.py CHANGED
@@ -5,7 +5,9 @@ import sys
5
  import platform
6
  import diffusers
7
  import transformers
 
8
  import os
 
9
 
10
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
11
  from diffusers import ZImagePipeline, AutoModel
@@ -21,27 +23,56 @@ def log(msg):
21
  LOGS += msg + "\n"
22
  return msg
23
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
24
  # ============================================================
25
  # ENVIRONMENT INFO
26
  # ============================================================
27
  log("===================================================")
28
- log("πŸ” Z-IMAGE-TURBO DEBUGGING + ROBUST TRANSFORMER INSPECTION")
29
  log("===================================================\n")
30
 
31
- log(f"πŸ“Œ PYTHON VERSION : {sys.version.replace(chr(10), ' ')}")
32
  log(f"πŸ“Œ PLATFORM : {platform.platform()}")
33
  log(f"πŸ“Œ TORCH VERSION : {torch.__version__}")
34
  log(f"πŸ“Œ TRANSFORMERS VERSION : {transformers.__version__}")
35
  log(f"πŸ“Œ DIFFUSERS VERSION : {diffusers.__version__}")
36
  log(f"πŸ“Œ CUDA AVAILABLE : {torch.cuda.is_available()}")
37
 
38
- if torch.cuda.is_available():
39
- log(f"πŸ“Œ GPU NAME : {torch.cuda.get_device_name(0)}")
40
- log(f"πŸ“Œ GPU CAPABILITY : {torch.cuda.get_device_capability(0)}")
41
- log(f"πŸ“Œ GPU MEMORY (TOTAL) : {torch.cuda.get_device_properties(0).total_memory/1e9:.2f} GB")
42
- log(f"πŸ“Œ FLASH ATTENTION : {torch.backends.cuda.flash_sdp_enabled()}")
43
- else:
44
- raise RuntimeError("❌ CUDA is REQUIRED but not available.")
45
 
46
  device = "cuda"
47
  gpu_id = 0
@@ -62,75 +93,75 @@ log(f"Model Cache Directory : {model_cache}")
62
  log(f"torch_dtype : {torch_dtype}")
63
  log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")
64
 
 
 
65
  # ============================================================
66
- # ROBUST TRANSFORMER INSPECTION FUNCTION
67
  # ============================================================
68
- def inspect_transformer(model, model_name="Transformer"):
69
- log(f"\nπŸ” {model_name} Architecture Details:")
70
  try:
71
- block_attrs = ["transformer_blocks", "blocks", "layers", "encoder_blocks", "model"]
72
  blocks = None
73
- for attr in block_attrs:
74
- blocks = getattr(model, attr, None)
75
- if blocks is not None:
 
76
  break
77
 
78
  if blocks is None:
79
- log(f"⚠️ Could not find transformer blocks in {model_name}, skipping detailed block info")
80
- else:
81
- try:
82
- log(f"Number of Transformer Modules : {len(blocks)}")
83
- for i, block in enumerate(blocks):
84
- log(f" Block {i}: {block.__class__.__name__}")
85
- attn_type = getattr(block, "attn", None)
86
- if attn_type:
87
- log(f" Attention: {attn_type.__class__.__name__}")
88
- flash_enabled = getattr(attn_type, "flash", None)
89
- log(f" FlashAttention Enabled? : {flash_enabled}")
90
- except Exception as e:
91
- log(f"⚠️ Error inspecting blocks: {e}")
92
-
93
- config = getattr(model, "config", None)
94
- if config:
95
- log(f"Hidden size: {getattr(config, 'hidden_size', 'N/A')}")
96
- log(f"Number of attention heads: {getattr(config, 'num_attention_heads', 'N/A')}")
97
- log(f"Number of layers: {getattr(config, 'num_hidden_layers', 'N/A')}")
98
- log(f"Intermediate size: {getattr(config, 'intermediate_size', 'N/A')}")
99
  else:
100
- log(f"⚠️ No config attribute found in {model_name}")
 
 
 
 
101
  except Exception as e:
102
- log(f"⚠️ Failed to inspect {model_name}: {e}")
 
103
 
104
  # ============================================================
105
- # LOAD TRANSFORMER BLOCK
106
  # ============================================================
107
  log("\n===================================================")
108
  log("πŸ”§ LOADING TRANSFORMER BLOCK")
109
  log("===================================================")
110
 
111
- quantization_config = DiffusersBitsAndBytesConfig(
112
- load_in_4bit=True,
113
- bnb_4bit_quant_type="nf4",
114
- bnb_4bit_compute_dtype=torch_dtype,
115
- bnb_4bit_use_double_quant=True,
116
- llm_int8_skip_modules=["transformer_blocks.0.img_mod"],
117
- )
118
- log("4-bit Quantization Config (Transformer):")
119
- log(str(quantization_config))
120
-
121
- transformer = AutoModel.from_pretrained(
122
- model_id,
123
- cache_dir=model_cache,
124
- subfolder="transformer",
125
- quantization_config=quantization_config,
126
- torch_dtype=torch_dtype,
127
- device_map=device,
128
- )
129
- log("βœ… Transformer block loaded successfully.")
130
- inspect_transformer(transformer, "Transformer")
131
-
132
- if USE_CPU_OFFLOAD:
133
- transformer = transformer.to("cpu")
 
 
 
 
 
 
 
134
 
135
  # ============================================================
136
  # LOAD TEXT ENCODER
@@ -139,104 +170,118 @@ log("\n===================================================")
139
  log("πŸ”§ LOADING TEXT ENCODER")
140
  log("===================================================")
141
 
142
- quantization_config = TransformersBitsAndBytesConfig(
143
- load_in_4bit=True,
144
- bnb_4bit_quant_type="nf4",
145
- bnb_4bit_compute_dtype=torch_dtype,
146
- bnb_4bit_use_double_quant=True,
147
- )
148
- log("4-bit Quantization Config (Text Encoder):")
149
- log(str(quantization_config))
150
-
151
- text_encoder = AutoModel.from_pretrained(
152
- model_id,
153
- cache_dir=model_cache,
154
- subfolder="text_encoder",
155
- quantization_config=quantization_config,
156
- torch_dtype=torch_dtype,
157
- device_map=device,
158
- )
159
- log("βœ… Text encoder loaded successfully.")
160
- inspect_transformer(text_encoder, "Text Encoder")
161
-
162
- if USE_CPU_OFFLOAD:
163
- text_encoder = text_encoder.to("cpu")
 
 
 
 
 
 
 
164
 
165
  # ============================================================
166
  # BUILD PIPELINE
167
  # ============================================================
168
  log("\n===================================================")
169
- log("πŸ”§ BUILDING Z-IMAGE-TURBO PIPELINE")
170
  log("===================================================")
171
 
172
- pipe = ZImagePipeline.from_pretrained(
173
- model_id,
174
- transformer=transformer,
175
- text_encoder=text_encoder,
176
- torch_dtype=torch_dtype,
177
- )
178
-
179
- if USE_CPU_OFFLOAD:
180
- pipe.enable_model_cpu_offload(gpu_id=gpu_id)
181
- log("βš™ CPU OFFLOAD ENABLED")
182
- else:
183
  pipe.to(device)
184
- log("βš™ Pipeline moved to GPU")
 
 
 
 
 
 
185
 
186
- log("βœ… Pipeline ready.")
187
 
188
  # ============================================================
189
- # INFERENCE FUNCTION
190
  # ============================================================
191
- @spaces.GPU
192
  def generate_image(prompt, height, width, steps, seed):
193
  global LOGS
194
  LOGS = "" # reset logs
 
195
  log("===================================================")
196
  log("🎨 RUNNING INFERENCE")
197
  log("===================================================")
198
- log(f"Prompt : {prompt}")
199
- log(f"Resolution : {width} x {height}")
200
- log(f"Steps : {steps}")
201
- log(f"Seed : {seed}")
202
-
203
- generator = torch.Generator(device).manual_seed(seed)
204
-
205
- out = pipe(
206
- prompt=prompt,
207
- height=height,
208
- width=width,
209
- num_inference_steps=steps,
210
- guidance_scale=0.0,
211
- generator=generator,
212
- )
213
- log("βœ… Inference Finished")
214
- return out.images[0], LOGS
 
 
 
 
 
 
215
 
216
  # ============================================================
217
- # GRADIO UI
218
  # ============================================================
219
- with gr.Blocks(title="Z-Image-Turbo Generator") as demo:
220
- gr.Markdown("# **Z-Image-Turbo β€” 4bit Quantized Image Generator**")
221
 
222
  with gr.Row():
223
  with gr.Column(scale=1):
224
- prompt = gr.Textbox(label="Prompt", value="Realistic mid-aged male image")
225
  height = gr.Slider(256, 2048, value=1024, step=8, label="Height")
226
  width = gr.Slider(256, 2048, value=1024, step=8, label="Width")
227
- steps = gr.Slider(1, 16, value=9, step=1, label="Inference Steps")
228
  seed = gr.Slider(0, 999999, value=42, step=1, label="Seed")
229
-
230
- btn = gr.Button("Generate", variant="primary")
231
 
232
  with gr.Column(scale=1):
233
- output_image = gr.Image(label="Output Image")
234
- logs_panel = gr.Textbox(label="πŸ“œ Transformer Logs", lines=25, interactive=False)
235
 
236
- btn.click(
237
- generate_image,
238
- inputs=[prompt, height, width, steps, seed],
239
- outputs=[output_image, logs_panel],
240
- )
241
 
242
  demo.launch()
 
5
  import platform
6
  import diffusers
7
  import transformers
8
+ import psutil
9
  import os
10
+ import time
11
 
12
  from diffusers import BitsAndBytesConfig as DiffusersBitsAndBytesConfig
13
  from diffusers import ZImagePipeline, AutoModel
 
23
  LOGS += msg + "\n"
24
  return msg
25
 
26
+
27
# ============================================================
# SYSTEM METRICS β€” LIVE GPU + CPU MONITORING
# ============================================================
def log_system_stats(tag=""):
    """Append a tagged snapshot of GPU memory and CPU/RAM usage to the log.

    Never raises: any failure is caught and logged so monitoring can't
    take down the app.
    """
    try:
        log(f"\n===== πŸ”₯ SYSTEM STATS {tag} =====")

        # GPU memory snapshot (device 0) — only when CUDA is present.
        if torch.cuda.is_available():
            gb_alloc = torch.cuda.memory_allocated(0) / 1e9
            gb_reserved = torch.cuda.memory_reserved(0) / 1e9
            gb_total = torch.cuda.get_device_properties(0).total_memory / 1e9

            log(f"πŸ’  GPU Total : {gb_total:.2f} GB")
            log(f"πŸ’  GPU Allocated : {gb_alloc:.2f} GB")
            log(f"πŸ’  GPU Reserved : {gb_reserved:.2f} GB")
            log(f"πŸ’  GPU Free : {gb_total - gb_alloc:.2f} GB")

        # Host-side CPU and RAM usage.
        vm = psutil.virtual_memory()
        log(f"🧠 CPU Usage : {psutil.cpu_percent()}%")
        log(f"🧠 RAM Used : {vm.used / 1e9:.2f} GB / {vm.total / 1e9:.2f} GB")

    except Exception as e:
        log(f"⚠️ Failed to log system stats: {e}")
56
+
57
+
58
  # ============================================================
59
  # ENVIRONMENT INFO
60
  # ============================================================
61
  log("===================================================")
62
+ log("πŸ” Z-IMAGE-TURBO DEBUGGING + LIVE METRIC LOGGER")
63
  log("===================================================\n")
64
 
65
+ log(f"πŸ“Œ PYTHON VERSION : {sys.version.replace(chr(10),' ')}")
66
  log(f"πŸ“Œ PLATFORM : {platform.platform()}")
67
  log(f"πŸ“Œ TORCH VERSION : {torch.__version__}")
68
  log(f"πŸ“Œ TRANSFORMERS VERSION : {transformers.__version__}")
69
  log(f"πŸ“Œ DIFFUSERS VERSION : {diffusers.__version__}")
70
  log(f"πŸ“Œ CUDA AVAILABLE : {torch.cuda.is_available()}")
71
 
72
+ log_system_stats("AT STARTUP")
73
+
74
+ if not torch.cuda.is_available():
75
+ raise RuntimeError("❌ CUDA Required")
 
 
 
76
 
77
  device = "cuda"
78
  gpu_id = 0
 
93
  log(f"torch_dtype : {torch_dtype}")
94
  log(f"USE_CPU_OFFLOAD : {USE_CPU_OFFLOAD}")
95
 
96
+ log_system_stats("BEFORE TRANSFORMER LOAD")
97
+
98
# ============================================================
# SAFE TRANSFORMER INSPECTION
# ============================================================
def inspect_transformer(model, name):
    """Log a best-effort structural summary of *name*'s transformer stack.

    Probes a few conventional attribute names for the block container and,
    when one is found and sized, logs its length plus the class names of up
    to the first 10 blocks. All errors are swallowed into the log.
    """
    log(f"\nπŸ” Inspecting {name}")
    try:
        # First matching attribute wins; None means nothing recognizable.
        stack = None
        for attr_name in ("transformer_blocks", "blocks", "layers", "encoder", "model"):
            if hasattr(model, attr_name):
                stack = getattr(model, attr_name)
                break

        if stack is None:
            log(f"⚠️ No block structure found in {name}")
            return

        sized = hasattr(stack, "__len__")
        if sized:
            log(f"Total Blocks = {len(stack)}")
        else:
            log("⚠️ Blocks exist but are not iterable")

        # Sample at most the first 10 blocks; skip entirely when unsized.
        limit = min(10, len(stack)) if sized else 0
        for idx in range(limit):
            log(f"Block {idx} = {stack[idx].__class__.__name__}")

    except Exception as e:
        log(f"⚠️ Transformer inspect error: {e}")
 
128
  # ============================================================
129
+ # LOAD TRANSFORMER β€” WITH LIVE STATS
130
  # ============================================================
131
  log("\n===================================================")
132
  log("πŸ”§ LOADING TRANSFORMER BLOCK")
133
  log("===================================================")
134
 
135
+ log("πŸ“Œ Logging memory before load:")
136
+ log_system_stats("START TRANSFORMER LOAD")
137
+
138
+ try:
139
+ quant_cfg = DiffusersBitsAndBytesConfig(
140
+ load_in_4bit=True,
141
+ bnb_4bit_quant_type="nf4",
142
+ bnb_4bit_compute_dtype=torch_dtype,
143
+ bnb_4bit_use_double_quant=True,
144
+ )
145
+
146
+ transformer = AutoModel.from_pretrained(
147
+ model_id,
148
+ cache_dir=model_cache,
149
+ subfolder="transformer",
150
+ quantization_config=quant_cfg,
151
+ torch_dtype=torch_dtype,
152
+ device_map=device,
153
+ )
154
+ log("βœ… Transformer loaded successfully.")
155
+
156
+ except Exception as e:
157
+ log(f"❌ Transformer load failed: {e}")
158
+ transformer = None
159
+
160
+ log_system_stats("AFTER TRANSFORMER LOAD")
161
+
162
+ if transformer:
163
+ inspect_transformer(transformer, "Transformer")
164
+
165
 
166
# ============================================================
# LOAD TEXT ENCODER
# ============================================================
log("\n===================================================")
log("πŸ”§ LOADING TEXT ENCODER")
log("===================================================")

log_system_stats("START TEXT ENCODER LOAD")

try:
    # Same 4-bit NF4 setup as the transformer, via the transformers-side config.
    quant_cfg2 = TransformersBitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type="nf4",
        bnb_4bit_compute_dtype=torch_dtype,
        bnb_4bit_use_double_quant=True,
    )
    text_encoder = AutoModel.from_pretrained(
        model_id,
        cache_dir=model_cache,
        subfolder="text_encoder",
        quantization_config=quant_cfg2,
        torch_dtype=torch_dtype,
        device_map=device,
    )
    log("βœ… Text encoder loaded successfully.")
except Exception as e:
    # Non-fatal: `text_encoder` stays None; downstream code checks for it.
    log(f"❌ Text encoder load failed: {e}")
    text_encoder = None

log_system_stats("AFTER TEXT ENCODER LOAD")

if text_encoder:
    inspect_transformer(text_encoder, "Text Encoder")
201
+
202
 
203
# ============================================================
# BUILD PIPELINE
# ============================================================
log("\n===================================================")
log("πŸ”§ BUILDING PIPELINE")
log("===================================================")

log_system_stats("START PIPELINE BUILD")

try:
    # Assemble the full pipeline around the pre-quantized sub-models
    # (either may be None if its load failed above).
    pipe = ZImagePipeline.from_pretrained(
        model_id,
        transformer=transformer,
        text_encoder=text_encoder,
        torch_dtype=torch_dtype,
    )
    pipe.to(device)
    log("βœ… Pipeline built successfully.")
except Exception as e:
    # Keep the app alive so logs remain visible in the UI.
    log(f"❌ Pipeline build failed: {e}")
    pipe = None

log_system_stats("AFTER PIPELINE BUILD")
227
 
 
228
 
229
# ============================================================
# INFERENCE
# ============================================================
@spaces.GPU  # FIX: was `@spaces.GGPU` — `spaces` has no `GGPU`, so the app crashed at import
def generate_image(prompt, height, width, steps, seed):
    """Run one text-to-image generation and return (image, logs).

    Inputs arrive straight from the Gradio widgets; sliders may deliver
    floats, so integer-valued parameters are coerced below.

    Returns:
        (PIL.Image, str) on success, or (None, str) on failure — the second
        element is always the accumulated log text for the UI panel.
    """
    global LOGS
    LOGS = ""  # reset logs for this run

    log("===================================================")
    log("🎨 RUNNING INFERENCE")
    log("===================================================")
    log_system_stats("BEFORE INFERENCE")

    try:
        # Gradio sliders return floats; torch/diffusers expect ints here.
        generator = torch.Generator(device).manual_seed(int(seed))

        output = pipe(
            prompt=prompt,
            height=int(height),
            width=int(width),
            num_inference_steps=int(steps),
            guidance_scale=0.0,  # Turbo models are distilled for CFG-free sampling
            generator=generator,
        )

        log("βœ… Inference finished.")
        log_system_stats("AFTER INFERENCE")

        return output.images[0], LOGS

    except Exception as e:
        # Surface the failure in the UI log panel instead of crashing the app.
        log(f"❌ Inference error: {e}")
        return None, LOGS
262
+
263
 
264
# ============================================================
# UI
# ============================================================
with gr.Blocks(title="Z-Image Turbo Debugger") as demo:
    gr.Markdown("## **Z-Image Turbo β€” Full Debug + Live GPU/CPU Monitor**")

    with gr.Row():
        # Left column: generation controls.
        with gr.Column(scale=1):
            prompt = gr.Textbox(label="Prompt", value="Realistic male portrait")
            height = gr.Slider(256, 2048, value=1024, step=8, label="Height")
            width = gr.Slider(256, 2048, value=1024, step=8, label="Width")
            steps = gr.Slider(1, 16, value=9, step=1, label="Steps")
            seed = gr.Slider(0, 999999, value=42, step=1, label="Seed")
            btn = gr.Button("Generate")

        # Right column: generated image and the live log panel.
        with gr.Column(scale=1):
            image_out = gr.Image(label="Output")
            logs_panel = gr.Textbox(label="πŸ“œ Logs", lines=30)

    btn.click(
        fn=generate_image,
        inputs=[prompt, height, width, steps, seed],
        outputs=[image_out, logs_panel],
    )

demo.launch()