"""Plot a bar chart of model loading times for several configurations.

The figure is written to ``loading_benchmark.png`` and then displayed with
``plt.show()``.
"""

import matplotlib.pyplot as plt
import numpy as np

fig, ax = plt.subplots(figsize=(12, 7))

# One multi-line tick label per benchmarked configuration.
categories = [
    'v4.57.6\ndevice_map=auto\nThreadpool',
    'v4.57.6\ndevice_map=auto\nNormal',
    'v4.57.6\nTP',
    'v5\ndevice_map=auto\nAsync',
    'v5\ndevice_map=auto\nSync',
    'v5\nTP\nAsync',
    'v5\nTP\nSync',
]

# Loading times in seconds, in the same order as `categories`. The NaN entry
# has no measured time; it gets an 'OOM' label instead of a value below.
times = [66.24, 67.29, np.nan, 20.71, 45.3, 10.1, 19.28]
colors = ['#3498db', '#2980b9', '#e74c3c', '#2ecc71', '#27ae60', '#f39c12', '#e67e22']

x_pos = np.arange(len(categories))

bars = ax.bar(x_pos, times, color=colors, alpha=0.8, edgecolor='black', linewidth=1.2)

# Annotate every bar: the measured time just above the bar, or a red 'OOM'
# marker at a fixed height where the time is NaN (no bar height to anchor to).
for bar, seconds in zip(bars, times):
    x_center = bar.get_x() + bar.get_width() / 2
    if np.isnan(seconds):
        ax.text(x_center, 5, 'OOM',
                ha='center', va='bottom', fontsize=12, fontweight='bold', color='red')
    else:
        ax.text(x_center, seconds + 1.5, f'{seconds}s',
                ha='center', va='bottom', fontsize=10, fontweight='bold')

ax.set_xlabel('Configuration', fontsize=12, fontweight='bold')
ax.set_ylabel('Loading Time (seconds)', fontsize=12, fontweight='bold')
ax.set_title('Model Loading Benchmark: Qwen/Qwen1.5-110B-Chat\nGPU: 1x A100 (80 GB)',
             fontsize=14, fontweight='bold', pad=20)

ax.set_xticks(x_pos)
ax.set_xticklabels(categories, fontsize=9, ha='center')
# Leave 15% headroom above the tallest measured bar so its label fits;
# nanmax ignores the NaN placeholder.
ax.set_ylim(0, np.nanmax(times) * 1.15)

# Light horizontal grid lines, drawn behind the bars.
ax.yaxis.grid(True, linestyle='--', alpha=0.3)
ax.set_axisbelow(True)

plt.tight_layout()
plt.savefig('loading_benchmark.png', dpi=300, bbox_inches='tight')
plt.show()

print("Plot saved as 'loading_benchmark.png'")