Spaces:
Running
Running
| import gradio as gr | |
| import pandas as pd | |
| import plotly.graph_objects as go | |
# API price presets: label -> (input $ per 1M tokens, output $ per 1M tokens).
# "Custom" maps to None, which the preset handler treats as "keep current values".
MODEL_PRESETS = {
    "DeepSeek V4 — OpenRouter (~90% cache)": (0.041, 0.87),
    "Claude Sonnet 4.6": (3.0, 15.0),
    "Claude Haiku 4.5": (1.0, 5.0),
    "Custom": None,
}

# Cloud GPU rate presets: label -> rows of [configuration name, $ per hour].
# "Custom" maps to None, which the preset handler treats as "keep current table".
CLOUD_PRESETS = {
    "GMI Cloud": [
        ["H200 × 8", 20.8],
        ["B200 × 8", 32.0],
        ["GB200 × 4", 32.0],
    ],
    "Custom": None,
}

# Initial selections and values, also restored by the reset button.
DEFAULT_MODEL = "DeepSeek V4 — OpenRouter (~90% cache)"
DEFAULT_CLOUD = "GMI Cloud"
DEFAULT_IN_K = 64.0   # input tokens per request, in thousands
DEFAULT_OUT_K = 4.0   # output tokens per request, in thousands
DEFAULT_RPS = 1.0     # planned requests per second

# Line colours cycled through for the GPU rows on the chart.
GPU_COLORS = [
    "#2E86DE",
    "#10AC84",
    "#EE5253",
    "#8854D0",
    "#F79F1F",
    "#576574",
]
# Colour of the "your workload" marker, guide line and annotation.
WORKLOAD_COLOR = "#9b59b6"
def cost_per_request(in_k: float, out_k: float, in_price: float, out_price: float) -> float:
    """Return the API cost, in dollars, of one request.

    Args:
        in_k: Input tokens per request, in thousands.
        out_k: Output tokens per request, in thousands.
        in_price: Input price in dollars per one million tokens.
        out_price: Output price in dollars per one million tokens.
    """
    # Each term is (token count) x (price per 1M tokens); the final division
    # converts the per-million pricing into dollars.
    input_term = in_k * 1000 * in_price
    output_term = out_k * 1000 * out_price
    return (input_term + output_term) / 1_000_000
def parse_gpus(df):
    """Extract the usable ``(name, hourly_rate)`` pairs from the GPU table.

    Args:
        df: Either a pandas DataFrame or a list of rows; each row is expected
            to hold the configuration name first and the $/hour rate second.
            ``None`` or an empty value yields an empty result.

    Returns:
        A list of ``(name, hourly)`` tuples, in input order, keeping only rows
        with a non-blank name and a finite, strictly positive hourly rate.
        Rows that are too short, unnamed, or carry an unparsable rate are
        skipped silently so that a half-edited table never raises.
    """
    import math

    if isinstance(df, pd.DataFrame):
        # Keep missing cells as NaN/None instead of filling them with 0:
        # a filled name would stringify to "0" and look like a real config.
        rows = df.values.tolist()
    else:
        rows = df or []

    out = []
    for row in rows:
        if not row or len(row) < 2:
            continue

        raw_name = row[0]
        name_missing = raw_name is None or (
            isinstance(raw_name, float) and math.isnan(raw_name)
        )
        name = "" if name_missing else str(raw_name).strip()

        try:
            hourly = float(row[1])
        except (TypeError, ValueError):
            continue

        # NaN compares False against everything and inf is not a usable rate,
        # so require finiteness explicitly rather than relying on `<= 0`.
        if not name or not math.isfinite(hourly) or hourly <= 0:
            continue
        out.append((name, hourly))
    return out
def compute(in_price, out_price, in_k, out_k, gpu_df, planned_rps):
    """Recompute every output of the calculator from the current inputs.

    Args:
        in_price: API input price in dollars per 1M tokens.
        out_price: API output price in dollars per 1M tokens.
        in_k: Input tokens per request, in thousands.
        out_k: Output tokens per request, in thousands.
        gpu_df: GPU rate table (DataFrame or list of ``[name, $/hour]`` rows).
        planned_rps: Planned workload in requests per second.

    Returns:
        A 4-tuple ``(headline, break_even_table, workload_table, figure)``.
        When the per-request cost is not positive or no valid GPU row exists,
        both tables are empty and the figure is the placeholder chart.
    """
    # Gradio numeric inputs can deliver None (e.g. a cleared gr.Number field).
    # Treat that as 0 so the empty-state branch below handles it instead of
    # the arithmetic raising TypeError.
    in_price, out_price, in_k, out_k, planned_rps = (
        0.0 if value is None else value
        for value in (in_price, out_price, in_k, out_k, planned_rps)
    )

    cpr = cost_per_request(in_k, out_k, in_price, out_price)
    gpus = parse_gpus(gpu_df)
    headline = _headline(cpr, in_k, out_k, in_price, out_price)

    if cpr <= 0 or not gpus:
        empty_break = pd.DataFrame(
            columns=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"]
        )
        empty_workload = pd.DataFrame(columns=["Option", "$ / hour", "vs API"])
        return headline, empty_break, empty_workload, _empty_figure()

    # Break-even: the request rate at which API spend equals the GPU's flat rate.
    break_rows = []
    max_rps = 0.0
    for name, hourly in gpus:
        rph = hourly / cpr
        rps = rph / 3600
        max_rps = max(max_rps, rps)
        break_rows.append({
            "GPU config": name,
            "$/hour": f"${hourly:,.2f}",
            "Break-even req/hr": f"{rph:,.0f}",
            "Break-even RPS": f"{rps:,.3f}",
        })
    break_df = pd.DataFrame(break_rows)

    # Hourly API spend at the planned workload, compared against each GPU rate.
    api_hourly = planned_rps * 3600 * cpr
    workload_rows = [{
        "Option": "API",
        "$ / hour": f"${api_hourly:,.2f}",
        "vs API": "—",
    }]
    for name, hourly in gpus:
        diff = hourly - api_hourly
        if abs(diff) < 0.005:
            # Below half a cent the two-decimal display would show $0.00.
            note = "break-even"
        elif diff < 0:
            note = f"−${abs(diff):,.2f}/hr cheaper than API"
        else:
            note = f"+${diff:,.2f}/hr pricier than API"
        workload_rows.append({
            "Option": name,
            "$ / hour": f"${hourly:,.2f}",
            "vs API": note,
        })
    workload_df = pd.DataFrame(workload_rows)

    # Give the x-axis headroom past both the furthest break-even point and the
    # planned workload, with a small floor so the axis never collapses.
    x_max = max(max_rps * 1.6, planned_rps * 1.3, 0.1)
    fig = _build_figure(cpr, gpus, x_max, planned_rps)
    return headline, break_df, workload_df, fig
| def _headline(cpr, in_k, out_k, in_price, out_price): | |
| return ( | |
| f"### API cost per request: **${cpr:,.6f}** \n" | |
| f"_({int(in_k * 1000):,} in × ${in_price}/1M + {int(out_k * 1000):,} out × ${out_price}/1M)_" | |
| ) | |
def _empty_figure():
    """Return a blank placeholder chart that tells the user what to fill in."""
    # Paper coordinates (0.5, 0.5) centre the hint regardless of axis ranges.
    hint = dict(
        text="Set positive values for tokens, prices, and at least one GPU row.",
        xref="paper",
        yref="paper",
        x=0.5,
        y=0.5,
        showarrow=False,
    )
    placeholder = go.Figure()
    placeholder.update_layout(template="plotly_white", height=480, annotations=[hint])
    return placeholder
def _build_figure(cpr, gpus, x_max, planned_rps):
    """Plot hourly API spend against the flat hourly GPU rates.

    Args:
        cpr: API cost per request in dollars (callers pass a positive value).
        gpus: Non-empty list of ``(name, hourly_rate)`` pairs.
        x_max: Right edge of the x-axis, in requests per second.
        planned_rps: The user's planned workload in requests per second.

    Returns:
        A plotly figure holding the API cost line, one dashed horizontal line
        per GPU with a break-even marker where it falls within the x-range,
        and a dotted vertical guide plus diamond marker at the planned
        workload.
    """
    sample_count = 200
    rps_axis = [x_max * step / (sample_count - 1) for step in range(sample_count)]
    api_curve = [rate * 3600 * cpr for rate in rps_axis]

    chart = go.Figure()
    chart.add_trace(go.Scatter(
        x=rps_axis,
        y=api_curve,
        mode="lines",
        name="API cost",
        line={"color": "#222f3e", "width": 3},
        hovertemplate="RPS: %{x:.3f}<br>API $/hr: $%{y:,.2f}<extra></extra>",
    ))

    # Top of the workload guide line: the tallest thing on the chart plus headroom.
    priciest_gpu = max(rate for _, rate in gpus)
    y_top = max(api_curve[-1], priciest_gpu) * 1.18

    for idx, (label, rate) in enumerate(gpus):
        shade = GPU_COLORS[idx % len(GPU_COLORS)]

        # Flat line: a rented GPU costs the same per hour at any request rate.
        chart.add_trace(go.Scatter(
            x=[0, x_max],
            y=[rate, rate],
            mode="lines",
            name=f"{label} (${rate:.2f}/hr)",
            line={"color": shade, "width": 2, "dash": "dash"},
            hovertemplate=f"{label}<br>$/hr: ${rate:,.2f}<extra></extra>",
        ))

        even_per_hour = rate / cpr
        even_rps = even_per_hour / 3600
        if even_rps <= x_max:
            # Only mark break-even points that land inside the visible range.
            marker_hover = "<br>".join([
                f"{label} break-even",
                f"RPS: {even_rps:.3f}",
                f"req/hr: {even_per_hour:,.0f}",
                f"$/hr: ${rate:,.2f}<extra></extra>",
            ])
            chart.add_trace(go.Scatter(
                x=[even_rps],
                y=[rate],
                mode="markers+text",
                marker={
                    "color": shade,
                    "size": 11,
                    "line": {"color": "white", "width": 2},
                },
                text=[f" {even_rps:.3f} RPS"],
                textposition="middle right",
                textfont={"color": shade, "size": 12},
                showlegend=False,
                hovertemplate=marker_hover,
            ))

    # Planned workload: vertical guide, label at its top, marker on the API line.
    api_at_plan = planned_rps * 3600 * cpr
    chart.add_shape(
        type="line",
        x0=planned_rps,
        x1=planned_rps,
        y0=0,
        y1=y_top,
        line={"color": WORKLOAD_COLOR, "width": 2, "dash": "dot"},
    )
    chart.add_annotation(
        x=planned_rps,
        y=y_top,
        text=f"your workload: {planned_rps:.2f} RPS",
        showarrow=False,
        font={"color": WORKLOAD_COLOR, "size": 12},
        yshift=8,
    )
    chart.add_trace(go.Scatter(
        x=[planned_rps],
        y=[api_at_plan],
        mode="markers",
        marker={
            "color": WORKLOAD_COLOR,
            "size": 11,
            "symbol": "diamond",
            "line": {"color": "white", "width": 2},
        },
        name="Your workload (on API)",
        hovertemplate=f"At {planned_rps:.2f} RPS<br>API $/hr: ${api_at_plan:,.2f}<extra></extra>",
    ))

    chart.update_layout(
        template="plotly_white",
        height=480,
        margin={"l": 60, "r": 30, "t": 70, "b": 50},
        xaxis={"title": "Requests per second", "range": [0, x_max]},
        yaxis={"title": "$ / hour", "rangemode": "tozero"},
        legend={
            "orientation": "h",
            "yanchor": "bottom",
            "y": 1.02,
            "xanchor": "left",
            "x": 0,
        },
        title={
            "text": "Cloud GPU $/hr vs API $/hr — where lines cross is break-even",
            "font": {"size": 14},
        },
    )
    return chart
def apply_model_preset(preset_name, cur_in, cur_out):
    """Return the ``(input, output)`` prices to show for the chosen model preset.

    Presets without stored prices ("Custom", or any unknown name) leave the
    currently entered prices unchanged.
    """
    prices = MODEL_PRESETS.get(preset_name)
    if prices is not None:
        return prices[0], prices[1]
    return cur_in, cur_out
def apply_cloud_preset(preset_name, cur_df):
    """Return the GPU rate rows to show for the chosen cloud preset.

    Presets without stored rows ("Custom", or any unknown name) leave the
    current table unchanged.
    """
    preset_rows = CLOUD_PRESETS.get(preset_name)
    return cur_df if preset_rows is None else preset_rows
def reset_all():
    """Return the default value for every resettable input.

    The tuple order is: model preset, cloud preset, input price, output price,
    input tokens (k), output tokens (k), GPU table rows, planned RPS.
    """
    default_prices = MODEL_PRESETS[DEFAULT_MODEL]
    default_in_price = default_prices[0]
    default_out_price = default_prices[1]
    default_gpu_rows = CLOUD_PRESETS[DEFAULT_CLOUD]
    return (
        DEFAULT_MODEL,
        DEFAULT_CLOUD,
        default_in_price,
        default_out_price,
        DEFAULT_IN_K,
        DEFAULT_OUT_K,
        default_gpu_rows,
        DEFAULT_RPS,
    )
# UI definition: a two-column page with the inputs on the left and the results
# (workload comparison, break-even table, chart) on the right.
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped — confirm that the reset button belongs to the
# left column and that the event wiring sits directly inside the Blocks context.
with gr.Blocks(title="Cloud bills vs API bills") as demo:
    gr.Markdown(
        """
# Cloud bills vs API bills
At what request rate does renting GPUs beat paying per token?
Drag the **Your workload** slider to see live cost at your planned scale.
"""
    )
    with gr.Row():
        # Left column: everything the user can edit.
        with gr.Column(scale=1):
            gr.Markdown("### Model & API pricing")
            model_preset = gr.Dropdown(
                choices=list(MODEL_PRESETS.keys()),
                value=DEFAULT_MODEL,
                label="Model preset",
                info="Pick a preset or switch to Custom to enter your own prices.",
            )
            in_price = gr.Number(
                value=MODEL_PRESETS[DEFAULT_MODEL][0],
                label="Input $ / 1M tokens",
                precision=4,
                info="Effective input price (post-cache for OpenRouter-style providers).",
            )
            out_price = gr.Number(
                value=MODEL_PRESETS[DEFAULT_MODEL][1],
                label="Output $ / 1M tokens",
                precision=4,
            )
            gr.Markdown("### Request shape")
            # Token sliders are expressed in thousands of tokens.
            in_tokens_k = gr.Slider(
                1, 256, value=DEFAULT_IN_K, step=1,
                label="Input tokens / request (k)",
                info="64 means 64,000 tokens. Slide for typical context size.",
            )
            out_tokens_k = gr.Slider(
                0.1, 32, value=DEFAULT_OUT_K, step=0.1,
                label="Output tokens / request (k)",
                info="4 means 4,000 tokens.",
            )
            gr.Markdown("### Cloud GPU rates")
            cloud_preset = gr.Dropdown(
                choices=list(CLOUD_PRESETS.keys()),
                value=DEFAULT_CLOUD,
                label="Cloud provider preset",
                info="Edit the table below to match your contract.",
            )
            # Editable table of [config name, $/hour]; rows are parsed by parse_gpus.
            gpu_df = gr.Dataframe(
                value=CLOUD_PRESETS[DEFAULT_CLOUD],
                headers=["Config", "$ / hour"],
                datatype=["str", "number"],
                column_count=(2, "fixed"),
                row_count=(3, "dynamic"),
                interactive=True,
            )
            reset_btn = gr.Button("↺ Reset to defaults", variant="secondary", size="sm")
        # Right column: read-only results produced by compute().
        with gr.Column(scale=2):
            gr.Markdown("### Your workload")
            planned_rps = gr.Slider(
                0, 5, value=DEFAULT_RPS, step=0.05,
                label="Planned requests / second",
                info="What scale do you expect to run at? The dotted line on the chart marks this point.",
            )
            workload_table = gr.Dataframe(
                headers=["Option", "$ / hour", "vs API"],
                interactive=False,
                wrap=True,
            )
            gr.Markdown("### Break-even points")
            headline = gr.Markdown()
            break_table = gr.Dataframe(
                headers=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"],
                interactive=False,
                wrap=True,
            )
            chart = gr.Plot()
    # Order must match compute()'s parameters and its returned 4-tuple.
    inputs = [in_price, out_price, in_tokens_k, out_tokens_k, gpu_df, planned_rps]
    outputs = [headline, break_table, workload_table, chart]
    # Recompute all outputs whenever any single input component changes.
    for c in inputs:
        c.change(compute, inputs=inputs, outputs=outputs)
    # The preset dropdowns only write into the price fields / GPU table.
    # NOTE(review): presumably the change listeners registered above then
    # refresh the outputs — confirm against Gradio's change-event semantics.
    model_preset.change(
        apply_model_preset,
        inputs=[model_preset, in_price, out_price],
        outputs=[in_price, out_price],
    )
    cloud_preset.change(
        apply_cloud_preset,
        inputs=[cloud_preset, gpu_df],
        outputs=[gpu_df],
    )
    # Order must match the tuple returned by reset_all().
    reset_outputs = [model_preset, cloud_preset, in_price, out_price,
                     in_tokens_k, out_tokens_k, gpu_df, planned_rps]
    # Restore the defaults, then explicitly recompute the outputs.
    reset_btn.click(reset_all, outputs=reset_outputs).then(
        compute, inputs=inputs, outputs=outputs
    )
    # Populate the outputs once when the page first loads.
    demo.load(compute, inputs=inputs, outputs=outputs)

# Launch the app with the Soft theme when the file is run as a script.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())