# Spaces: andynoodles / CloudOrAPI — Running
# File size: 11,027 Bytes
# 862c42d

import math

import gradio as gr
import pandas as pd
import plotly.graph_objects as go


# API pricing presets: label -> (input $ per 1M tokens, output $ per 1M tokens).
# "Custom" maps to None, which apply_model_preset() treats as "keep the
# current price fields".
MODEL_PRESETS = {
    "DeepSeek V4 — OpenRouter (~90% cache)": (0.041, 0.87),
    "Claude Sonnet 4.6": (3.0, 15.0),
    "Claude Haiku 4.5": (1.0, 5.0),
    "Custom": None,
}

# Cloud GPU presets: label -> rows of [config name, $ per hour] for the rate
# table. "Custom" maps to None, which apply_cloud_preset() treats as "keep the
# current table".
CLOUD_PRESETS = {
    "GMI Cloud": [["H200 × 8", 20.8], ["B200 × 8", 32.0], ["GB200 × 4", 32.0]],
    "Custom": None,
}

# Initial control values; reset_all() restores these.
DEFAULT_MODEL = "DeepSeek V4 — OpenRouter (~90% cache)"
DEFAULT_CLOUD = "GMI Cloud"
DEFAULT_IN_K = 64.0  # input tokens per request, in thousands
DEFAULT_OUT_K = 4.0  # output tokens per request, in thousands
DEFAULT_RPS = 1.0  # planned requests per second

# Line colors for GPU configs on the chart; cycled when there are more rows
# than colors.
GPU_COLORS = ["#2E86DE", "#10AC84", "#EE5253", "#8854D0", "#F79F1F", "#576574"]
# Color of the planned-workload guide line, label, and marker.
WORKLOAD_COLOR = "#9b59b6"


def cost_per_request(in_k: float, out_k: float, in_price: float, out_price: float) -> float:
    """Return the API cost, in dollars, of one request.

    Token counts are given in thousands (``in_k``, ``out_k``); prices are in
    dollars per one million tokens (``in_price``, ``out_price``).
    """
    input_spend = in_k * 1000 * in_price
    output_spend = out_k * 1000 * out_price
    return (input_spend + output_spend) / 1_000_000


def parse_gpus(df):
    """Extract usable ``(name, hourly_rate)`` pairs from the GPU rate table.

    Args:
        df: A ``pandas.DataFrame``, a list of rows, or ``None``. Each row is
            expected to carry the config name in its first cell and the
            hourly price in its second.

    Returns:
        A list of ``(name, hourly)`` tuples in table order, with ``name``
        stripped of surrounding whitespace and ``hourly`` as a ``float``.
        A row is skipped when it has fewer than two cells, its name is
        missing or blank, or its price is not a finite number above zero.
    """
    if isinstance(df, pd.DataFrame):
        # Missing cells stay None/NaN here and are rejected per cell below.
        # Filling them with 0 would turn a missing name into the string "0",
        # which then slips past the blank-name check.
        rows = df.values.tolist()
    else:
        rows = df or []

    out = []
    for row in rows:
        if not row or len(row) < 2:
            continue

        raw_name = row[0]
        name_missing = (
            raw_name is None
            or raw_name is pd.NA
            or (isinstance(raw_name, float) and math.isnan(raw_name))
        )
        name = "" if name_missing else str(raw_name).strip()
        if not name:
            continue

        try:
            hourly = float(row[1])
        except (TypeError, ValueError):
            continue
        # NaN compares False against everything, so a plain `hourly <= 0`
        # test would let NaN (and inf) through into the cost maths.
        if not math.isfinite(hourly) or hourly <= 0:
            continue

        out.append((name, hourly))
    return out


def _blank_to_zero(value):
    """Return 0.0 for a cleared numeric field (``None``), else ``value`` unchanged."""
    return 0.0 if value is None else value


def compute(in_price, out_price, in_k, out_k, gpu_df, planned_rps):
    """Recalculate every output of the app from the current inputs.

    Args:
        in_price: API input price in dollars per 1M tokens.
        out_price: API output price in dollars per 1M tokens.
        in_k: Input tokens per request, in thousands.
        out_k: Output tokens per request, in thousands.
        gpu_df: GPU rate table (DataFrame or list of ``[name, $/hour]`` rows).
        planned_rps: Planned request rate, in requests per second.

    Any numeric argument may be ``None`` (Gradio passes ``None`` for an
    emptied number field). It is treated as 0, so the app renders its empty
    state or a zero-priced result instead of raising ``TypeError``.

    Returns:
        ``(headline, break_df, workload_df, fig)``: the Markdown headline,
        the break-even table, the planned-workload comparison table, and the
        Plotly figure. When the cost per request is not positive or no valid
        GPU row exists, both tables are empty and the figure is a placeholder.
    """
    in_price = _blank_to_zero(in_price)
    out_price = _blank_to_zero(out_price)
    in_k = _blank_to_zero(in_k)
    out_k = _blank_to_zero(out_k)
    planned_rps = _blank_to_zero(planned_rps)

    cpr = cost_per_request(in_k, out_k, in_price, out_price)
    gpus = parse_gpus(gpu_df)
    headline = _headline(cpr, in_k, out_k, in_price, out_price)

    # Break-even is hourly / cpr, so a non-positive cost per request (or no
    # GPU rows) leaves nothing meaningful to tabulate or plot.
    if cpr <= 0 or not gpus:
        empty_break = pd.DataFrame(columns=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"])
        empty_workload = pd.DataFrame(columns=["Option", "$ / hour", "vs API"])
        return headline, empty_break, empty_workload, _empty_figure()

    # Break-even table: the request rate at which API spend equals each GPU's
    # flat hourly price.
    break_rows = []
    max_rps = 0.0
    for name, hourly in gpus:
        rph = hourly / cpr
        rps = rph / 3600
        max_rps = max(max_rps, rps)
        break_rows.append({
            "GPU config": name,
            "$/hour": f"${hourly:,.2f}",
            "Break-even req/hr": f"{rph:,.0f}",
            "Break-even RPS": f"{rps:,.3f}",
        })
    break_df = pd.DataFrame(break_rows)

    # Workload table: API spend at the planned rate versus each GPU's price.
    api_hourly = planned_rps * 3600 * cpr
    workload_rows = [{
        "Option": "API",
        "$ / hour": f"${api_hourly:,.2f}",
        "vs API": "—",
    }]
    for name, hourly in gpus:
        diff = hourly - api_hourly
        # Differences that would display as $0.00 are reported as break-even.
        if abs(diff) < 0.005:
            note = "break-even"
        elif diff < 0:
            note = f"−${abs(diff):,.2f}/hr cheaper than API"
        else:
            note = f"+${diff:,.2f}/hr pricier than API"
        workload_rows.append({
            "Option": name,
            "$ / hour": f"${hourly:,.2f}",
            "vs API": note,
        })
    workload_df = pd.DataFrame(workload_rows)

    # Extend the x-axis past both the furthest break-even point and the
    # planned rate, with a small floor so the axis never collapses.
    x_max = max(max_rps * 1.6, planned_rps * 1.3, 0.1)
    fig = _build_figure(cpr, gpus, x_max, planned_rps)
    return headline, break_df, workload_df, fig


def _headline(cpr, in_k, out_k, in_price, out_price):
    """Return the Markdown headline showing the API cost of one request.

    Args:
        cpr: Cost per request in dollars.
        in_k: Input tokens per request, in thousands.
        out_k: Output tokens per request, in thousands.
        in_price: Input price in dollars per 1M tokens.
        out_price: Output price in dollars per 1M tokens.

    Returns:
        A two-line Markdown string: the cost to six decimals, then the
        token-count × price breakdown it was derived from.
    """
    # round(), not int(): k * 1000 can land a hair below the intended whole
    # number (1.005 * 1000 == 1004.9999999999999), and truncating would show
    # one token too few.
    in_tokens = round(in_k * 1000)
    out_tokens = round(out_k * 1000)
    return (
        f"### API cost per request: **${cpr:,.6f}**  \n"
        f"_({in_tokens:,} in × ${in_price}/1M  +  {out_tokens:,} out × ${out_price}/1M)_"
    )


def _empty_figure():
    """Return a blank chart whose only content is a hint to supply valid inputs."""
    # Paper coordinates (0.5, 0.5) centre the hint within the figure.
    hint = {
        "text": "Set positive values for tokens, prices, and at least one GPU row.",
        "xref": "paper",
        "yref": "paper",
        "x": 0.5,
        "y": 0.5,
        "showarrow": False,
    }
    placeholder = go.Figure()
    placeholder.update_layout(template="plotly_white", height=480, annotations=[hint])
    return placeholder


def _build_figure(cpr, gpus, x_max, planned_rps):
    """Plot API spend against flat GPU prices over a range of request rates.

    Args:
        cpr: API cost per request in dollars.
        gpus: List of ``(name, hourly)`` GPU configs.
        x_max: Upper bound of the requests-per-second axis.
        planned_rps: Planned request rate to highlight on the chart.

    Returns:
        A Plotly figure with the API cost line, one dashed horizontal line per
        GPU config, a labelled marker at each break-even point that falls
        within the x-range, and a dotted guide line plus diamond marker at the
        planned workload.
    """
    sample_count = 200
    rates = [x_max * step / (sample_count - 1) for step in range(sample_count)]
    api_line = [rate * 3600 * cpr for rate in rates]

    chart = go.Figure()
    chart.add_trace(go.Scatter(
        x=rates,
        y=api_line,
        mode="lines",
        name="API cost",
        line={"color": "#222f3e", "width": 3},
        hovertemplate="RPS: %{x:.3f}<br>API $/hr: $%{y:,.2f}<extra></extra>",
    ))

    # Height of the workload guide line: a little above the tallest thing drawn.
    priciest_gpu = max(rate for _, rate in gpus)
    y_max = max(api_line[-1], priciest_gpu) * 1.18

    for idx, (name, hourly) in enumerate(gpus):
        color = GPU_COLORS[idx % len(GPU_COLORS)]

        # Flat rental price: a horizontal line across the whole x-range.
        chart.add_trace(go.Scatter(
            x=[0, x_max],
            y=[hourly, hourly],
            mode="lines",
            name=f"{name} (${hourly:.2f}/hr)",
            line={"color": color, "width": 2, "dash": "dash"},
            hovertemplate=f"{name}<br>$/hr: ${hourly:,.2f}<extra></extra>",
        ))

        rph = hourly / cpr
        rps = rph / 3600
        # Only mark break-even points that fall inside the plotted range.
        if not rps <= x_max:
            continue

        marker_hover = (
            f"{name} break-even<br>"
            f"RPS: {rps:.3f}<br>"
            f"req/hr: {rph:,.0f}<br>"
            f"$/hr: ${hourly:,.2f}<extra></extra>"
        )
        chart.add_trace(go.Scatter(
            x=[rps],
            y=[hourly],
            mode="markers+text",
            marker={"color": color, "size": 11, "line": {"color": "white", "width": 2}},
            text=[f"  {rps:.3f} RPS"],
            textposition="middle right",
            textfont={"color": color, "size": 12},
            showlegend=False,
            hovertemplate=marker_hover,
        ))

    # Planned workload: vertical guide, label at its top, marker on the API line.
    api_at = planned_rps * 3600 * cpr
    chart.add_shape(
        type="line",
        x0=planned_rps,
        x1=planned_rps,
        y0=0,
        y1=y_max,
        line={"color": WORKLOAD_COLOR, "width": 2, "dash": "dot"},
    )
    chart.add_annotation(
        x=planned_rps,
        y=y_max,
        text=f"your workload: {planned_rps:.2f} RPS",
        showarrow=False,
        font={"color": WORKLOAD_COLOR, "size": 12},
        yshift=8,
    )
    chart.add_trace(go.Scatter(
        x=[planned_rps],
        y=[api_at],
        mode="markers",
        marker={
            "color": WORKLOAD_COLOR,
            "size": 11,
            "symbol": "diamond",
            "line": {"color": "white", "width": 2},
        },
        name="Your workload (on API)",
        hovertemplate=f"At {planned_rps:.2f} RPS<br>API $/hr: ${api_at:,.2f}<extra></extra>",
    ))

    chart.update_layout(
        template="plotly_white",
        height=480,
        margin={"l": 60, "r": 30, "t": 70, "b": 50},
        xaxis={"title": "Requests per second", "range": [0, x_max]},
        yaxis={"title": "$ / hour", "rangemode": "tozero"},
        legend={"orientation": "h", "yanchor": "bottom", "y": 1.02, "xanchor": "left", "x": 0},
        title={
            "text": "Cloud GPU $/hr vs API $/hr — where lines cross is break-even",
            "font": {"size": 14},
        },
    )
    return chart


def apply_model_preset(preset_name, cur_in, cur_out):
    """Return the ``(input, output)`` prices to show for the chosen model preset.

    A preset with no price pair (such as "Custom") or an unknown name leaves
    the current values ``cur_in`` and ``cur_out`` in place.
    """
    prices = MODEL_PRESETS.get(preset_name)
    if prices is not None:
        return prices[0], prices[1]
    return cur_in, cur_out


def apply_cloud_preset(preset_name, cur_df):
    """Return the GPU rate rows to show for the chosen cloud preset.

    A preset with no rows (such as "Custom") or an unknown name leaves the
    current table ``cur_df`` in place.
    """
    preset_rows = CLOUD_PRESETS.get(preset_name)
    return cur_df if preset_rows is None else preset_rows


def reset_all():
    """Return the default value for every resettable control.

    The tuple order is: model preset, cloud preset, input price, output
    price, input tokens (k), output tokens (k), GPU rate rows, planned RPS.
    """
    default_prices = MODEL_PRESETS[DEFAULT_MODEL]
    default_gpu_rows = CLOUD_PRESETS[DEFAULT_CLOUD]
    return (
        DEFAULT_MODEL,
        DEFAULT_CLOUD,
        default_prices[0],
        default_prices[1],
        DEFAULT_IN_K,
        DEFAULT_OUT_K,
        default_gpu_rows,
        DEFAULT_RPS,
    )


# Build the UI: pricing, request-shape and GPU-rate inputs in the left column;
# workload comparison, break-even table and chart in the wider right column.
with gr.Blocks(title="Cloud bills vs API bills") as demo:
    gr.Markdown(
        """
# Cloud bills vs API bills
At what request rate does renting GPUs beat paying per token?
Drag the **Your workload** slider to see live cost at your planned scale.
        """
    )

    with gr.Row():
        with gr.Column(scale=1):
            gr.Markdown("### Model & API pricing")
            model_preset = gr.Dropdown(
                choices=list(MODEL_PRESETS.keys()),
                value=DEFAULT_MODEL,
                label="Model preset",
                info="Pick a preset or switch to Custom to enter your own prices.",
            )
            in_price = gr.Number(
                value=MODEL_PRESETS[DEFAULT_MODEL][0],
                label="Input $ / 1M tokens",
                precision=4,
                info="Effective input price (post-cache for OpenRouter-style providers).",
            )
            out_price = gr.Number(
                value=MODEL_PRESETS[DEFAULT_MODEL][1],
                label="Output $ / 1M tokens",
                precision=4,
            )

            gr.Markdown("### Request shape")
            # Both sliders are in thousands of tokens, matching the units
            # cost_per_request() expects.
            in_tokens_k = gr.Slider(
                1, 256, value=DEFAULT_IN_K, step=1,
                label="Input tokens / request  (k)",
                info="64 means 64,000 tokens. Slide for typical context size.",
            )
            out_tokens_k = gr.Slider(
                0.1, 32, value=DEFAULT_OUT_K, step=0.1,
                label="Output tokens / request  (k)",
                info="4 means 4,000 tokens.",
            )

            gr.Markdown("### Cloud GPU rates")
            cloud_preset = gr.Dropdown(
                choices=list(CLOUD_PRESETS.keys()),
                value=DEFAULT_CLOUD,
                label="Cloud provider preset",
                info="Edit the table below to match your contract.",
            )
            # Editable two-column table (config name, $/hour); parse_gpus()
            # filters out rows that are incomplete or invalid.
            gpu_df = gr.Dataframe(
                value=CLOUD_PRESETS[DEFAULT_CLOUD],
                headers=["Config", "$ / hour"],
                datatype=["str", "number"],
                column_count=(2, "fixed"),
                row_count=(3, "dynamic"),
                interactive=True,
            )

            reset_btn = gr.Button("↺ Reset to defaults", variant="secondary", size="sm")

        with gr.Column(scale=2):
            gr.Markdown("### Your workload")
            planned_rps = gr.Slider(
                0, 5, value=DEFAULT_RPS, step=0.05,
                label="Planned requests / second",
                info="What scale do you expect to run at? The dotted line on the chart marks this point.",
            )
            workload_table = gr.Dataframe(
                headers=["Option", "$ / hour", "vs API"],
                interactive=False,
                wrap=True,
            )

            gr.Markdown("### Break-even points")
            headline = gr.Markdown()
            break_table = gr.Dataframe(
                headers=["GPU config", "$/hour", "Break-even req/hr", "Break-even RPS"],
                interactive=False,
                wrap=True,
            )

            chart = gr.Plot()

    # Order must match compute()'s positional parameters and the order of the
    # tuple it returns.
    inputs = [in_price, out_price, in_tokens_k, out_tokens_k, gpu_df, planned_rps]
    outputs = [headline, break_table, workload_table, chart]

    # Recompute all outputs whenever any single input changes.
    for c in inputs:
        c.change(compute, inputs=inputs, outputs=outputs)

    # Choosing a preset overwrites the price fields / rate table; "Custom"
    # leaves the current values in place.
    model_preset.change(
        apply_model_preset,
        inputs=[model_preset, in_price, out_price],
        outputs=[in_price, out_price],
    )
    cloud_preset.change(
        apply_cloud_preset,
        inputs=[cloud_preset, gpu_df],
        outputs=[gpu_df],
    )

    # Order must match the tuple returned by reset_all().
    reset_outputs = [model_preset, cloud_preset, in_price, out_price,
                     in_tokens_k, out_tokens_k, gpu_df, planned_rps]
    # Restore defaults first, then recompute from the restored values.
    reset_btn.click(reset_all, outputs=reset_outputs).then(
        compute, inputs=inputs, outputs=outputs
    )

    # Fill in the results once when the page first loads.
    demo.load(compute, inputs=inputs, outputs=outputs)


# Launch the app with the Soft theme when this file is run as a script.
if __name__ == "__main__":
    demo.launch(theme=gr.themes.Soft())