| | |
| |
|
| | |
| | import plotly.express as px |
| | from plotly.graph_objs import Figure, FigureWidget |
| | import datasets |
| | import pandas as pd |
| | import huggingface_hub |
| | import plotly.graph_objs as go |
| | import numpy as np |
| | from PIL import Image |
| |
|
# Registry of the figures built below, keyed by a short name.
FIGURES: dict[str, Figure] = dict()
| | |
| |
|
# Treemap of the rows in nlp_datas.csv, nested root -> task -> dataset and
# sized by the "size" column.
df = pd.read_csv("nlp_datas.csv")
fig = px.treemap(
    data_frame=df,
    path=[px.Constant("nlp-datasets"), "task", "dataset"],
    values="size",
)
FIGURES["nlp"] = fig
# Transparent paper background and no outer margin.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)
fig
| | |
# Treemap of the rows in llm.csv, nested root -> dataset and sized by the
# "size" column.
df = pd.read_csv("llm.csv")
fig = px.treemap(
    data_frame=df,
    path=[px.Constant("LLM"), "dataset"],
    values="size",
)
FIGURES["gpt"] = fig
# Transparent paper background and no outer margin.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)
fig
| | |
| |
|
def _parse_sequence_length(label):
    """Convert a sequence-length label such as ``"4k"`` or ``"512"`` to a number.

    A trailing ``k`` multiplies the value by 1024. Integral labels yield an
    ``int``; anything else that parses as a number yields a ``float``.

    Raises:
        ValueError: If the label is not a number with an optional ``k`` suffix.
    """
    text = str(label).strip()
    scale = 1
    if text.endswith("k"):
        text, scale = text[:-1], 1024
    try:
        return int(text) * scale
    except ValueError:
        return float(text) * scale


# Line chart of time against sequence length, one line per model column.
df = pd.read_csv("./seq-time.csv", index_col=0)
# The labels used to be turned into numbers with eval(); parsing them
# explicitly avoids executing arbitrary text that happens to be in the CSV.
# NOTE(review): assumes every label is "<number>" or "<number>k" -- labels
# holding other arithmetic that eval() accepted are no longer supported.
df.index = df.index.map(_parse_sequence_length)
# NOTE(review): the factor 7 is not explained in this file -- confirm.
df["platformers"] = df["platformers"] / 7
# The last column of the CSV is not plotted.
df.drop([df.columns[-1]], axis=1, inplace=True)
# Long format: one row per (sequence length, model) pair.
df = df.reset_index(names="sequence length").melt(
    id_vars="sequence length", var_name="model", value_name="time"
)
fig = px.line(df, x="sequence length", y="time", color="model")
FIGURES["seq-time"] = fig
# Transparent backgrounds with white legend and axis text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)
fig.update_xaxes(color="white")
fig.update_yaxes(color="white")
fig
| | |
| |
|
# Grouped bar chart of tflops against sequence length, one bar colour per
# model column of seq-tflops.csv.
wide = pd.read_csv("seq-tflops.csv", index_col=0)
# Long format: one row per (sequence length, model) pair.
df = wide.reset_index(names="sequence length").melt(
    id_vars="sequence length",
    var_name="model",
    value_name="tflops",
)
fig = px.bar(
    data_frame=df,
    x="sequence length",
    y="tflops",
    color="model",
    barmode="group",
)
FIGURES["seq-tflops"] = fig
# Transparent backgrounds with white legend and axis text.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font={"color": "white"},
)
for recolor_axis in (fig.update_xaxes, fig.update_yaxes):
    recolor_axis(color="white")
fig
| | |
| |
|
| |
|
# Table of the first 100 training rows of SUSTech/webvid, without the
# "duration" column.
df = datasets.load_dataset("SUSTech/webvid", split="train[:100]").to_pandas()
df = df.drop(columns=["duration"])

header_spec = dict(
    values=list(df.columns),
    fill_color="paleturquoise",
    align="left",
)
cell_spec = dict(
    values=[df[name] for name in df.columns],
    fill_color="lavender",
    align="left",
)
fig = go.Figure(data=[go.Table(header=header_spec, cells=cell_spec)])

# Transparent paper background and no outer margin.
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin={"t": 0, "l": 0, "r": 0, "b": 0},
)
FIGURES["webvid"] = fig
| | |
| |
|
# Bar chart of a token count per example task.
# (The empty go.Figure() that used to be created here was overwritten by
# px.bar before it was ever used, so it has been dropped.)
data = {
    "402-page transcripts from Apollo 11’s mission to the moon": 326914,
    "44-minute silent Buster Keaton movie": 696417,
    "more than 100,000 lines of code": 816767,
    "Generate 1min video": 1000000,
}

# Two-column frame: "task" (the dict keys) and "token" (the dict values).
df = pd.Series(data, name="token").to_frame().reset_index(names="task")

fig = px.bar(
    df,
    y="token",
    x="task",
    text_auto=".2s",
)
FIGURES["token-bar"] = fig

# Value labels sit above the bars, unrotated, in white, and are not clipped
# by the axis.
fig.update_traces(
    textfont_size=12,
    textangle=0,
    textposition="outside",
    cliponaxis=False,
    textfont_color="white",
)
fig.update_layout(
    paper_bgcolor="rgba(0,0,0,0)",
    margin=dict(t=0, l=0, r=0, b=0),
    plot_bgcolor="rgba(0,0,0,0)",
    legend_font=dict(color="white"),
)

# Both axes: white text, no axis line, zero line or grid.
fig.update_xaxes(
    color="white",
    zeroline=False,
    showline=False,
    showgrid=False,
    title="",
)
fig.update_yaxes(
    showline=False,
    showgrid=False,
    zeroline=False,
    color="white",
)
fig
| |
|
| |
|
| | |
def generate_loss(steps, initial_loss, decay_rate, noise_factor, rng=None):
    """Simulate a noisy, exponentially decaying loss curve.

    Args:
        steps: Array-like of positions at which the loss is evaluated.
        initial_loss: Value of the noise-free curve at step 0.
        decay_rate: Rate of the exponential decay.
        noise_factor: Scale of the Gaussian noise, relative to the noise-free
            loss at each step (0 gives the exact exponential).
        rng: Optional ``numpy.random.Generator`` used to draw the noise, for
            reproducible curves. When ``None`` the global ``np.random`` state
            is used, exactly as before.

    Returns:
        ``numpy.ndarray`` with the same shape as ``steps``.
    """
    # Accept lists/tuples as well as arrays; ``.shape`` is needed below.
    steps = np.asarray(steps, dtype=float)
    loss = initial_loss * np.exp(-decay_rate * steps)
    if rng is None:
        unit_noise = np.random.randn(*steps.shape)
    else:
        unit_noise = rng.standard_normal(steps.shape)
    noise = noise_factor * loss * unit_noise
    return loss + noise
| |
|
| |
|
def splitpoints(total, split):
    """Yield ``split`` consecutive slices that together cover ``total`` items.

    The first ``split - 1`` slices each span ``total // split`` items; the
    last slice is open-ended, so it also takes any remainder.

    Args:
        total: Number of items to partition.
        split: Number of slices to produce; must be at least 1.

    Yields:
        ``slice`` objects in increasing order.

    Raises:
        ValueError: If ``split`` is less than 1 (raised when iteration starts).
    """
    if split < 1:
        raise ValueError(f"split must be at least 1, got {split}")
    step = total // split
    for i in range(split - 1):
        yield slice(i * step, (i + 1) * step)
    # Derived from ``split`` rather than the loop variable, which is unbound
    # when ``split == 1`` and the loop body never runs.
    yield slice((split - 1) * step, None)
| |
|
| |
|
# One entry per segment of the cloud-switch curve: display name, line colour
# and an icon path.
meta = [
    dict(name="2xDGX on aws", color="red", icon="../figures/gc.png"),
    dict(name="16xDGX on aliyun", color="orange", icon="../figures/aws-white.png"),
    dict(name="128xDGX on ucloud", color="blue", icon="../figures/aliyun.png"),
]
| |
|
| |
|
# Synthetic loss curve sampled at 1000 evenly spaced points in [0, 1].
steps = np.linspace(0, 1, 1000)
loss = generate_loss(steps, initial_loss=1, decay_rate=5, noise_factor=0.1)

fig = go.Figure()
FIGURES["cloud-switch"] = fig

# One line trace per entry of ``meta``, each drawn over its own consecutive
# part of the curve and in that entry's colour.
for segment, stage in zip(splitpoints(1000, len(meta)), meta):
    curve = go.Scatter(
        x=steps[segment],
        y=loss[segment],
        mode="lines",
        name=stage["name"],
        line={"color": stage["color"]},
    )
    fig.add_trace(curve)
# Logos overlaid on the figure, positioned in paper coordinates:
# (image path, x, y, size used for both sizex and sizey).
_LOGO_PLACEMENTS = (
    ("../figures/logo/ucloud.png", 0.8, 0.2, 0.2),
    ("../figures/aws-white.png", 0.17, 0.7, 0.15),
    ("../figures/aliyun.png", 0.43, 0.3, 0.15),
)
for logo_path, logo_x, logo_y, logo_size in _LOGO_PLACEMENTS:
    # Image.open keeps the file handle open; the ``with`` block closes it once
    # the image has been handed to plotly.
    # NOTE(review): relies on plotly encoding the PIL image when it is
    # assigned as ``source`` -- confirm against the installed plotly version.
    with Image.open(logo_path) as logo:
        fig.add_layout_image(
            x=logo_x,
            sizex=logo_size,
            y=logo_y,
            sizey=logo_size,
            xref="paper",
            yref="paper",
            opacity=1.0,
            layer="above",
            source=logo,
        )
| |
|
# No legend; transparent paper and plot backgrounds.
layout_options = dict(
    showlegend=False,
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(255,255,255,0)",
)
fig.update_layout(**layout_options)

# X axis: no tick labels, axis line, zero line or grid.
x_axis_options = dict(
    showticklabels=False,
    showline=False,
    zeroline=False,
    showgrid=False,
    automargin=True,
)
fig.update_xaxes(**x_axis_options)

# Y axis: titled "Loss" in white, with a dashed semi-transparent white grid
# and no tick labels.
y_axis_options = dict(
    showticklabels=False,
    zeroline=False,
    showline=False,
    griddash="4px",
    gridcolor="rgba(255,255,255,0.3)",
    title="Loss",
    color="white",
)
fig.update_yaxes(**y_axis_options)
fig
| |
|
| |
|
| | |
def plot_gantt(df):
    """Draw ``df`` as a timeline chart with one coloured row per task.

    Args:
        df: Frame with "Start", "End" and "Task" columns; each row becomes one
            bar from "Start" to "End" on the row of its "Task".

    Returns:
        The styled plotly figure.
    """
    chart = px.timeline(
        data_frame=df,
        x_start="Start",
        x_end="End",
        y="Task",
        color="Task",
    )

    # Hour:minute tick format, no legend, transparent backgrounds.
    chart.update_layout(
        xaxis_tickformat="%H:%M",
        showlegend=False,
        paper_bgcolor="rgba(0,0,0,0)",
        plot_bgcolor="rgba(255,255,255,0)",
    )
    # X axis: no tick labels, axis line, zero line or grid.
    chart.update_xaxes(
        showticklabels=False,
        showline=False,
        zeroline=False,
        showgrid=False,
        automargin=True,
    )
    # Y axis: untitled, large white task labels, dashed grid.
    chart.update_yaxes(
        zeroline=False,
        showline=False,
        griddash="4px",
        gridcolor="rgba(0,0,0,0.3)",
        title="",
        color="white",
        tickfont={"size": 20},
    )

    return chart
| |
|
| |
|
| | |
# Random schedule: num_rows one-hour slots starting every 4 hours, each
# labelled "Read" with probability download_prop and "Transform" otherwise.
num_rows = 1000
download_prop = 0.65
df = pd.DataFrame(
    {"Start": pd.date_range("1-jan-2021", periods=num_rows, freq="4h")}
)
df["End"] = df["Start"] + pd.Timedelta(hours=1)
df["Task"] = np.random.choice(
    ["Read", "Transform"], num_rows, p=(download_prop, 1 - download_prop)
)

# Pin the first slot to "Read" and the last one to "Transform".
df.loc[0, "Task"] = "Read"
df.loc[len(df) - 1, "Task"] = "Transform"

# Merge runs of consecutive rows that share a task into one interval: the
# counter increases each time the task differs from the previous row.
run_id = (df["Task"] != df["Task"].shift()).cumsum()
df = df.groupby(run_id).agg({"Start": "min", "End": "max", "Task": "first"})

# Base timeline that the profile figures copy from.
timeline = df.copy()
| | |
| |
|
# "profile-naive": 10 evenly spaced instants over the whole timeline; three
# of the intervals between them become "Train" phases.
df = timeline.copy()
ddi = pd.date_range(
    start=df["Start"].iloc[0], end=df["End"].iloc[-1], periods=10
)
for start, end in zip(ddi[2:-1:3], ddi[3::3]):
    # Relabel the rows whose Start lies in the window as "Train" ...
    in_window = df["Start"].between(start, end)
    df.loc[in_window, "Task"] = "Train"
    # ... and append one "Train" row spanning the whole window.
    train_row = pd.Series({"Start": start, "End": end, "Task": "Train"})
    df.loc[len(df) + 1] = train_row

FIGURES["profile-naive"] = plot_gantt(df)
FIGURES["profile-naive"]
| | |
| |
|
# "profile-old": (prop + 1) * 10 evenly spaced instants over the whole
# timeline; each "Train" window runs from one selected instant to the one
# prop - 1 positions later.
df = timeline.copy()
prop = 10
ddi = pd.date_range(
    start=df["Start"].iloc[0], end=df["End"].iloc[-1], periods=(prop + 1) * 10
)
window_starts = ddi[1 : -1 : prop + 1]
window_ends = ddi[prop :: prop + 1]
for start, end in zip(window_starts, window_ends):
    # Relabel the rows whose Start lies in the window as "Train" ...
    in_window = df["Start"].between(start, end)
    df.loc[in_window, "Task"] = "Train"
    # ... and append one "Train" row spanning the whole window.
    train_row = pd.Series({"Start": start, "End": end, "Task": "Train"})
    df.loc[len(df) + 1] = train_row
FIGURES["profile-old"] = plot_gantt(df)
FIGURES["profile-old"]
| | |
| |
|
# "profile-stream": the base timeline plus a single "Train" row that spans
# from the first Start to the last row's Start.
df = timeline.copy()

first_start = df["Start"].iloc[0]
# NOTE(review): the end of the "Train" bar is the last row's Start, not its
# End -- confirm this is intended.
last_start = df["Start"].iloc[-1]
df.loc[len(df) + 1] = pd.Series(
    {"Start": first_start, "End": last_start, "Task": "Train"}
)
FIGURES["profile-stream"] = plot_gantt(df)
FIGURES["profile-stream"]
| |
|
| | |
| |
|
# Write every registered figure as an HTML fragment (not a full page), with
# plotly.js loaded from the CDN, into ../components/<name>.qmd.
for name, figure in FIGURES.items():
    print(name)
    target = f"../components/{name}.qmd"
    figure.write_html(target, full_html=False, include_plotlyjs="cdn")
| |
|
| | |
| | |
| | |
| | import qrcode |
| | from qrcode.image.styledpil import StyledPilImage |
| | from qrcode.image.styles.moduledrawers.pil import RoundedModuleDrawer |
| | from qrcode.image.styles.colormasks import RadialGradiantColorMask |
| |
|
# QR code for the WeChat link: white modules on a transparent background,
# using the "L" error-correction level.
qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
qr.add_data("https://u.wechat.com/MAmdMGMYjGFC4-2ESxZ1oyw")

qr_colors = {"fill_color": "white", "back_color": "transparent"}
img_2 = qr.make_image(**qr_colors)
img_2.save("../figures/qr/jing.png")
| | |
| |
|
| |
|
# QR code for the mailto link: white modules on a transparent background,
# using the "L" error-correction level.
qr = qrcode.QRCode(error_correction=qrcode.constants.ERROR_CORRECT_L)
qr.add_data("mailto:data@sustech.edu.cn?subject=Hello&body=")

qr_colors = {"fill_color": "white", "back_color": "transparent"}
img_2 = qr.make_image(**qr_colors)
img_2.save("../figures/qr/mail-data.png")
| |
|
| |
|
| |
|