Spaces:
Running
on
Zero
Running
on
Zero
| import argparse | |
| import code | |
| import datetime | |
| import json | |
| import os | |
| from pytz import timezone | |
| import time | |
| import pandas as pd # pandas>=2.0.3 | |
| import plotly.express as px | |
| import plotly.graph_objects as go | |
| from tqdm import tqdm | |
# Number of servers writing logs. With exactly one server the "-conv.json"
# files are read directly from the log root; otherwise get_log_files() reads
# them from one "server{i}" sub-directory per server.
NUM_SERVERS = 1

# Root directory of the logs, taken from the LOGDIR environment variable.
# The module refuses to load without it.
LOG_ROOT_DIR = os.environ.get("LOGDIR")
if LOG_ROOT_DIR is None:
    raise ValueError("LOGDIR environment variable not set, please set it by `export LOGDIR=...`")
def get_log_files(max_num_files=None):
    """Return the paths of the "-conv.json" log files, oldest first.

    The files are looked up directly under ``LOG_ROOT_DIR`` when
    ``NUM_SERVERS`` is 1, and under ``LOG_ROOT_DIR/server{i}`` for every
    server index otherwise. They are ordered by modification time.

    Args:
        max_num_files: Keep only this many of the most recently modified
            files. ``None`` (or 0) keeps all of them.

    Returns:
        A list of file paths sorted by ascending modification time.
    """
    root = os.path.expanduser(LOG_ROOT_DIR)
    if NUM_SERVERS == 1:
        log_dirs = [root]
    else:
        log_dirs = [f"{root}/server{idx}" for idx in range(NUM_SERVERS)]

    stamped_paths = []
    for log_dir in log_dirs:
        for entry in os.listdir(log_dir):
            if not entry.endswith("-conv.json"):
                continue
            path = f"{log_dir}/{entry}"
            stamped_paths.append((path, os.path.getmtime(path)))

    # Stable sort on the modification time only, so files with equal
    # timestamps keep their directory-listing order.
    stamped_paths.sort(key=lambda pair: pair[1])
    ordered = [path for path, _ in stamped_paths]

    limit = max_num_files or len(ordered)
    return ordered[-limit:]
def load_log_files(filename, num_retries=5, retry_delay=2):
    """Parse one JSON-lines log file into a list of slim records.

    Each non-blank line of the file must be a JSON object. Only the fields
    needed for the statistics are kept.

    Args:
        filename: Path of the log file to read.
        num_retries: How many times to try opening the file before giving up
            (at least one attempt is always made).
        retry_delay: Seconds to wait between two attempts.

    Returns:
        A list of dicts with the keys ``type``, ``tstamp``, ``model``
        (``""`` when absent) and ``models`` (``["", ""]`` when absent).

    Raises:
        FileNotFoundError: If the file is still missing after the last attempt.
        KeyError: If a record has no ``type`` or ``tstamp`` field.
    """
    attempts = max(1, num_retries)
    for attempt in range(attempts):
        try:
            # The context manager closes the handle even if reading fails.
            with open(filename, encoding="utf-8") as log_file:
                lines = log_file.readlines()
            break
        except FileNotFoundError:
            # Re-raise the real error on the final attempt instead of falling
            # through with `lines` undefined; do not sleep when giving up.
            if attempt == attempts - 1:
                raise
            time.sleep(retry_delay)

    data = []
    for line in lines:
        # Blank lines (e.g. a trailing newline) are not JSON; skip them.
        if not line.strip():
            continue
        row = json.loads(line)
        data.append(
            {
                "type": row["type"],
                "tstamp": row["tstamp"],
                "model": row.get("model", ""),
                "models": row.get("models", ["", ""]),
            }
        )
    return data
def load_log_files_parallel(log_files, num_threads=16):
    """Load many log files with a process pool and concatenate their records.

    Args:
        log_files: Paths of the log files to parse with ``load_log_files``.
        num_threads: Size of the ``multiprocessing.Pool`` (worker processes).

    Returns:
        One flat list holding the records of every file, in the order of
        ``log_files``.
    """
    from multiprocessing import Pool

    with Pool(num_threads) as pool:
        # imap yields results in input order; tqdm only adds a progress bar.
        progress = tqdm(pool.imap(load_log_files, log_files), total=len(log_files))
        per_file_records = list(progress)

    return [record for records in per_file_records for record in records]
def get_anony_vote_df(df):
    """Return the vote rows of ``df`` whose first ``models`` entry is empty.

    A row is kept when its ``type`` is one of ``leftvote``, ``rightvote``,
    ``tievote`` or ``bothbad_vote`` and ``models[0] == ""`` (presumably the
    marker of an anonymous battle, judging by the function name).

    Args:
        df: DataFrame with at least the columns ``type`` and ``models``.

    Returns:
        The filtered DataFrame, with the same columns as ``df`` even when no
        row matches.
    """
    vote_types = ["leftvote", "rightvote", "tievote", "bothbad_vote"]
    votes = df[df["type"].isin(vote_types)]

    # On an empty frame `apply` yields an object-dtype Series, which pandas
    # does not treat as a boolean mask; force bool so filtering keeps the
    # columns instead of selecting "no columns".
    is_anony = votes["models"].apply(lambda pair: pair[0] == "").astype(bool)
    return votes[is_anony]
def merge_counts(series, on, names):
    """Join several count Series on their shared index into one table.

    Args:
        series: Non-empty list of Series (e.g. ``value_counts`` results) whose
            index is named ``on``.
        on: Name of the index level to join on; it becomes the first column
            of the result.
        names: Column name for each Series, in the same order as ``series``.

    Returns:
        A DataFrame with the column ``on`` followed by one column per entry
        of ``names``. The join is ``pd.merge``'s default inner join, so a key
        missing from any one Series is dropped from the table.

    Raises:
        ValueError: If ``series`` and ``names`` differ in length.
    """
    if len(series) != len(names):
        raise ValueError(
            f"expected one name per series, got {len(series)} series "
            f"and {len(names)} names"
        )

    # Give every Series its final column name *before* merging. Merging first
    # and renaming by position afterwards relies on pandas' automatic
    # "_x"/"_y" suffixes, which collide once four or more Series are merged.
    labelled = [counts.rename(label) for counts, label in zip(series, names)]

    merged = labelled[0].to_frame()
    for counts in labelled[1:]:
        merged = pd.merge(merged, counts, on=on)
    return merged.reset_index()
def _dates_in_tz(tstamps, tz):
    """Format each epoch timestamp as a ``YYYY-MM-DD`` date string in ``tz``."""
    return [
        datetime.datetime.fromtimestamp(tstamp, tz=tz).strftime("%Y-%m-%d")
        for tstamp in tstamps
    ]


def report_basic_stats(log_files):
    """Compute usage statistics from the given log files.

    "Now" is the newest timestamp found in the logs, not the wall clock, so
    the "last hour" / "last day" windows are relative to the latest record.
    All dates and times are rendered in the US/Pacific timezone.

    Args:
        log_files: Paths of the log files to load.

    Returns:
        A dict with the keys:
            ``chat_dates_bar``: plotly figure with stacked per-day bars of
                chats and votes selected by ``get_anony_vote_df``.
            ``model_hist_md``: markdown table of chat counts per model.
            ``action_hist_md``: markdown table of counts per action type.
            ``anony_vote_hist_md``: markdown table of vote counts per type.
            ``num_chats_last_24_hours``: markdown table of hourly chat counts.
            ``last_updated_datetime``: formatted time of the newest record.

    Raises:
        KeyError: If the logs contain no records at all (the frame then has
            no ``tstamp`` column).
    """
    # Build the timezone once instead of once per timestamp.
    pacific = timezone("US/Pacific")

    df_all = pd.DataFrame(load_log_files_parallel(log_files))
    now_t = df_all["tstamp"].max()
    df_1_hour = df_all[df_all["tstamp"] > (now_t - 3600)]
    df_1_day = df_all[df_all["tstamp"] > (now_t - 3600 * 24)]
    anony_vote_df_all = get_anony_vote_df(df_all)

    # Chat trends
    chat_dates = _dates_in_tz(df_all[df_all["type"] == "chat"]["tstamp"], pacific)
    vote_dates = _dates_in_tz(anony_vote_df_all["tstamp"], pacific)
    # Series.value_counts replaces the top-level pd.value_counts, which is
    # deprecated in pandas >= 2.1.
    chat_dates_counts = pd.Series(chat_dates).value_counts()
    vote_dates_counts = pd.Series(vote_dates).value_counts()
    chat_dates_bar = go.Figure(
        data=[
            go.Bar(
                name="Anony. Vote",
                x=vote_dates_counts.index,
                y=vote_dates_counts,
                text=[f"{val:.0f}" for val in vote_dates_counts],
                textposition="auto",
            ),
            go.Bar(
                name="Chat",
                x=chat_dates_counts.index,
                y=chat_dates_counts,
                text=[f"{val:.0f}" for val in chat_dates_counts],
                textposition="auto",
            ),
        ]
    )
    chat_dates_bar.update_layout(
        barmode="stack",
        xaxis_title="Dates",
        yaxis_title="Count",
        height=300,
        width=1200,
    )

    # Model call counts
    model_hist_all = df_all[df_all["type"] == "chat"]["model"].value_counts()
    model_hist_1_day = df_1_day[df_1_day["type"] == "chat"]["model"].value_counts()
    model_hist_1_hour = df_1_hour[df_1_hour["type"] == "chat"]["model"].value_counts()
    model_hist = merge_counts(
        [model_hist_all, model_hist_1_day, model_hist_1_hour],
        on="model",
        names=["All", "Last Day", "Last Hour"],
    )
    model_hist_md = model_hist.to_markdown(index=False, tablefmt="github")

    # Action counts
    action_hist_all = df_all["type"].value_counts()
    action_hist_1_day = df_1_day["type"].value_counts()
    action_hist_1_hour = df_1_hour["type"].value_counts()
    action_hist = merge_counts(
        [action_hist_all, action_hist_1_day, action_hist_1_hour],
        on="type",
        names=["All", "Last Day", "Last Hour"],
    )
    action_hist_md = action_hist.to_markdown(index=False, tablefmt="github")

    # Anony vote counts (only the "All" and "Last Day" windows are reported)
    anony_vote_hist_all = anony_vote_df_all["type"].value_counts()
    anony_vote_hist_1_day = get_anony_vote_df(df_1_day)["type"].value_counts()
    anony_vote_hist = merge_counts(
        [anony_vote_hist_all, anony_vote_hist_1_day],
        on="type",
        names=["All", "Last Day"],
    )
    anony_vote_hist_md = anony_vote_hist.to_markdown(index=False, tablefmt="github")

    # Last 24 hours: 24 one-hour buckets starting at the oldest record of the
    # last-day window, listed newest bucket first. Each bucket is half-open,
    # [left, right), and is labelled with its right edge.
    chat_tstamps = df_1_day[df_1_day["type"] == "chat"]["tstamp"]
    base = df_1_day["tstamp"].min()
    num_chats_last_24_hours = []
    times = []
    for i in range(24, 0, -1):
        left = base + (i - 1) * 3600
        right = base + i * 3600
        num_chats_last_24_hours.append(
            ((chat_tstamps >= left) & (chat_tstamps < right)).sum()
        )
        times.append(
            datetime.datetime.fromtimestamp(right, tz=pacific).strftime(
                "%Y-%m-%d %H:%M:%S %Z"
            )
        )
    last_24_hours_df = pd.DataFrame({"time": times, "value": num_chats_last_24_hours})
    last_24_hours_md = last_24_hours_df.to_markdown(index=False, tablefmt="github")

    # Last update datetime
    last_updated_datetime = datetime.datetime.fromtimestamp(
        now_t, tz=pacific
    ).strftime("%Y-%m-%d %H:%M:%S %Z")

    return {
        "chat_dates_bar": chat_dates_bar,
        "model_hist_md": model_hist_md,
        "action_hist_md": action_hist_md,
        "anony_vote_hist_md": anony_vote_hist_md,
        "num_chats_last_24_hours": last_24_hours_md,
        "last_updated_datetime": last_updated_datetime,
    }
if __name__ == "__main__":
    # Command-line entry point: print the markdown tables for the most
    # recent log files (all files when --max-num-files is not given).
    parser = argparse.ArgumentParser()
    parser.add_argument("--max-num-files", type=int)
    args = parser.parse_args()

    basic_stats = report_basic_stats(get_log_files(args.max_num_files))

    # Each table is followed by a blank line.
    for table_key in (
        "action_hist_md",
        "model_hist_md",
        "anony_vote_hist_md",
        "num_chats_last_24_hours",
    ):
        print(basic_stats[table_key] + "\n")