# Provenance: contents of dashboard.py as created by git commit
# 262ddd3092ce42d2d17c02b20ac0f52462de109c ("Add basic streamlit app",
# James Bourbeau, Fri, 1 Mar 2024 15:28:32 -0600).
"""Streamlit dashboard listing Dask mentions on GitHub.

Running this script renders two tables, one built from the "commits"
dataset and one from the "comments" dataset, both read from Delta tables
stored under ``OUTDIR / "dask"``.
"""

import dask_deltatable as ddt
import streamlit as st

from preprocess import OUTDIR

# Prefix that turns the stored ``repo`` / ``username`` values into URLs.
_GITHUB_URL = "https://github.com/"


def get_data(variable):
    """Load one dataset and prepare it for display.

    Parameters
    ----------
    variable
        Name of the Delta table directory under ``OUTDIR / "dask"``
        (this script uses ``"commits"`` and ``"comments"``).

    Returns
    -------
    The computed dataframe with the ``repo`` and ``username`` columns
    rewritten as GitHub URLs and string-dtype columns cast to ``object``.
    """
    # NOTE(review): this reads the full table on every call; consider
    # ``st.cache_data`` if script reruns become slow -- confirm the
    # acceptable staleness first.
    df = ddt.read_deltalake(OUTDIR / "dask" / variable).compute()
    # Presumably a leftover index column from when the table was written
    # (TODO confirm). ``errors="ignore"`` keeps the dashboard working for
    # tables that do not carry it instead of failing with a KeyError.
    df = df.drop(columns="__index_level_0__", errors="ignore")
    df["repo"] = _GITHUB_URL + df["repo"]
    df["username"] = _GITHUB_URL + df["username"]
    # Streamlit doesn't like pyarrow strings
    # (xref https://github.com/streamlit/streamlit/issues/6334)
    str_cols = df.select_dtypes(include="string").columns
    df = df.astype({c: object for c in str_cols})
    return df


def _show_table(df):
    """Render *df* with ``repo`` and ``username`` shown as link columns."""
    st.dataframe(
        df,
        column_config={
            "repo": st.column_config.LinkColumn("repo"),
            "username": st.column_config.LinkColumn("user"),
        },
        hide_index=True,
    )


st.markdown("""
## Dask mentions on GitHub
### Commits
""")
df = get_data("commits")
_show_table(df)
st.markdown("""
### Comments
""")
df = get_data("comments")
_show_table(df)