added streamlit v1
This commit is contained in:
134
viz/prototype_charts.py
Normal file
134
viz/prototype_charts.py
Normal file
@@ -0,0 +1,134 @@
|
||||
'''
|
||||
prototype_charts.py
|
||||
Generate prototype Plotly charts as standalone HTML files, for later addition to the Streamlit app.
|
||||
'''
|
||||
|
||||
|
||||
from pathlib import Path
|
||||
import pandas as pd
|
||||
import plotly.express as px
|
||||
import numpy as np
|
||||
|
||||
# --- configuration ---------------------------------------------------------
# Review export that feeds every chart.
# NOTE: absolute, machine-specific path; adjust it when running elsewhere.
inp = Path(r"c:/users/moses/projects/vath/analysis/jobs/f452-1/review.csv")

# Directory the chart HTML files are written to (relative to the working
# directory). Created if it does not exist yet.
out = Path("viz/")
out.mkdir(parents=True, exist_ok=True)

# Display order for the stance categories, shared by the charts.
stance_order = ["support", "oppose", "neutral", "unknown"]

# An explicit tone order was tried and then disabled:
# tone_order = ["positive", "negative", "neutral", "mixed", "unknown", "unclear"]
# The default order read better (unclear/negative/neutral/mixed/positive
# against unknown/oppose/neutral/support); the same holds for the
# percent-within-stance view.
|
||||
# Load the review export and normalise the columns the charts rely on.
df = pd.read_csv(inp)

# errors="coerce": values that cannot be parsed as dates become NaT instead
# of raising.
parsed_dates = pd.to_datetime(df["date"], errors="coerce")
df["date"] = parsed_dates
# Calendar-day key used by the per-day groupings.
df["date_day"] = parsed_dates.dt.date

# Missing labels get an explicit "unknown" bucket.
label_cols = ["stance", "tone"]
df[label_cols] = df[label_cols].fillna("unknown")
|
||||
|
||||
# 1. stance share
# Comments per stance. Reindexing on stance_order fixes the row order and
# zero-fills any stance that has no comments.
counts = (
    df["stance"]
    .value_counts()
    .reindex(stance_order, fill_value=0)
    .rename_axis("stance")
    .reset_index(name="count")
)
fig = px.bar(
    counts,
    x="count",
    y="stance",
    orientation="h",
    text="count",
)
fig.write_html(out / "stance_share.html")
|
||||
|
||||
# 2. stance over time
# Long table: one row per (day, stance) pair with its comment count.
daily = (
    df.groupby(["date_day", "stance"])
    .size()
    .reset_index(name="count")
)
fig = px.bar(
    daily,
    x="date_day",
    y="count",
    color="stance",
    category_orders={"stance": stance_order},
)
fig.write_html(out / "stance_over_time.html")
|
||||
|
||||
# 3. stance x tone
# Long table: one row per (stance, tone) pair with its comment count, used as
# the z value of the heatmap.
heat = (
    df.groupby(["stance", "tone"])
    .size()
    .reset_index(name="count")
)
fig = px.density_heatmap(
    heat,
    x="tone",
    y="stance",
    z="count",
    category_orders={"stance": stance_order},
)
fig.write_html(out / "stance_tone_heatmap.html")
|
||||
|
||||
# 4. confidence by stance
# Box plot of stance_confidence per stance; only outliers are drawn as
# individual points.
fig = px.box(
    df,
    x="stance",
    y="stance_confidence",
    category_orders={"stance": stance_order},
    points="outliers",
)
fig.write_html(out / "confidence_by_stance.html")
|
||||
|
||||
# 5. cumulative stance and share over time
# Wide table: one row per day, one column per stance, values are that day's
# comment counts.
daily = df.groupby(["date_day", "stance"]).size()
daily = daily.unstack(fill_value=0)
# Force the stance_order columns (zero-filled when a stance never occurs).
daily = daily.reindex(columns=stance_order, fill_value=0)
# Days must be chronological before taking the running sum.
daily = daily.sort_index()

# Running total per stance, then back to long form for plotly express.
cum = daily.cumsum()
cum_long = cum.reset_index().melt(
    id_vars="date_day",
    var_name="stance",
    value_name="cumulative_count",
)

fig = px.area(
    cum_long,
    x="date_day",
    y="cumulative_count",
    color="stance",
    category_orders={"stance": stance_order},
    title="cumulative comments by stance over time",
)
fig.write_html(out / "cumulative_stance_area.html")
|
||||
|
||||
# 6. cumulative stance share over time
# Divide each stance's running total by the running total across all stances
# on the same day, then reshape to long form for plotting.
running_total = cum.sum(axis=1)
cum_pct = (
    cum.div(running_total, axis=0)
    .reset_index()
    .melt(
        id_vars="date_day",
        var_name="stance",
        value_name="cumulative_share",
    )
)

fig = px.line(
    cum_pct,
    x="date_day",
    y="cumulative_share",
    color="stance",
    category_orders={"stance": stance_order},
    title="cumulative stance share over time",
)
# Shares are fractions; show the axis ticks as whole percentages.
fig.update_yaxes(tickformat=".0%")
fig.write_html(out / "cumulative_stance_share.html")
|
||||
|
||||
# 7. diverging h-bar
stance_counts = df["stance"].value_counts().reindex(stance_order, fill_value=0)

# "oppose" is plotted as a negative value so its bar extends left of zero;
# every other stance keeps its positive count.
diverging_order = ["oppose", "support", "neutral", "unknown"]
bar_sign = {"oppose": -1}
div = pd.DataFrame({
    "stance": diverging_order,
    "count": [
        bar_sign.get(label, 1) * stance_counts.get(label, 0)
        for label in diverging_order
    ],
})

fig = px.bar(
    div,
    x="count",
    y="stance",
    orientation="h",
    # Label bars with the magnitude so "oppose" does not show a minus sign.
    text=div["count"].abs(),
    title="support vs oppose",
)
fig.update_xaxes(title="comments", zeroline=True)
fig.update_traces(textposition="outside")
fig.write_html(out / "stance_diverging_bar.html")
|
||||
|
||||
# 8. Stance x Tone labels
# Annotated count matrix: rows are stances, columns are tones.
#
# Rows are fixed to stance_order (zero-filled when a stance has no comments).
# Columns keep crosstab's own ordering of the tones present in the data.
# The previous version reindexed columns on `tone_order`, but that list only
# exists as a commented-out line in this script, so the lookup raised
# NameError. The notes next to that line say the default order was preferred,
# so no explicit tone order is applied here.
heat = pd.crosstab(df["stance"], df["tone"]).reindex(
    index=stance_order,
    fill_value=0,
)

fig = px.imshow(
    heat,
    text_auto=True,  # print each cell's count on the heatmap
    aspect="auto",
    title="stance x tone, count",
)
fig.write_html(out / "stance_tone_counts.html")
|
||||
|
||||
# Row-normalise the stance x tone counts: each cell becomes its share of the
# stance's total. Zero totals are swapped for NaN first, so an empty stance
# row comes out as NaN rather than dividing by zero.
row_totals = heat.sum(axis=1).replace(0, np.nan)
rowpct = heat.div(row_totals, axis=0)

fig = px.imshow(
    rowpct,
    text_auto=".0%",  # annotate cells as whole percentages
    aspect="auto",
    title="stance x tone, percent within stance",
)
fig.write_html(out / "stance_tone_rowpct.html")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user