added argparse for job dir, added tone filter

2026-05-08 23:28:13 -04:00
parent afd5b8c60e
commit cc16acbb12
2 changed files with 14 additions and 13 deletions
--- a/docs/tasks.org
+++ b/docs/tasks.org
@@ -351,15 +351,13 @@ data pulls entirely from the job; goal is to point viz/streamlit.py at any job/
 ** evidence
 - commit: 
- tests: from root dir, `streamlit run viz/streamlit.py`
+- tests: from root dir, `streamlit run viz/streamlit.py -- --jobs-dir <job-dir>`
 7. add forum_url, forum_collected_date to scraper
 * [ ] t1.6 host streamlit
 figure out how to host this, locally or via streamlit servers
 * === Backlog ===
-
+- add forum_url, forum_collected_date to scraper (to add to viz)
 * [ ] X: complete proposal information
 Ensure we capture as much useful information as possible about the actual proposal - contact information, etc. what the state actually says about what was posted. 
 ** acceptance criteria
--- a/viz/streamlit.py
+++ b/viz/streamlit.py
@@ -1,14 +1,17 @@
-# streamlit run analysis/viz/streamlit.py
+# streamlit run viz/streamlit.py -- --jobs-dir analysis/jobs/f452-1
 import argparse
 from pathlib import Path
 from datetime import datetime as dt
 import pandas as pd
 import plotly.graph_objects as go
 import plotly.express as px
 import plotly.subplots as ps
 import streamlit as st
-workdir = Path("analysis/jobs/f452-1")
+parser = argparse.ArgumentParser()
 parser.add_argument("--jobs-dir", default="analysis/jobs/f452-1", type=Path,
                    help="Job directory containing review.csv, forum.jsonl, and prompt.txt")
 args, _ = parser.parse_known_args()  # parse_known_args: ignore Streamlit's own argv entries
 workdir = args.jobs_dir
 df = pd.read_csv(workdir/"review.csv")
 df['date_dt'] = pd.to_datetime(df.date)
 df["date_day"] = df["date_dt"].dt.date
@@ -128,14 +131,14 @@ st.subheader("Comment Explorer",anchor=False,divider="gray")
 # comment explorer
 cex_left, cex_right = st.columns([1,1])
 with cex_left:
-    stance = st.multiselect("Filter stance", sorted(df["stance"].dropna().unique()), default=sorted(df["stance"].dropna().unique()))
+    filter_stance = st.multiselect("Filter stance", sorted(df["stance"].dropna().unique()), default=sorted(df["stance"].dropna().unique()))
-    q = st.text_input("Search comment title and text")
+    filter_tone = st.multiselect("Filter tone", sorted(df["tone"].dropna().unique()), default=sorted(df["tone"].dropna().unique()))
-    dff = df[df["stance"].isin(stance)]
+    dff = df[df["stance"].isin(filter_stance) & df["tone"].isin(filter_tone)]
    if q:
        dff = dff[dff["text"].fillna("").str.contains(q, case=False, regex=False)]
 with cex_right:
-    filter_tone = st.multiselect("Filter tone", sorted(df["tone"].dropna().unique()), default=sorted(df["tone"].dropna().unique()))
+    q = st.text_input("Search comment title and text")
    if q:
        dff = dff[dff["text"].fillna("").str.contains(q, case=False, regex=False)]
    st.text(""); st.text("")
    st.text("Showing " + str(len(dff))+ " comments",text_alignment="right", width="stretch")