diff --git a/analysis/gpt4o/analysis-realtime.py b/analysis/gpt4o/analysis_realtime.py similarity index 64% rename from analysis/gpt4o/analysis-realtime.py rename to analysis/gpt4o/analysis_realtime.py index 550d669..91e4551 100644 --- a/analysis/gpt4o/analysis-realtime.py +++ b/analysis/gpt4o/analysis_realtime.py @@ -1,9 +1,9 @@ #!/usr/bin/env python3 """ -analysis/gpt4o/analysis.py — Manual GPT-4o sentiment pipeline for VA Townhall comments. +analysis/gpt4o/analysis_realtime.py — Synchronous GPT-4o pipeline for VA Townhall comments. Usage: - python analysis/gpt4o/analysis.py [--limit {5,10,20,50}] [--model MODEL] + python analysis/gpt4o/analysis_realtime.py [--limit {5,10,20,50}] [--model MODEL] Output: analysis/gpt4o/forum{id}_{scrape_ts}_{model}_{run_ts}.jsonl @@ -28,33 +28,11 @@ except ImportError: sys.exit("openai package not installed. Run: pip install openai") # --------------------------------------------------------------------------- -# Prompt (version is derived from the content — changing either string changes PROMPT_VERSION) +# Prompt — loaded from analysis/prompt-1.txt at import time -SYSTEM_PROMPT = """\ -You are an expert policy analyst classifying public comments submitted to the Virginia Town Hall -regulatory comment system. You will be given the text of a proposed regulation and a single -public comment. Return ONLY a JSON object — no other text. - -Definitions: -- stance: the commenter's position on whether the regulation should be adopted. - "support" = wants it approved (as-is or with changes); - "oppose" = wants it rejected or substantially weakened; - "neutral" = takes no position, asks a question, or provides factual input only; - "unknown" = too vague, off-topic, or uninterpretable to classify. -- tone: the emotional register of the writing, independent of stance.
- "positive" = affirming, hopeful, appreciative; - "negative" = angry, fearful, alarmed, or contemptuous; - "neutral" = matter-of-fact, procedural, or informational; - "mixed" = contains both positive and negative emotional content; - "unclear" = tone cannot be determined (e.g., a one-word comment). -- stance_confidence: float 0.0–1.0, your confidence in the stance label. -- stance_rationale: 1–3 sentences explaining the key evidence; quote specific phrases where possible. -- tags: up to 5 short topic labels relevant to the comment's specific concerns (e.g. - "parental rights", "student safety", "privacy", "religious freedom", "LGBTQ+ inclusion", - "bullying prevention", "school sports", "bathroom access"). Empty array if none apply. - -Return exactly these keys: stance, stance_confidence, stance_rationale, tone, tags.\ -""" +_PROMPT_FILE = Path(__file__).parent.parent / "prompt-1.txt" +SYSTEM_PROMPT = _PROMPT_FILE.read_text(encoding="utf-8").strip() +PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7] USER_TEMPLATE = """\ ## Proposed Regulation @@ -73,15 +51,11 @@ Body: Classify this comment per the instructions. Return only JSON.\ """ -PROMPT_VERSION = hashlib.sha256( - (SYSTEM_PROMPT + USER_TEMPLATE).encode("utf-8") -).hexdigest()[:7] - MAX_COMMENT_CHARS = 6000 -_RETRY_DELAYS = [1.0, 2.0] # delays before attempt 2 and 3 +_RETRY_DELAYS = [1.0, 2.0] # --------------------------------------------------------------------------- -# Core functions (importable for tests) +# Core functions def load_items(path: Path) -> tuple[dict | None, list[dict]]: @@ -102,11 +76,7 @@ def load_items(path: Path) -> tuple[dict | None, list[dict]]: def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]: - """Build the OpenAI messages list for one comment. - - Returns (messages, truncated) where truncated is True if the comment body - was cut to MAX_COMMENT_CHARS. - """ + """Build OpenAI messages for one comment. 
Returns (messages, truncated).""" reg_title = (forum or {}).get("reg_title", "[unknown]") reg_desc = (forum or {}).get("reg_desc", "[unknown]") @@ -132,8 +102,13 @@ def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]: ], truncated +def parse_api_response(content: str) -> dict: + data = json.loads(content) + keys = ("stance", "stance_confidence", "stance_rationale", "tone", "tags") + return {k: data.get(k) for k in keys} + + def _call_api(client, messages: list, model: str) -> str: - """Call the OpenAI chat API with exponential-backoff retry on rate limits.""" last_exc = None for delay in [0.0] + _RETRY_DELAYS: if delay: @@ -151,21 +126,7 @@ def _call_api(client, messages: list, model: str) -> str: raise last_exc # type: ignore[misc] -def parse_api_response(content: str) -> dict: - """Parse the model's JSON response, returning only the expected keys.""" - data = json.loads(content) - keys = ("stance", "stance_confidence", "stance_rationale", "tone", "tags") - return {k: data.get(k) for k in keys} - - -def analyze_comment( - client, - comment: dict, - forum: dict | None, - run_id: str, - model: str, -) -> dict: - """Analyze one comment and return a fully-formed output record.""" +def analyze_comment(client, comment: dict, forum: dict | None, run_id: str, model: str) -> dict: base = { "run_id": run_id, "forum_id": comment.get("forum_id", ""), @@ -191,7 +152,6 @@ def analyze_comment( def _scrape_ts_from_filename(path: Path) -> str: - """Extract the timestamp from a scraped JSONL filename for use in the output name.""" m = re.search(r"(\d{4}-\d{2}-\d{2}T[\d\-+:]+)", path.stem) return m.group(1).replace(":", "-") if m else "unknown" @@ -199,13 +159,11 @@ def _scrape_ts_from_filename(path: Path) -> str: # --------------------------------------------------------------------------- # CLI - def main() -> None: load_dotenv() parser = argparse.ArgumentParser( - description="Analyze VA Townhall public comments with GPT-4o.", - 
formatter_class=argparse.RawDescriptionHelpFormatter, + description="Analyze VA Townhall public comments with GPT-4o (synchronous).", ) parser.add_argument("input", help="Path to scraped JSONL file") parser.add_argument( @@ -215,11 +173,7 @@ def main() -> None: metavar="{5,10,20,50}", help="Process only the first N comments (for testing). Omit to process all.", ) - parser.add_argument( - "--model", - default="gpt-4o", - help="OpenAI model name (default: gpt-4o)", - ) + parser.add_argument("--model", default="gpt-4o", help="OpenAI model (default: gpt-4o)") args = parser.parse_args() api_key = os.environ.get("OPENAI_API_KEY") @@ -234,10 +188,7 @@ def main() -> None: forum, comments = load_items(input_path) if forum is None: - print( - "Warning: no ForumItem found in file — regulation context will be [unknown].", - file=sys.stderr, - ) + print("Warning: no ForumItem found — regulation context will be [unknown].", file=sys.stderr) if args.limit: comments = comments[: args.limit] @@ -264,16 +215,10 @@ def main() -> None: out.flush() if record["error"]: n_err += 1 - print( - f" [{i}/{total}] ERROR {comment.get('comment_id')}: {record['error']}", - file=sys.stderr, - ) + print(f" [{i}/{total}] ERROR {comment.get('comment_id')}: {record['error']}", file=sys.stderr) else: n_ok += 1 - print( - f" [{i}/{total}] OK {comment.get('comment_id')} → {record['stance']}", - file=sys.stderr, - ) + print(f" [{i}/{total}] OK {comment.get('comment_id')} → {record['stance']}", file=sys.stderr) time.sleep(0.1) print(f"\nDone. 
{n_ok} ok, {n_err} errors → {out_path}", file=sys.stderr) diff --git a/analysis/gpt4o/forum452_unknown_gpt-4o_2026-05-05T20-43-59+00-00.jsonl b/analysis/gpt4o/forum452_unknown_gpt-4o_2026-05-05T20-43-59+00-00.jsonl new file mode 100644 index 0000000..e69de29 diff --git a/analysis/gpt4o/forum452_unknown_gpt-4o_2026-05-05T20-44-11+00-00.jsonl b/analysis/gpt4o/forum452_unknown_gpt-4o_2026-05-05T20-44-11+00-00.jsonl new file mode 100644 index 0000000..dcf040b --- /dev/null +++ b/analysis/gpt4o/forum452_unknown_gpt-4o_2026-05-05T20-44-11+00-00.jsonl @@ -0,0 +1,10 @@ +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87914", "analyzed_at": "2026-05-05T20:44:11.731054+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "Support the Model Policy Wholeheartedly", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter explicitly states, \"I support the model policy wholeheartedly,\" indicating clear support for the regulation. They also express appreciation for the policy's inclusivity and guidance, saying it is a \"first step in creating schools in Virginia that are inclusive and welcoming for transgender and non-binary students.\"", "tone": "positive", "tags": ["LGBTQ+ inclusion", "student safety", "school policy", "transgender rights", "educational support"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87915", "analyzed_at": "2026-05-05T20:44:14.418311+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "Please support this vital policy", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter explicitly states, 'I strongly support these proposals,' indicating clear approval of the regulation. 
They also affirm the importance of treating every student with dignity and respect, aligning with the policy's goals.", "tone": "positive", "tags": ["LGBTQ+ inclusion", "student safety", "nondiscrimination"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87916", "analyzed_at": "2026-05-05T20:44:17.820090+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "Please support this policy", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter explicitly states 'I am in full support of this policy guidance,' indicating clear support for the regulation. The phrase 'Trans rights are human rights' further reinforces their supportive stance.", "tone": "positive", "tags": ["transgender rights", "human rights"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87917", "analyzed_at": "2026-05-05T20:44:18.982080+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "Please support this policy", "stance": "support", "stance_confidence": 0.95, "stance_rationale": "The commenter explicitly states 'Please support this policy' and 'Please implement this policy,' indicating a clear support for the adoption of the regulation.", "tone": "positive", "tags": ["transgender rights", "student safety", "nondiscrimination"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87918", "analyzed_at": "2026-05-05T20:44:22.439016+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "An Essential Policy", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter explicitly states 'I fully support this policy' and describes it as 'essential for the health and wellbeing of our students and of our community,' indicating clear approval of the regulation.", "tone": "positive", "tags": 
["student wellbeing", "community support", "education policy"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87919", "analyzed_at": "2026-05-05T20:44:23.589115+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "Support from a School Counselor", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter explicitly states support for the guidance, noting it will be 'incredibly helpful' and 'important in order to better support transgender students.' This indicates a clear approval of the proposed regulation.", "tone": "positive", "tags": ["LGBTQ+ inclusion", "student support", "mental health", "school counseling"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87920", "analyzed_at": "2026-05-05T20:44:25.159983+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "I support this policy", "stance": "support", "stance_confidence": 0.95, "stance_rationale": "The commenter explicitly states 'I support this policy' and expresses belief in the importance of a 'welcoming and nurturing environment' for transgender students, indicating clear support for the regulation.", "tone": "positive", "tags": ["LGBTQ+ inclusion", "student safety"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87921", "analyzed_at": "2026-05-05T20:44:28.076212+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "It’s about time!", "stance": "support", "stance_confidence": 0.95, "stance_rationale": "The commenter expresses clear support for the regulation by stating that the guidance is \"a long time coming and is desperately needed.\" This indicates a strong desire for the regulation to be adopted to address issues faced by transgender students, like their son.", "tone": "positive", "tags": ["bullying 
prevention", "LGBTQ+ inclusion", "student safety"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87922", "analyzed_at": "2026-05-05T20:44:29.673172+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "A long overdue policy", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter expresses strong support for the policy, describing it as 'pro-equality' and 'evidence based,' and states that it would 'guarantee protections for transgender and gender variant youth.' The use of phrases like 'incredibly excited' and 'kudos to you, champions of equality!' further indicates a supportive stance.", "tone": "positive", "tags": ["LGBTQ+ inclusion", "student safety", "bullying prevention", "equality"], "truncated": false, "error": null} +{"run_id": "e84adaf5-5250-42b9-97c1-59623bd99bc7", "forum_id": "452", "comment_id": "87923", "analyzed_at": "2026-05-05T20:44:35.056904+00:00", "model": "gpt-4o", "prompt_version": "cb41250", "input_title": "100% support", "stance": "support", "stance_confidence": 1.0, "stance_rationale": "The commenter explicitly states 'I totally support this needed policy,' indicating clear support for the regulation. 
They emphasize the importance of safety, support, and equality for all kids, aligning with the goals of the proposed regulation.", "tone": "positive", "tags": ["student safety", "LGBTQ+ inclusion", "nondiscrimination"], "truncated": false, "error": null} diff --git a/tests/test_gpt4o_analysis.py b/tests/analysis_gpt4o_realtime.py similarity index 65% rename from tests/test_gpt4o_analysis.py rename to tests/analysis_gpt4o_realtime.py index 71fb475..1e80d59 100644 --- a/tests/test_gpt4o_analysis.py +++ b/tests/analysis_gpt4o_realtime.py @@ -1,15 +1,14 @@ -"""Unit tests for analysis/gpt4o/analysis.py — no real API calls.""" +"""Unit tests for analysis/gpt4o/analysis_realtime.py — no real API calls.""" import json import sys from pathlib import Path -from unittest.mock import MagicMock, patch +from unittest.mock import MagicMock import pytest -# Make the module importable without installing as a package sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o")) -import analysis as gpt4o +import analysis_realtime as rt # --------------------------------------------------------------------------- @@ -51,26 +50,25 @@ def _mock_client(response_content: str = MOCK_RESPONSE_CONTENT): # Prompt versioning def test_prompt_version_is_7_hex_chars(): - assert len(gpt4o.PROMPT_VERSION) == 7 - assert all(c in "0123456789abcdef" for c in gpt4o.PROMPT_VERSION) + assert len(rt.PROMPT_VERSION) == 7 + assert all(c in "0123456789abcdef" for c in rt.PROMPT_VERSION) -def test_prompt_version_changes_with_system_prompt(): +def test_prompt_version_matches_prompt_file(): import hashlib - alt = hashlib.sha256(("CHANGED" + gpt4o.USER_TEMPLATE).encode("utf-8")).hexdigest()[:7] - assert alt != gpt4o.PROMPT_VERSION + prompt_file = Path(__file__).parent.parent / "analysis" / "prompt-1.txt" + expected = hashlib.sha256(prompt_file.read_text(encoding="utf-8").strip().encode()).hexdigest()[:7] + assert rt.PROMPT_VERSION == expected def test_prompt_version_is_stable(): import hashlib - 
v2 = hashlib.sha256( - (gpt4o.SYSTEM_PROMPT + gpt4o.USER_TEMPLATE).encode("utf-8") - ).hexdigest()[:7] - assert v2 == gpt4o.PROMPT_VERSION + v2 = hashlib.sha256(rt.SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7] + assert v2 == rt.PROMPT_VERSION # --------------------------------------------------------------------------- -# Item detection via load_items +# load_items def test_load_items_separates_forum_and_comments(tmp_path): jsonl = tmp_path / "test.jsonl" @@ -78,7 +76,7 @@ def test_load_items_separates_forum_and_comments(tmp_path): json.dumps(FORUM_ITEM) + "\n" + json.dumps(COMMENT_ITEM) + "\n", encoding="utf-8", ) - forum, comments = gpt4o.load_items(jsonl) + forum, comments = rt.load_items(jsonl) assert forum is not None assert forum["reg_title"] == FORUM_ITEM["reg_title"] assert len(comments) == 1 @@ -88,18 +86,15 @@ def test_load_items_separates_forum_and_comments(tmp_path): def test_load_items_no_forum(tmp_path): jsonl = tmp_path / "test.jsonl" jsonl.write_text(json.dumps(COMMENT_ITEM) + "\n", encoding="utf-8") - forum, comments = gpt4o.load_items(jsonl) + forum, comments = rt.load_items(jsonl) assert forum is None assert len(comments) == 1 def test_load_items_skips_blank_lines(tmp_path): jsonl = tmp_path / "test.jsonl" - jsonl.write_text( - "\n" + json.dumps(COMMENT_ITEM) + "\n\n", - encoding="utf-8", - ) - _, comments = gpt4o.load_items(jsonl) + jsonl.write_text("\n" + json.dumps(COMMENT_ITEM) + "\n\n", encoding="utf-8") + _, comments = rt.load_items(jsonl) assert len(comments) == 1 @@ -108,40 +103,37 @@ def test_load_items_skips_blank_lines(tmp_path): def test_truncation_applied(): long_comment = {**COMMENT_ITEM, "text": "x" * 7000} - messages, truncated = gpt4o.build_messages(long_comment, FORUM_ITEM) + messages, truncated = rt.build_messages(long_comment, FORUM_ITEM) assert truncated is True - user_content = messages[1]["content"] - assert "... 
[truncated]" in user_content - # The x's in the prompt must not exceed MAX_COMMENT_CHARS - x_count = user_content.count("x") - assert x_count == gpt4o.MAX_COMMENT_CHARS + assert "... [truncated]" in messages[1]["content"] + assert messages[1]["content"].count("x") == rt.MAX_COMMENT_CHARS def test_no_truncation_for_short_comment(): - _, truncated = gpt4o.build_messages(COMMENT_ITEM, FORUM_ITEM) + _, truncated = rt.build_messages(COMMENT_ITEM, FORUM_ITEM) assert truncated is False def test_empty_text_fallback(): empty = {**COMMENT_ITEM, "text": ""} - messages, truncated = gpt4o.build_messages(empty, FORUM_ITEM) + messages, truncated = rt.build_messages(empty, FORUM_ITEM) assert "[No body text provided]" in messages[1]["content"] assert truncated is False def test_none_text_fallback(): none_text = {**COMMENT_ITEM, "text": None} - messages, _ = gpt4o.build_messages(none_text, FORUM_ITEM) + messages, _ = rt.build_messages(none_text, FORUM_ITEM) assert "[No body text provided]" in messages[1]["content"] def test_missing_forum_uses_unknown_context(): - messages, _ = gpt4o.build_messages(COMMENT_ITEM, None) + messages, _ = rt.build_messages(COMMENT_ITEM, None) assert "[unknown]" in messages[1]["content"] def test_reg_context_included_in_prompt(): - messages, _ = gpt4o.build_messages(COMMENT_ITEM, FORUM_ITEM) + messages, _ = rt.build_messages(COMMENT_ITEM, FORUM_ITEM) assert FORUM_ITEM["reg_title"] in messages[1]["content"] assert "HB 145" in messages[1]["content"] @@ -150,8 +142,7 @@ def test_reg_context_included_in_prompt(): # Output record schema def test_output_record_all_keys_present(): - client = _mock_client() - record = gpt4o.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") + record = rt.analyze_comment(_mock_client(), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") required = { "run_id", "forum_id", "comment_id", "analyzed_at", "model", "prompt_version", "stance", "stance_confidence", "stance_rationale", "tone", "tags", @@ -161,8 +152,7 @@ def 
test_output_record_all_keys_present(): def test_output_record_correct_types(): - client = _mock_client() - record = gpt4o.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") + record = rt.analyze_comment(_mock_client(), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") assert record["stance"] == "support" assert isinstance(record["stance_confidence"], float) assert isinstance(record["tags"], list) @@ -171,13 +161,12 @@ def test_output_record_correct_types(): def test_output_record_metadata(): - client = _mock_client() - record = gpt4o.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") + record = rt.analyze_comment(_mock_client(), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") assert record["run_id"] == "run-123" assert record["forum_id"] == "452" assert record["comment_id"] == "87914" assert record["model"] == "gpt-4o" - assert record["prompt_version"] == gpt4o.PROMPT_VERSION + assert record["prompt_version"] == rt.PROMPT_VERSION assert record["input_title"] == COMMENT_ITEM["title"] @@ -185,12 +174,12 @@ def test_output_record_metadata(): # Error handling def test_error_record_on_api_failure(): - client = MagicMock() import openai as _openai + client = MagicMock() client.chat.completions.create.side_effect = _openai.RateLimitError( "rate limit", response=MagicMock(status_code=429), body={} ) - record = gpt4o.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") + record = rt.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") assert record["error"] is not None assert record["stance"] is None assert record["tone"] is None @@ -198,8 +187,7 @@ def test_error_record_on_api_failure(): def test_error_record_on_bad_json(): - client = _mock_client("not valid json{{{") - record = gpt4o.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") + record = rt.analyze_comment(_mock_client("not valid json{{{"), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o") assert record["error"] is not None 
assert record["stance"] is None @@ -210,21 +198,18 @@ def test_error_record_on_bad_json(): def test_run_id_is_shared_across_records(): client = _mock_client() run_id = "fixed-run-id" - r1 = gpt4o.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, run_id, "gpt-4o") - r2 = gpt4o.analyze_comment(client, {**COMMENT_ITEM, "comment_id": "99999"}, FORUM_ITEM, run_id, "gpt-4o") + r1 = rt.analyze_comment(client, COMMENT_ITEM, FORUM_ITEM, run_id, "gpt-4o") + r2 = rt.analyze_comment(client, {**COMMENT_ITEM, "comment_id": "99999"}, FORUM_ITEM, run_id, "gpt-4o") assert r1["run_id"] == r2["run_id"] == run_id # --------------------------------------------------------------------------- -# Filename parsing +# Filename helpers def test_scrape_ts_extracted_from_filename(): p = Path("output/forum452_comments_2026-05-05T17-33-54+00-00.jsonl") - ts = gpt4o._scrape_ts_from_filename(p) - assert ts == "2026-05-05T17-33-54+00-00" + assert rt._scrape_ts_from_filename(p) == "2026-05-05T17-33-54+00-00" def test_scrape_ts_fallback_for_unknown_filename(): - p = Path("output/somefile.jsonl") - ts = gpt4o._scrape_ts_from_filename(p) - assert ts == "unknown" + assert rt._scrape_ts_from_filename(Path("output/somefile.jsonl")) == "unknown" diff --git a/tests/test_forum_spider.py b/tests/scrape_forum_spider.py similarity index 100% rename from tests/test_forum_spider.py rename to tests/scrape_forum_spider.py