added 4o initial manual analysis and test

2026-05-05 15:00:34 -04:00
parent c8017c908d
commit d834d18c81
5 changed files with 540 additions and 3 deletions
--- a/tests/test_gpt4o_analysis.py
+++ b/tests/test_gpt4o_analysis.py
@@ -0,0 +1,230 @@
+"""Unit tests for analysis/gpt4o/analysis.py — no real API calls."""
+
+import json
+import sys
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+# Make the module importable without installing as a package
+sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
+import analysis as gpt4o
+
+
+# ---------------------------------------------------------------------------
+# Fixtures
+
+# Forum-level sample record: carries regulation metadata and no "comment_id".
+FORUM_ITEM = {
+    "forum_id": "452",
+    "reg_title": "Model Policies for Transgender Students",
+    "reg_desc": "Guidance developed in response to HB 145.",
+}
+
+# Comment-level sample record; shares "forum_id" with FORUM_ITEM above.
+COMMENT_ITEM = {
+    "forum_id": "452",
+    "comment_id": "87914",
+    "author": "Alice Example",
+    "date": "2021-01-04T09:15:00",
+    "title": "I support this policy",
+    "text": "This is a great policy that protects students.",
+}
+
+# JSON string used as the default message content of the mocked completion
+# returned by _mock_client().
+MOCK_RESPONSE_CONTENT = json.dumps({
+    "stance": "support",
+    "stance_confidence": 0.95,
+    "stance_rationale": "Commenter explicitly endorses the policy.",
+    "tone": "positive",
+    "tags": ["student safety", "LGBTQ+ inclusion"],
+})
+
+
+def _mock_client(response_content: str = MOCK_RESPONSE_CONTENT):
+    """Return a MagicMock client whose chat.completions.create() yields one canned choice.
+
+    The single choice exposes ``response_content`` as ``message.content``.
+    """
+    fake_choice = MagicMock()
+    fake_choice.message.content = response_content
+    completion = MagicMock(choices=[fake_choice])
+
+    stub = MagicMock()
+    stub.chat.completions.create.return_value = completion
+    return stub
+
+
+# ---------------------------------------------------------------------------
+# Prompt versioning
+
+def test_prompt_version_is_7_hex_chars():
+    """PROMPT_VERSION is exactly seven lowercase hexadecimal characters."""
+    version = gpt4o.PROMPT_VERSION
+    assert len(version) == 7
+    assert set(version) <= set("0123456789abcdef")
+
+
+def test_prompt_version_changes_with_system_prompt():
+    """A digest built from a different system prompt differs from PROMPT_VERSION."""
+    import hashlib
+    altered_source = "CHANGED" + gpt4o.USER_TEMPLATE
+    digest = hashlib.sha256(altered_source.encode("utf-8")).hexdigest()
+    assert digest[:7] != gpt4o.PROMPT_VERSION
+
+
+def test_prompt_version_is_stable():
+    """PROMPT_VERSION equals the first 7 hex chars of sha256(SYSTEM_PROMPT + USER_TEMPLATE)."""
+    import hashlib
+    prompt_source = gpt4o.SYSTEM_PROMPT + gpt4o.USER_TEMPLATE
+    expected = hashlib.sha256(prompt_source.encode("utf-8")).hexdigest()[:7]
+    assert expected == gpt4o.PROMPT_VERSION
+
+
+# ---------------------------------------------------------------------------
+# Item detection via load_items
+
+def test_load_items_separates_forum_and_comments(tmp_path):
+    """load_items returns the forum record separately from the comment records."""
+    records = (FORUM_ITEM, COMMENT_ITEM)
+    source = tmp_path / "test.jsonl"
+    source.write_text(
+        "".join(json.dumps(record) + "\n" for record in records),
+        encoding="utf-8",
+    )
+
+    forum, comments = gpt4o.load_items(source)
+
+    assert forum is not None
+    assert forum["reg_title"] == FORUM_ITEM["reg_title"]
+    assert len(comments) == 1
+    first_comment = comments[0]
+    assert first_comment["comment_id"] == "87914"
+
+
+def test_load_items_no_forum(tmp_path):
+    """With only a comment record in the file, load_items reports no forum."""
+    source = tmp_path / "test.jsonl"
+    source.write_text(f"{json.dumps(COMMENT_ITEM)}\n", encoding="utf-8")
+
+    forum, comments = gpt4o.load_items(source)
+
+    assert forum is None
+    assert len(comments) == 1
+
+
+def test_load_items_skips_blank_lines(tmp_path):
+    """Blank lines before and after a record do not produce extra comments."""
+    source = tmp_path / "test.jsonl"
+    # One leading blank line, the record, then a trailing blank line.
+    payload = "\n".join(["", json.dumps(COMMENT_ITEM), "", ""])
+    source.write_text(payload, encoding="utf-8")
+
+    _forum, comments = gpt4o.load_items(source)
+
+    assert len(comments) == 1
+
+
+# ---------------------------------------------------------------------------
+# build_messages
+
+def test_truncation_applied():
+    """Over-long comment text is cut to MAX_COMMENT_CHARS and flagged as truncated.
+
+    The filler count is measured relative to a short, untruncated prompt so that
+    any filler characters contributed by the template or by other interpolated
+    fields do not skew the result.
+    """
+    filler = "x"
+    # Size the input from the limit itself so the test keeps exercising
+    # truncation if MAX_COMMENT_CHARS is ever raised.
+    long_comment = {
+        **COMMENT_ITEM,
+        "text": filler * (gpt4o.MAX_COMMENT_CHARS + 1000),
+    }
+    messages, truncated = gpt4o.build_messages(long_comment, FORUM_ITEM)
+    assert truncated is True
+    user_content = messages[1]["content"]
+    assert "... [truncated]" in user_content
+
+    # Baseline: filler characters present in the prompt that do NOT come from
+    # the comment body (the short body "y" contains none).
+    short_comment = {**COMMENT_ITEM, "text": "y"}
+    baseline_messages, baseline_truncated = gpt4o.build_messages(short_comment, FORUM_ITEM)
+    assert baseline_truncated is False
+    baseline = baseline_messages[1]["content"].count(filler)
+
+    # The body's share of filler characters must equal the limit exactly.
+    body_count = user_content.count(filler) - baseline
+    assert body_count == gpt4o.MAX_COMMENT_CHARS
+
+
+def test_no_truncation_for_short_comment():
+    """The sample comment is short enough that build_messages reports no truncation."""
+    _messages, was_truncated = gpt4o.build_messages(COMMENT_ITEM, FORUM_ITEM)
+    assert was_truncated is False
+
+
+def test_empty_text_fallback():
+    """An empty comment body is replaced by the placeholder and is not truncated."""
+    blank_comment = dict(COMMENT_ITEM, text="")
+    messages, truncated = gpt4o.build_messages(blank_comment, FORUM_ITEM)
+    user_message = messages[1]
+    assert "[No body text provided]" in user_message["content"]
+    assert truncated is False
+
+
+def test_none_text_fallback():
+    """A comment whose text is None gets the same placeholder in the user message."""
+    comment_without_body = dict(COMMENT_ITEM, text=None)
+    messages, _truncated = gpt4o.build_messages(comment_without_body, FORUM_ITEM)
+    user_message = messages[1]
+    assert "[No body text provided]" in user_message["content"]
+
+
+def test_missing_forum_uses_unknown_context():
+    """Passing None as the forum puts the "[unknown]" marker in the user message."""
+    messages, _truncated = gpt4o.build_messages(COMMENT_ITEM, None)
+    user_message = messages[1]
+    assert "[unknown]" in user_message["content"]
+
+
+def test_reg_context_included_in_prompt():
+    """The regulation title and a fragment of its description appear in the user message."""
+    messages, _truncated = gpt4o.build_messages(COMMENT_ITEM, FORUM_ITEM)
+    prompt = messages[1]["content"]
+    for fragment in (FORUM_ITEM["reg_title"], "HB 145"):
+        assert fragment in prompt
+
+
+# ---------------------------------------------------------------------------
+# Output record schema
+
+def test_output_record_all_keys_present():
+    """analyze_comment returns a record with exactly the expected set of keys."""
+    expected_keys = {
+        # run / provenance metadata
+        "run_id", "forum_id", "comment_id", "analyzed_at", "model", "prompt_version",
+        # fields taken from the model response
+        "stance", "stance_confidence", "stance_rationale", "tone", "tags",
+        # input bookkeeping and failure marker
+        "input_title", "truncated", "error",
+    }
+    record = gpt4o.analyze_comment(
+        _mock_client(), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o"
+    )
+    assert expected_keys == set(record)
+
+
+def test_output_record_correct_types():
+    """A successful analysis yields correctly typed fields and no error."""
+    record = gpt4o.analyze_comment(
+        _mock_client(), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o"
+    )
+    assert record["stance"] == "support"
+    confidence, tags = record["stance_confidence"], record["tags"]
+    assert isinstance(confidence, float)
+    assert isinstance(tags, list)
+    assert record["truncated"] is False
+    assert record["error"] is None
+
+
+def test_output_record_metadata():
+    """Run, forum, comment, model, prompt-version and title metadata are copied into the record."""
+    record = gpt4o.analyze_comment(
+        _mock_client(), COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o"
+    )
+    expected = {
+        "run_id": "run-123",
+        "forum_id": "452",
+        "comment_id": "87914",
+        "model": "gpt-4o",
+        "prompt_version": gpt4o.PROMPT_VERSION,
+        "input_title": COMMENT_ITEM["title"],
+    }
+    for key, value in expected.items():
+        assert record[key] == value
+
+
+# ---------------------------------------------------------------------------
+# Error handling
+
+def test_error_record_on_api_failure():
+    """When the API call raises RateLimitError, the record has an error and null analysis fields."""
+    import openai as _openai
+    rate_limited = _openai.RateLimitError(
+        "rate limit", response=MagicMock(status_code=429), body={}
+    )
+    failing_client = MagicMock()
+    failing_client.chat.completions.create.side_effect = rate_limited
+
+    record = gpt4o.analyze_comment(
+        failing_client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o"
+    )
+
+    assert record["error"] is not None
+    for field in ("stance", "tone", "tags"):
+        assert record[field] is None
+
+
+def test_error_record_on_bad_json():
+    """Unparseable response content yields a record with an error and no stance."""
+    broken_client = _mock_client("not valid json{{{")
+    record = gpt4o.analyze_comment(
+        broken_client, COMMENT_ITEM, FORUM_ITEM, "run-123", "gpt-4o"
+    )
+    assert record["error"] is not None
+    assert record["stance"] is None
+
+
+# ---------------------------------------------------------------------------
+# run_id consistency
+
+def test_run_id_is_shared_across_records():
+    """Two comments analyzed with one run_id both carry that run_id in their records."""
+    shared_id = "fixed-run-id"
+    stub = _mock_client()
+    inputs = (COMMENT_ITEM, {**COMMENT_ITEM, "comment_id": "99999"})
+    records = [
+        gpt4o.analyze_comment(stub, item, FORUM_ITEM, shared_id, "gpt-4o")
+        for item in inputs
+    ]
+    assert all(record["run_id"] == shared_id for record in records)
+
+
+# ---------------------------------------------------------------------------
+# Filename parsing
+
+def test_scrape_ts_extracted_from_filename():
+    """The timestamp portion of a scrape output filename is returned verbatim."""
+    scrape_path = Path("output/forum452_comments_2026-05-05T17-33-54+00-00.jsonl")
+    extracted = gpt4o._scrape_ts_from_filename(scrape_path)
+    assert extracted == "2026-05-05T17-33-54+00-00"
+
+
+def test_scrape_ts_fallback_for_unknown_filename():
+    """A filename without a recognizable timestamp yields the string "unknown"."""
+    unrelated_path = Path("output/somefile.jsonl")
+    extracted = gpt4o._scrape_ts_from_filename(unrelated_path)
+    assert extracted == "unknown"