openai batch refactor

This commit is contained in:
2026-05-06 13:53:50 -04:00
parent 946aeac7c8
commit 64a7a18721
5 changed files with 833 additions and 312 deletions

View File

@@ -75,9 +75,24 @@ ANALYZED_AT = "2026-05-05T18:00:00+00:00"
RUN_ID = "test-run-id-123"
MODEL = "gpt-4o"
# Minimal status.json for testing job logic
def _make_status(jobs_override=None):
jobs = jobs_override or [
{"job_num": 1, "run_id": "r1", "status": "pending", "batch_id": None,
"records_submitted": 60, "records_completed": None, "records_failed": None,
"submitted_at": None, "completed_at": None},
]
return {
"model": "gpt-4o-mini", "prompt_hash": "abc1234",
"input_file": "output/f452.jsonl", "input_sha256": "sha",
"total_comments": 100, "input_tokens": 50_000,
"est_queue_days": 0.025, "cost_$": 0.01,
"total_jobs": len(jobs), "jobs": jobs,
}
# ---------------------------------------------------------------------------
# Prompt versioning (batch reads the same prompt file)
# Prompt versioning
def test_prompt_version_is_7_hex_chars():
assert len(bt.PROMPT_VERSION) == 7
@@ -206,52 +221,6 @@ def test_normalize_unknown_comment_id():
assert record["input_title"] == ""
# ---------------------------------------------------------------------------
# Manifest
def test_make_manifest_all_keys():
    """A freshly built manifest exposes exactly the expected key set."""
    expected_keys = {
        "run_id",
        "input_filename",
        "input_sha256",
        "prompt_hash",
        "model",
        "batch_id",
        "records_submitted",
        "records_completed",
        "records_failed",
        "request_filename",
        "raw_output_filename",
        "normalized_output_filename",
        "created_at",
        "completed_at",
    }
    manifest = bt.make_manifest(
        run_id=RUN_ID,
        input_filename="output/forum452.jsonl",
        input_sha256="abc123",
        model="gpt-4o",
        batch_id="batch_xyz",
        records_submitted=100,
        request_filename="analysis/gpt4o/requests/test-run-id-123.jsonl",
    )
    # Equality (not subset) so that unexpected extra keys also fail the test.
    assert expected_keys == set(manifest.keys())
def test_make_manifest_initial_nulls():
    """Completion fields start as None; prompt_hash is filled in up front."""
    manifest = bt.make_manifest(
        run_id=RUN_ID,
        input_filename="f",
        input_sha256="s",
        model="gpt-4o",
        batch_id="b",
        records_submitted=10,
        request_filename="r",
    )
    unset_fields = (
        "records_completed",
        "records_failed",
        "raw_output_filename",
        "normalized_output_filename",
        "completed_at",
    )
    for field in unset_fields:
        assert manifest[field] is None
    assert manifest["prompt_hash"] == bt.PROMPT_VERSION
def test_manifest_save_load_roundtrip(tmp_path, monkeypatch):
    """A manifest saved via save_manifest is read back unchanged."""
    # Point the module's runs directory at the per-test temp dir.
    monkeypatch.setattr(bt, "RUNS_DIR", tmp_path)
    manifest = bt.make_manifest(
        run_id=RUN_ID,
        input_filename="f",
        input_sha256="s",
        model="gpt-4o",
        batch_id="b",
        records_submitted=42,
        request_filename="r",
    )
    bt.save_manifest(manifest)
    assert bt.load_manifest(RUN_ID) == manifest
# ---------------------------------------------------------------------------
# estimate_tokens
@@ -309,3 +278,112 @@ def test_chunk_preserves_all_comments(monkeypatch):
def test_model_limits_has_required_models():
    """Every model the tests rely on has an entry in MODEL_LIMITS."""
    required_models = (
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-o4-mini",
    )
    for name in required_models:
        assert name in bt.MODEL_LIMITS, f"{name} missing from MODEL_LIMITS"
# ---------------------------------------------------------------------------
# status.json helpers
def test_status_save_load_roundtrip(tmp_path):
    """A status dict written with save_status is read back unchanged."""
    original = _make_status()
    bt.save_status(original, tmp_path)
    assert bt.load_status(tmp_path) == original
# ---------------------------------------------------------------------------
# _find_next_eligible_job
def test_find_next_eligible_job_first_job_pending():
    """A lone pending job is selected and no warning is produced."""
    status = _make_status()
    target, warning = bt._find_next_eligible_job(status["jobs"])
    assert target["job_num"] == 1
    assert warning is None
def test_find_next_eligible_job_after_completed():
    """Once job 1 is completed, pending job 2 becomes the target."""
    completed_job = {
        "job_num": 1,
        "status": "completed",
        "batch_id": "b1",
        "records_submitted": 60,
        "records_completed": 60,
        "records_failed": 0,
        "submitted_at": "t",
        "completed_at": "t",
        "run_id": "r1",
    }
    pending_job = {
        "job_num": 2,
        "status": "pending",
        "batch_id": None,
        "records_submitted": 40,
        "records_completed": None,
        "records_failed": None,
        "submitted_at": None,
        "completed_at": None,
        "run_id": "r2",
    }
    target, warning = bt._find_next_eligible_job([completed_job, pending_job])
    assert target["job_num"] == 2
    assert warning is None
def test_find_next_eligible_job_blocked_by_in_progress():
    """An in-progress job blocks selection and yields a warning naming it."""
    running_job = {
        "job_num": 1,
        "status": "in_progress",
        "batch_id": "b1",
        "records_submitted": 60,
        "records_completed": None,
        "records_failed": None,
        "submitted_at": "t",
        "completed_at": None,
        "run_id": "r1",
    }
    pending_job = {
        "job_num": 2,
        "status": "pending",
        "batch_id": None,
        "records_submitted": 40,
        "records_completed": None,
        "records_failed": None,
        "submitted_at": None,
        "completed_at": None,
        "run_id": "r2",
    }
    target, warning = bt._find_next_eligible_job([running_job, pending_job])
    assert target is None
    assert warning is not None
    # The warning text is expected to mention the blocking status.
    assert "in_progress" in warning
def test_find_next_eligible_job_all_completed():
    """With every job completed there is neither a target nor a warning."""
    only_job = {
        "job_num": 1,
        "status": "completed",
        "batch_id": "b1",
        "records_submitted": 60,
        "records_completed": 60,
        "records_failed": 0,
        "submitted_at": "t",
        "completed_at": "t",
        "run_id": "r1",
    }
    target, warning = bt._find_next_eligible_job([only_job])
    assert target is None
    assert warning is None
def test_resume_from_status_json(tmp_path):
    """Persist a status with one finished job, reload it, and pick job 2."""
    finished_job = {
        "job_num": 1,
        "run_id": "r1",
        "status": "completed",
        "batch_id": "b1",
        "records_submitted": 60,
        "records_completed": 58,
        "records_failed": 2,
        "submitted_at": "2026-05-06T10:00:00+00:00",
        "completed_at": "2026-05-06T11:00:00+00:00",
    }
    pending_job = {
        "job_num": 2,
        "run_id": "r2",
        "status": "pending",
        "batch_id": None,
        "records_submitted": 40,
        "records_completed": None,
        "records_failed": None,
        "submitted_at": None,
        "completed_at": None,
    }
    bt.save_status(_make_status([finished_job, pending_job]), tmp_path)
    # Work from the reloaded copy so the on-disk round trip is part of the test.
    reloaded = bt.load_status(tmp_path)
    target, warning = bt._find_next_eligible_job(reloaded["jobs"])
    assert target["job_num"] == 2
    assert warning is None
# ---------------------------------------------------------------------------
# normalize: out-of-order and duplicate custom_id
def test_out_of_order_output_reconciled_by_custom_id():
    """Each raw line is matched to its comment via custom_id, not position."""
    second_comment = {**COMMENT_ITEM, "comment_id": "99999", "title": "Second comment"}
    lookup = {
        COMMENT_ITEM["comment_id"]: COMMENT_ITEM,
        "99999": second_comment,
    }

    def normalize(raw_line):
        # All arguments except the raw line are identical for both calls.
        return bt.normalize_output_line(
            raw_line, lookup, RUN_ID, ANALYZED_AT, MODEL, bt.PROMPT_VERSION
        )

    # Feed the line for the second comment first to simulate reordered output.
    from_second = normalize({**RAW_SUCCESS_LINE, "custom_id": "comment_99999"})
    from_first = normalize(RAW_SUCCESS_LINE)

    assert from_second["comment_id"] == "99999"
    assert from_second["input_title"] == "Second comment"
    assert from_first["comment_id"] == "87914"
    assert from_first["input_title"] == COMMENT_ITEM["title"]
def test_duplicate_custom_id_both_produce_valid_records():
    """Normalizing the same raw line twice gives two error-free records."""
    records = [
        bt.normalize_output_line(
            RAW_SUCCESS_LINE, COMMENT_LOOKUP, RUN_ID, ANALYZED_AT, MODEL, bt.PROMPT_VERSION
        )
        for _ in range(2)
    ]
    first, second = records
    assert first["comment_id"] == second["comment_id"] == "87914"
    assert first["error"] is None
    assert second["error"] is None

201
tests/tokenizer.py Normal file
View File

@@ -0,0 +1,201 @@
"""Unit tests for analysis/gpt4o/tokenizer.py — no real API calls."""
import io
import json
import math
import sys
from pathlib import Path
from unittest.mock import patch
import pytest
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
import tokenizer as tk
import analysis_batch as ab
# ---------------------------------------------------------------------------
# Fixtures
# Forum-level record passed to compute_report alongside the comments.
FORUM_ITEM = {
    "forum_id": "452",
    "reg_title": "Model Policies for Transgender Students",
    "reg_desc": "Guidance developed in response to HB 145.",
}

# Two short comments on the same forum with distinct ids and authors.
COMMENT_A = {
    "forum_id": "452",
    "comment_id": "100",
    "author": "Alice",
    "date": "2021-01-04T09:15:00",
    "title": "Support",
    "text": "I support this policy.",
}
COMMENT_B = {
    "forum_id": "452",
    "comment_id": "101",
    "author": "Bob",
    "date": "2021-01-05T10:00:00",
    "title": "Oppose",
    "text": "I oppose this policy.",
}
COMMENTS = [COMMENT_A, COMMENT_B]

# Report metadata; the tests assert these values are echoed back verbatim.
PROMPT_HASH = "abc1234"
INPUT_FILE = "output/f452.jsonl"
INPUT_SHA256 = "deadbeef" * 8  # 64 hex characters
PROMPT_FILE = "analysis/prompt-1.txt"
def _make_report(total_tokens=10_000):
    """Build a report from the module-level fixtures via tk.compute_report.

    Args:
        total_tokens: Not read by this helper; it has no effect on the
            returned report. NOTE(review): kept only so existing call
            signatures keep working — confirm whether it can be dropped.

    Returns:
        Whatever ``tk.compute_report`` returns for the two fixture comments.
    """
    report_args = (
        COMMENTS,
        FORUM_ITEM,
        PROMPT_HASH,
        INPUT_FILE,
        INPUT_SHA256,
        PROMPT_FILE,
    )
    return tk.compute_report(*report_args)
# ---------------------------------------------------------------------------
# compute_report: required top-level keys
def test_report_has_top_level_keys():
    """The report carries at least the required metadata keys."""
    required = {
        "prompt",
        "prompt_hash",
        "input_file",
        "input_sha256",
        "total_comments",
        "input_tokens",
    }
    present = set(_make_report().keys())
    # Subset check: extra keys in the report are allowed.
    assert required <= present
def test_report_metadata_values():
    """Metadata passed to compute_report is echoed back in the report."""
    report = _make_report()
    expected = {
        "prompt": PROMPT_FILE,
        "prompt_hash": PROMPT_HASH,
        "input_file": INPUT_FILE,
        "input_sha256": INPUT_SHA256,
        "total_comments": 2,
    }
    for key, value in expected.items():
        assert report[key] == value
def test_report_input_tokens_positive():
    """input_tokens is an int greater than zero."""
    token_count = _make_report()["input_tokens"]
    assert isinstance(token_count, int)
    assert token_count > 0
# ---------------------------------------------------------------------------
# compute_report: per-model entries
def test_report_has_per_model_keys():
    """Each model in MODEL_LIMITS has a dict-valued entry in the report."""
    report = _make_report()
    for name in ab.MODEL_LIMITS:
        assert name in report, f"Model {name} missing from report"
        entry = report[name]
        assert isinstance(entry, dict)
def test_report_per_model_has_required_fields():
    """Every per-model entry has jobs, cost_$ and est_queue_days."""
    report = _make_report()
    required_fields = ("jobs", "cost_$", "est_queue_days")
    for name in ab.MODEL_LIMITS:
        entry = report[name]
        for field in required_fields:
            assert field in entry
def test_report_jobs_at_least_one():
    """No model is ever reported as needing fewer than one job."""
    report = _make_report()
    assert all(report[name]["jobs"] >= 1 for name in ab.MODEL_LIMITS)
# ---------------------------------------------------------------------------
# compute_report: calculation accuracy
def test_cost_calculation():
    """cost_$ equals input_tokens / 1M * the model's price, rounded to 4 places."""
    report = _make_report()
    millions_of_tokens = report["input_tokens"] / 1_000_000
    for name in ab.MODEL_LIMITS:
        # Models without a pricing entry are expected to cost 0.0.
        rate = tk.MODEL_PRICING.get(name, 0.0)
        expected_cost = round(millions_of_tokens * rate, 4)
        assert report[name]["cost_$"] == pytest.approx(expected_cost, abs=1e-6)
def test_est_queue_days_calculation():
    """est_queue_days equals input_tokens / limit, rounded to 2 places."""
    report = _make_report()
    token_count = report["input_tokens"]
    for name, limit in ab.MODEL_LIMITS.items():
        expected_days = round(token_count / limit, 2)
        actual_days = report[name]["est_queue_days"]
        assert actual_days == pytest.approx(expected_days, abs=1e-4)
def test_jobs_ceiling_division():
    """jobs = ceil(input_tokens / int(limit * _LIMIT_BUFFER))."""
    report = _make_report()
    token_count = report["input_tokens"]
    for name, limit in ab.MODEL_LIMITS.items():
        # The buffered limit is truncated to an int before dividing.
        usable_limit = int(limit * ab._LIMIT_BUFFER)
        expected_jobs = math.ceil(token_count / usable_limit)
        assert report[name]["jobs"] == expected_jobs
def test_more_comments_increases_tokens():
    """Adding a comment strictly increases the reported input_tokens."""
    # Everything except the comment list is identical between the two reports.
    shared_args = (FORUM_ITEM, PROMPT_HASH, INPUT_FILE, INPUT_SHA256, PROMPT_FILE)
    single = tk.compute_report([COMMENT_A], *shared_args)
    both = tk.compute_report(COMMENTS, *shared_args)
    assert both["input_tokens"] > single["input_tokens"]
# ---------------------------------------------------------------------------
# MODEL_PRICING coverage
def test_model_pricing_has_required_models():
    """Every model the tests rely on has an entry in MODEL_PRICING."""
    required_models = (
        "gpt-4o",
        "gpt-4o-mini",
        "gpt-5.4",
        "gpt-5.4-mini",
        "gpt-o4-mini",
    )
    for name in required_models:
        assert name in tk.MODEL_PRICING, f"{name} missing from MODEL_PRICING"
def test_model_pricing_values_positive():
    """Every configured price is strictly greater than zero."""
    for name, rate in tk.MODEL_PRICING.items():
        assert rate > 0, f"{name} has non-positive price"
# ---------------------------------------------------------------------------
# print_table: runs without error, produces output
def test_print_table_runs():
    """print_table runs without raising and mentions the gpt-4o models."""
    captured = io.StringIO()
    with patch("sys.stdout", captured):
        tk.print_table(_make_report())
    text = captured.getvalue()
    # "gpt-4o" is a substring of "gpt-4o-mini", so the second check is the
    # stricter of the two.
    assert "gpt-4o" in text
    assert "gpt-4o-mini" in text
def test_print_table_shows_all_models():
    """Every model in MODEL_LIMITS appears in the printed table."""
    captured = io.StringIO()
    with patch("sys.stdout", captured):
        tk.print_table(_make_report())
    text = captured.getvalue()
    for name in ab.MODEL_LIMITS:
        assert name in text, f"{name} not shown in print_table output"
def test_print_table_highlights_recommended():
    """The printed table contains the word "recommended".

    NOTE(review): presumably the two-comment fixture fits into a single job
    for at least one model, which is what triggers the marker — confirm
    against print_table.
    """
    captured = io.StringIO()
    with patch("sys.stdout", captured):
        tk.print_table(_make_report())
    assert "recommended" in captured.getvalue()
# ---------------------------------------------------------------------------
# report.json round-trip (write → read)
def test_report_json_roundtrip(tmp_path):
    """A report survives being written to report.json and parsed back."""
    report = _make_report()
    report_path = tmp_path / "report.json"
    serialized = json.dumps(report, indent=2, ensure_ascii=False)
    report_path.write_text(serialized, encoding="utf-8")

    reloaded = json.loads(report_path.read_text(encoding="utf-8"))

    for key in ("total_comments", "input_tokens"):
        assert reloaded[key] == report[key]
    assert reloaded["gpt-4o-mini"]["jobs"] == report["gpt-4o-mini"]["jobs"]