openai batch refactor

2026-05-06 13:53:50 -04:00
parent 946aeac7c8
commit 64a7a18721
5 changed files with 833 additions and 312 deletions
--- a/analysis/gpt4o/analysis_batch.py
+++ b/analysis/gpt4o/analysis_batch.py
@@ -1,27 +1,23 @@
 #!/usr/bin/env python3
 """
-analysis_batch.py — OpenAI Batch API pipeline
+analysis_batch.py — OpenAI Batch API job runner
-Commands (run manually in order):
+Run tokenizer.py first to generate report.json, then:
-    submit   <input_jsonl> [--model gpt-4o] [--limit N]
+    create   <report.json> --model <model>   — build job directory
-                                           — build request file, upload, create batch
+    submit   [--job N] [--dir DIR]           — submit next eligible job
-    status   [run_id]                      — check batch status, update manifest
+    status   [--job N] [--dir DIR]           — check job status
-    download [run_id]                      — download + normalize output, update manifest
+    download [--job N] [--dir DIR]           — download + normalize completed jobs
-run_id defaults to the most recent run in runs/ when omitted.
+DIR is a name under analysis/gpt4o/jobs/ (default: most recently created).
 File layout (all under analysis/gpt4o/):
    requests/<run_id>.jsonl     — batch input sent to OpenAI
    raw/<run_id>.jsonl          — raw batch output from OpenAI
    runs/<run_id>.json          — run manifest
    <run_id>_<model>.jsonl      — normalized output (same schema as realtime)
 """
 import argparse
 import hashlib
 import json
 import os
 import shutil
 import sys
 import uuid
 from datetime import datetime, timezone
 from pathlib import Path
@@ -35,9 +31,8 @@ except ImportError:
 # ---------------------------------------------------------------------------
 # Model limits and token estimation
-# Max enqueued tokens across ALL concurrent batches for this model
+# Max enqueued tokens across ALL concurrent batches (docs/openai.md, 2026-05-05).
-# (docs/openai.md pricing table, updated 2026-05-05).
+# Org-tier limits may be lower; use --job to limit submission size if needed.
 # NOTE: your org tier may be lower — if a submit fails, use --limit to reduce chunk size.
 MODEL_LIMITS: dict[str, int] = {
    "gpt-5.5":        900_000,
    "gpt-5.4":        900_000,
@@ -48,8 +43,6 @@ MODEL_LIMITS: dict[str, int] = {
    "gpt-o4-mini":  2_000_000,
 }
 _DEFAULT_TOKEN_LIMIT = 900_000
 # tiktoken encoding per model family; unknown models fall back to o200k_base
 _MODEL_ENCODING: dict[str, str] = {
    "gpt-5.5":       "o200k_base",
    "gpt-5.4":       "o200k_base",
@@ -59,16 +52,11 @@ _MODEL_ENCODING: dict[str, str] = {
    "gpt-4o-mini":   "o200k_base",
    "gpt-o4-mini":   "o200k_base",
 }
 # Leave 10% headroom below the published limit
 _LIMIT_BUFFER = 0.90
 def estimate_tokens(messages: list[dict], model: str) -> int:
-    """Estimate token count for a messages list.
+    """Exact token count via tiktoken; falls back to chars/3 + 4 overhead per message."""
    Uses tiktoken when available (exact for OpenAI models); falls back to
    chars/3 + 4-token overhead per message for unknown/Anthropic models.
    """
    try:
        import tiktoken
        enc = tiktoken.get_encoding(_MODEL_ENCODING.get(model, "o200k_base"))
@@ -80,14 +68,11 @@ def estimate_tokens(messages: list[dict], model: str) -> int:
 def chunk_comments_by_tokens(
    comments: list[dict], forum: dict | None, model: str
 ) -> list[list[dict]]:
-    """Split comments into chunks where each chunk fits under the model token limit."""
+    """Greedy bin-pack comments into chunks that fit under the model TPD limit."""
-    raw_limit = MODEL_LIMITS.get(model, _DEFAULT_TOKEN_LIMIT)
+    token_limit = int(MODEL_LIMITS.get(model, _DEFAULT_TOKEN_LIMIT) * _LIMIT_BUFFER)
    token_limit = int(raw_limit * _LIMIT_BUFFER)
    chunks: list[list[dict]] = []
    current: list[dict] = []
    current_tokens = 0
    for comment in comments:
        messages, _ = build_messages(comment, forum)
        tokens = estimate_tokens(messages, model)
@@ -98,10 +83,8 @@ def chunk_comments_by_tokens(
        else:
            current.append(comment)
            current_tokens += tokens
    if current:
        chunks.append(current)
    return chunks
@@ -114,11 +97,11 @@ PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]
 def _load_prompt(path: Path) -> None:
    """Re-read a prompt file, updating module-level SYSTEM_PROMPT and PROMPT_VERSION."""
    global SYSTEM_PROMPT, PROMPT_VERSION
    SYSTEM_PROMPT = path.read_text(encoding="utf-8").strip()
    PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]
 USER_TEMPLATE = """\
 ## Proposed Regulation
 Title: {reg_title}
@@ -141,17 +124,15 @@ MAX_COMMENT_CHARS = 6000
 # ---------------------------------------------------------------------------
 # Directories
-_SCRIPT_DIR  = Path(__file__).parent
+_SCRIPT_DIR = Path(__file__).parent
-REQUESTS_DIR = _SCRIPT_DIR / "requests"
+JOBS_DIR = _SCRIPT_DIR / "jobs"
 RAW_DIR      = _SCRIPT_DIR / "raw"
 RUNS_DIR     = _SCRIPT_DIR / "runs"
 # ---------------------------------------------------------------------------
 # Core functions (importable for tests)
 def load_items(path: Path) -> tuple[dict | None, list[dict]]:
-    """Read a scraped JSONL file. Returns (forum_item_or_None, [comment_items])."""
+    """Read a scraped JSONL. Returns (forum_item_or_None, [comment_items])."""
    forum = None
    comments = []
    with open(path, encoding="utf-8") as f:
@@ -172,7 +153,6 @@ def custom_id_from(comment_id: str) -> str:
 def parse_custom_id(custom_id: str) -> str:
    """Return comment_id from a custom_id string."""
    return custom_id.removeprefix("comment_")
@@ -180,7 +160,6 @@ def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]:
    """Build OpenAI messages for one comment. Returns (messages, truncated)."""
    reg_title = (forum or {}).get("reg_title", "[unknown]")
    reg_desc  = (forum or {}).get("reg_desc",  "[unknown]")
    body = (comment.get("text") or "").strip()
    truncated = False
    if not body:
@@ -188,7 +167,6 @@ def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]:
    elif len(body) > MAX_COMMENT_CHARS:
        body = body[:MAX_COMMENT_CHARS] + "... [truncated]"
        truncated = True
    user_text = USER_TEMPLATE.format(
        reg_title=reg_title,
        reg_desc=reg_desc,
@@ -196,7 +174,6 @@ def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]:
        comment_title=comment.get("title", ""),
        comment_text=body,
    )
    return [
        {"role": "system", "content": SYSTEM_PROMPT},
        {"role": "user",   "content": user_text},
@@ -204,7 +181,6 @@ def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]:
 def build_batch_request_line(comment: dict, forum: dict | None, model: str) -> dict:
    """Build one line of the batch input JSONL."""
    messages, _ = build_messages(comment, forum)
    return {
        "custom_id": custom_id_from(comment["comment_id"]),
@@ -227,14 +203,9 @@ def normalize_output_line(
    model: str,
    prompt_version: str,
 ) -> dict:
-    """Convert one raw batch output line into a normalized analysis record.
+    """Convert one raw batch output line into a normalized analysis record."""
    comment_lookup: {comment_id: CommentItem dict}
    prompt_version: taken from the run manifest so it reflects what was submitted.
    """
    comment_id = parse_custom_id(raw_line.get("custom_id", ""))
    comment = comment_lookup.get(comment_id, {})
    base = {
        "run_id":         run_id,
        "forum_id":       comment.get("forum_id", ""),
@@ -245,20 +216,16 @@ def normalize_output_line(
        "input_title":    comment.get("title", ""),
        "truncated":      len(comment.get("text") or "") > MAX_COMMENT_CHARS,
    }
    # Check for outer-level batch error (e.g. batch_expired)
    if raw_line.get("error"):
        err = raw_line["error"]
        err_msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
        return {**base, "stance": None, "stance_confidence": None,
                "stance_rationale": None, "tone": None, "tags": None, "error": err_msg}
    response = raw_line.get("response") or {}
    if response.get("status_code") != 200:
        return {**base, "stance": None, "stance_confidence": None,
                "stance_rationale": None, "tone": None, "tags": None,
                "error": f"status {response.get('status_code')}"}
    try:
        content = response["body"]["choices"][0]["message"]["content"]
        data = json.loads(content)
@@ -270,278 +237,372 @@ def normalize_output_line(
                "stance_rationale": None, "tone": None, "tags": None, "error": str(exc)}
-def make_manifest(
+# ---------------------------------------------------------------------------
-    run_id: str,
+# Job directory management
-    input_filename: str,
+
-    input_sha256: str,
+
-    model: str,
+def _next_job_dir(stem: str) -> Path:
-    batch_id: str,
+    base = stem[:8]
-    records_submitted: int,
+    i = 1
-    request_filename: str,
+    while (JOBS_DIR / f"{base}-{i}").exists():
-) -> dict:
+        i += 1
-    return {
+    return JOBS_DIR / f"{base}-{i}"
-        "run_id":                 run_id,
+
-        "input_filename":         input_filename,
+
-        "input_sha256":           input_sha256,
+def _latest_job_dir() -> Path:
-        "prompt_hash":            PROMPT_VERSION,
+    if not JOBS_DIR.exists():
-        "model":                  model,
+        sys.exit(f"No jobs directory found. Run 'create' first.")
-        "batch_id":               batch_id,
+    status_files = list(JOBS_DIR.glob("*/status.json"))
-        "records_submitted":      records_submitted,
+    if not status_files:
-        "records_completed":      None,
+        sys.exit(f"No jobs found in {JOBS_DIR}. Run 'create' first.")
-        "records_failed":         None,
+    return max(status_files, key=lambda p: p.stat().st_mtime).parent
-        "request_filename":       request_filename,
+
-        "raw_output_filename":    None,
+
-        "normalized_output_filename": None,
+def _resolve_job_dir(args) -> Path:
-        "created_at":             datetime.now(timezone.utc).isoformat(),
+    if getattr(args, "dir", None):
-        "completed_at":           None,
+        d = Path(args.dir)
        if not d.is_absolute():
            d = JOBS_DIR / d
        if not d.exists():
            sys.exit(f"Job directory not found: {d}")
        return d
    return _latest_job_dir()
 def load_status(job_dir: Path) -> dict:
    return json.loads((job_dir / "status.json").read_text(encoding="utf-8"))
 def save_status(status: dict, job_dir: Path) -> None:
    (job_dir / "status.json").write_text(
        json.dumps(status, indent=2, ensure_ascii=False), encoding="utf-8"
    )
 def _find_next_eligible_job(jobs: list[dict]) -> tuple[dict | None, str | None]:
    """Return (next_pending_job, None) or (None, warning_message).
    A job is eligible when it is 'pending' and either it is the first job
    or its predecessor is 'completed'.
    """
    for j in jobs:
        if j["status"] != "pending":
            continue
        if j["job_num"] == 1:
            return j, None
        prev = next(p for p in jobs if p["job_num"] == j["job_num"] - 1)
        if prev["status"] == "completed":
            return j, None
        if prev["status"] in ("submitted", "in_progress", "validating", "finalizing"):
            return None, (
                f"Job {prev['job_num']} is '{prev['status']}'. "
                f"Wait for it to complete before submitting job {j['job_num']}."
            )
    return None, None
 # ---------------------------------------------------------------------------
 # Subcommand: create
 def cmd_create(args) -> None:
    report_path = Path(args.report)
    if not report_path.exists():
        sys.exit(f"Report not found: {report_path}")
    report = json.loads(report_path.read_text(encoding="utf-8"))
    if args.model not in report or not isinstance(report[args.model], dict):
        available = [k for k in report if isinstance(report.get(k), dict)]
        sys.exit(f"Model '{args.model}' not in report. Available: {', '.join(available)}")
    prompt_path = Path(report["prompt"])
    if not prompt_path.exists():
        sys.exit(f"Prompt file not found: {prompt_path}")
    _load_prompt(prompt_path)
    input_path = Path(report["input_file"])
    if not input_path.exists():
        sys.exit(f"Input file not found: {input_path}")
    forum, comments = load_items(input_path)
    if not comments:
        sys.exit("No comment items found in input file.")
    chunks = chunk_comments_by_tokens(comments, forum, args.model)
    stem = input_path.stem[:8]
    job_dir = _next_job_dir(stem)
    JOBS_DIR.mkdir(parents=True, exist_ok=True)
    job_dir.mkdir()
    shutil.copy2(input_path, job_dir / "forum.jsonl")
    shutil.copy2(prompt_path, job_dir / "prompt.txt")
    shutil.copy2(report_path, job_dir / "report.json")
    jobs_meta = []
    for i, chunk in enumerate(chunks, start=1):
        req_path = job_dir / f"job{i}-input.jsonl"
        with open(req_path, "w", encoding="utf-8") as f:
            for comment in chunk:
                f.write(json.dumps(build_batch_request_line(comment, forum, args.model),
                                   ensure_ascii=False) + "\n")
        jobs_meta.append({
            "job_num": i,
            "run_id": str(uuid.uuid4()),
            "status": "pending",
            "batch_id": None,
            "records_submitted": len(chunk),
            "records_completed": None,
            "records_failed": None,
            "submitted_at": None,
            "completed_at": None,
        })
    model_info = report[args.model]
    status = {
        "model": args.model,
        "prompt_hash": report["prompt_hash"],
        "input_file": str(input_path),
        "input_sha256": report["input_sha256"],
        "total_comments": report["total_comments"],
        "input_tokens": report["input_tokens"],
        "est_queue_days": model_info["est_queue_days"],
        "cost_$": model_info["cost_$"],
        "total_jobs": len(chunks),
        "jobs": jobs_meta,
    }
    save_status(status, job_dir)
-
+    print(f"Created: {job_dir.name}")
-def _latest_run_id() -> str:
+    print(f"  {len(chunks)} job(s)  |  {len(comments)} comments  |  model: {args.model}")
-    """Return the run_id of the most recently saved manifest, or exit if none found."""
+    print(f"\nNext:  python analysis/gpt4o/analysis_batch.py submit")
    runs = list(RUNS_DIR.glob("*.json")) if RUNS_DIR.exists() else []
    if not runs:
        sys.exit(f"No runs found in {RUNS_DIR}. Submit a batch first.")
    latest = max(runs, key=lambda p: p.stat().st_mtime)
    return latest.stem
 def load_manifest(run_id: str) -> dict:
    path = RUNS_DIR / f"{run_id}.json"
    return json.loads(path.read_text(encoding="utf-8"))
 def save_manifest(manifest: dict) -> None:
    RUNS_DIR.mkdir(parents=True, exist_ok=True)
    path = RUNS_DIR / f"{manifest['run_id']}.json"
    path.write_text(json.dumps(manifest, indent=2, ensure_ascii=False), encoding="utf-8")
 # ---------------------------------------------------------------------------
 # Subcommand: submit
 def _submit_chunk(
    chunk: list[dict],
    forum: dict | None,
    input_path: Path,
    input_sha256: str,
    model: str,
    client,
    chunk_index: int,
    total_chunks: int,
 ) -> str:
    """Upload and submit one chunk of comments. Returns the run_id."""
    import uuid
    run_id = str(uuid.uuid4())
    label = f"chunk {chunk_index + 1}/{total_chunks}" if total_chunks > 1 else "single batch"
-    REQUESTS_DIR.mkdir(parents=True, exist_ok=True)
+def cmd_submit(args, client) -> None:
-    request_path = REQUESTS_DIR / f"{run_id}.jsonl"
+    job_dir = _resolve_job_dir(args)
-    with open(request_path, "w", encoding="utf-8") as f:
+    status = load_status(job_dir)
-        for comment in chunk:
+    jobs = status["jobs"]
            line = build_batch_request_line(comment, forum, model)
            f.write(json.dumps(line, ensure_ascii=False) + "\n")
-    print(f"[{label}] Wrote {len(chunk)} requests → {request_path}", file=sys.stderr)
+    if args.job:
        target = next((j for j in jobs if j["job_num"] == args.job), None)
        if target is None:
            sys.exit(f"Job {args.job} not found in {job_dir.name}.")
        if target["status"] != "pending":
            sys.exit(f"Job {args.job} is already '{target['status']}' — cannot resubmit.")
        if target["job_num"] > 1:
            prev = next(p for p in jobs if p["job_num"] == target["job_num"] - 1)
            if prev["status"] != "completed":
                sys.exit(
                    f"Cannot submit job {target['job_num']}: "
                    f"job {prev['job_num']} is '{prev['status']}' (must be 'completed')."
                )
    else:
        target, warning = _find_next_eligible_job(jobs)
        if warning:
            print(warning, file=sys.stderr)
            sys.exit(1)
        if target is None:
            all_done = all(j["status"] == "completed" for j in jobs)
            print("All jobs completed." if all_done else "No pending jobs eligible for submission.")
            return
-    with open(request_path, "rb") as f:
+    n = target["job_num"]
    req_path = job_dir / f"job{n}-input.jsonl"
    print(f"Submitting job {n}/{status['total_jobs']} ({target['records_submitted']} comments) ...",
          file=sys.stderr)
    with open(req_path, "rb") as f:
        uploaded = client.files.create(file=f, purpose="batch")
    print(f"[{label}] Uploaded: {uploaded.id}", file=sys.stderr)
    batch = client.batches.create(
        input_file_id=uploaded.id,
        endpoint="/v1/chat/completions",
        completion_window="24h",
-        metadata={"run_id": run_id, "input_filename": str(input_path)},
+        metadata={"run_id": target["run_id"], "job_dir": job_dir.name},
    )
    print(f"[{label}] Batch created: {batch.id}  status={batch.status}", file=sys.stderr)
    manifest = make_manifest(
        run_id=run_id,
        input_filename=str(input_path),
        input_sha256=input_sha256,
        model=model,
        batch_id=batch.id,
        records_submitted=len(chunk),
        request_filename=str(request_path),
    )
    save_manifest(manifest)
    return run_id
 def cmd_submit(args, client) -> None:
    _load_prompt(Path(args.prompt))
    print(f"Prompt: {args.prompt}  (version {PROMPT_VERSION})", file=sys.stderr)
    input_path = Path(args.input)
    if not input_path.exists():
        sys.exit(f"File not found: {input_path}")
    print(f"Reading {input_path} ...", file=sys.stderr)
    forum, comments = load_items(input_path)
    if not comments:
        sys.exit("No comment items found in input file.")
    if forum is None:
        print("Warning: no ForumItem found — regulation context will be [unknown].", file=sys.stderr)
    if args.limit:
        comments = comments[:args.limit]
        print(f"Limiting to {len(comments)} comments (--limit {args.limit}).", file=sys.stderr)
    token_limit = int(MODEL_LIMITS.get(args.model, _DEFAULT_TOKEN_LIMIT) * _LIMIT_BUFFER)
    chunks = chunk_comments_by_tokens(comments, forum, args.model)
    total = len(chunks)
    print(
        f"Model: {args.model}  token limit: {token_limit:,}  "
        f"→ {len(comments)} comments split into {total} chunk(s).",
        file=sys.stderr,
    )
-    input_sha256 = hashlib.sha256(input_path.read_bytes()).hexdigest()
+    target["status"] = "submitted"
    target["batch_id"] = batch.id
    target["submitted_at"] = datetime.now(timezone.utc).isoformat()
    save_status(status, job_dir)
-    # Submit only the first chunk — the enqueued token limit is a TOTAL across all
+    print(f"Job {n} submitted: {batch.id}  ({batch.status})")
-    # concurrent batches, so stacking multiple submissions will exceed the quota.
+    print(f"  python analysis/gpt4o/analysis_batch.py status")
    # Wait for each batch to complete before submitting the next.
    run_id = _submit_chunk(chunks[0], forum, input_path, input_sha256, args.model, client, 0, total)
    print(f"\nBatch 1/{total} submitted.", file=sys.stderr)
    print(f"  status:   python analysis/gpt4o/analysis_batch.py status {run_id}", file=sys.stderr)
    print(f"  download: python analysis/gpt4o/analysis_batch.py download {run_id}", file=sys.stderr)
    if total > 1:
        remaining = sum(len(c) for c in chunks[1:])
        print(f"\n{total - 1} more chunk(s) remaining ({remaining} comments).", file=sys.stderr)
        print("After this batch completes and is downloaded, rerun submit with --limit to get the next chunk:", file=sys.stderr)
        offset = len(chunks[0])
        for idx, chunk in enumerate(chunks[1:], start=2):
            print(f"  chunk {idx}/{total}: comments {offset}–{offset + len(chunk) - 1}", file=sys.stderr)
            offset += len(chunk)
    print(run_id)  # stdout for scripting
 # ---------------------------------------------------------------------------
 # Subcommand: status
 def cmd_status(args, client) -> None:
-    run_id = args.run_id or _latest_run_id()
+    job_dir = _resolve_job_dir(args)
-    if not args.run_id:
+    status = load_status(job_dir)
-        print(f"(using latest run: {run_id})", file=sys.stderr)
+    jobs = status["jobs"]
    manifest = load_manifest(run_id)
    batch = client.batches.retrieve(manifest["batch_id"])
-    counts = batch.request_counts
+    job_filter = getattr(args, "job", None)
    print(f"status:     {batch.status}")
    print(f"completed:  {counts.completed}/{counts.total}")
    print(f"failed:     {counts.failed}")
-    manifest["records_completed"] = counts.completed
+    for job in jobs:
-    manifest["records_failed"]    = counts.failed
+        if job_filter is not None and job["job_num"] != job_filter:
-    save_manifest(manifest)
+            continue
        if not job["batch_id"]:
            continue
        if job["status"] in ("completed", "failed", "expired", "cancelled", "pending"):
            continue
        batch = client.batches.retrieve(job["batch_id"])
        counts = batch.request_counts
        if batch.status == "completed":
            job["status"] = "completed"
            if batch.completed_at:
                job["completed_at"] = datetime.fromtimestamp(
                    batch.completed_at, tz=timezone.utc
                ).isoformat()
        elif batch.status in ("failed", "expired", "cancelled"):
            job["status"] = batch.status
        else:
            job["status"] = batch.status
        job["records_completed"] = counts.completed
        job["records_failed"] = counts.failed
-    if batch.status == "completed":
+    save_status(status, job_dir)
-        print(f"\nReady to download. Run:")
+
-        print(f"  python analysis/gpt4o/analysis_batch.py download {run_id}")
+    target_jobs = jobs if not job_filter else [j for j in jobs if j["job_num"] == job_filter]
    print(f"Dir: {job_dir.name}  |  Model: {status['model']}  |  {status['total_jobs']} job(s)")
    print(f"{'Job':<5} {'Status':<14} {'Records':>12}  {'Submitted':<20}  {'Completed':<20}")
    print("-" * 76)
    for j in target_jobs:
        rec = (f"{j['records_completed']}/{j['records_submitted']}"
               if j["records_completed"] is not None else f"-/{j['records_submitted']}")
        sub  = (j["submitted_at"]  or "-")[:19]
        done = (j["completed_at"] or "-")[:19]
        print(f"{j['job_num']:<5} {j['status']:<14} {rec:>12}  {sub:<20}  {done:<20}")
 # ---------------------------------------------------------------------------
 # Subcommand: download
 def cmd_download(args, client) -> None:
-    run_id = args.run_id or _latest_run_id()
+    job_dir = _resolve_job_dir(args)
    if not args.run_id:
        print(f"(using latest run: {run_id})", file=sys.stderr)
    manifest = load_manifest(run_id)
    batch = client.batches.retrieve(manifest["batch_id"])
-    if batch.status != "completed":
+    # Refresh status before deciding what to download
-        sys.exit(f"Batch not complete yet (status={batch.status}). Run 'status' to check.")
+    cmd_status(args, client)
    status = load_status(job_dir)
    jobs = status["jobs"]
-    run_id    = manifest["run_id"]
+    job_filter = getattr(args, "job", None)
-    model     = manifest["model"]
+    if job_filter:
-    model_slug = model.replace("/", "-")
+        candidates = [j for j in jobs if j["job_num"] == job_filter]
    else:
        candidates = [
            j for j in jobs
            if j["status"] == "completed"
            and not (job_dir / f"job{j['job_num']}-output.jsonl").exists()
        ]
-    # Download raw output
+    if not candidates:
-    RAW_DIR.mkdir(parents=True, exist_ok=True)
+        print("No completed jobs pending download.", file=sys.stderr)
-    raw_path = RAW_DIR / f"{run_id}.jsonl"
+        return
    raw_text = client.files.content(batch.output_file_id).text
    raw_path.write_text(raw_text, encoding="utf-8")
    print(f"Raw output → {raw_path}", file=sys.stderr)
-    # Build comment lookup from original input for reconciliation
+    _, all_comments = load_items(job_dir / "forum.jsonl")
-    input_path = Path(manifest["input_filename"])
+    comment_lookup = {c["comment_id"]: c for c in all_comments}
    _, comments = load_items(input_path)
    comment_lookup = {c["comment_id"]: c for c in comments}
-    # Normalize
+    for job in candidates:
-    completed_at = datetime.now(timezone.utc).isoformat()
+        n = job["job_num"]
    if batch.completed_at:
        completed_at = datetime.fromtimestamp(batch.completed_at, tz=timezone.utc).isoformat()
-    normalized_path = _SCRIPT_DIR / f"{run_id}_{model_slug}.jsonl"
+        if job["status"] != "completed":
-    n_ok = n_err = 0
+            print(f"Job {n} not yet completed ('{job['status']}'), skipping.", file=sys.stderr)
-    with open(normalized_path, "w", encoding="utf-8") as out:
+            continue
        for line in raw_text.splitlines():
            if not line.strip():
                continue
            raw_line = json.loads(line)
            record = normalize_output_line(raw_line, comment_lookup, run_id, completed_at, model, manifest["prompt_hash"])
            out.write(json.dumps(record, ensure_ascii=False) + "\n")
            if record["error"]:
                n_err += 1
            else:
                n_ok += 1
-    print(f"Normalized → {normalized_path}  ({n_ok} ok, {n_err} errors)", file=sys.stderr)
+        batch = client.batches.retrieve(job["batch_id"])
-    manifest["records_completed"]         = n_ok
+        if not batch.output_file_id:
-    manifest["records_failed"]            = n_err
+            print(f"Job {n}: no output file available from OpenAI.", file=sys.stderr)
-    manifest["raw_output_filename"]       = str(raw_path)
+            continue
-    manifest["normalized_output_filename"] = str(normalized_path)
+
-    manifest["completed_at"]              = completed_at
+        raw_text = client.files.content(batch.output_file_id).text
-    save_manifest(manifest)
+        raw_path = job_dir / f"job{n}-output-raw.jsonl"
-    print(f"Manifest updated → {RUNS_DIR / run_id}.json", file=sys.stderr)
+        raw_path.write_text(raw_text, encoding="utf-8")
        print(f"Job {n} raw → {raw_path.name}", file=sys.stderr)
        if batch.error_file_id:
            err_text = client.files.content(batch.error_file_id).text
            err_path = job_dir / f"job{n}-errors.jsonl"
            err_path.write_text(err_text, encoding="utf-8")
            n_err_lines = sum(1 for line in err_text.splitlines() if line.strip())
            print(f"Job {n} errors → {err_path.name}  ({n_err_lines} lines)", file=sys.stderr)
        completed_at = job.get("completed_at") or datetime.now(timezone.utc).isoformat()
        norm_path = job_dir / f"job{n}-output.jsonl"
        n_ok = n_err = 0
        with open(norm_path, "w", encoding="utf-8") as out:
            for line in raw_text.splitlines():
                if not line.strip():
                    continue
                record = normalize_output_line(
                    json.loads(line), comment_lookup,
                    job["run_id"], completed_at,
                    status["model"], status["prompt_hash"],
                )
                out.write(json.dumps(record, ensure_ascii=False) + "\n")
                if record["error"]:
                    n_err += 1
                else:
                    n_ok += 1
        print(f"Job {n} normalized → {norm_path.name}  ({n_ok} ok, {n_err} errors)", file=sys.stderr)
        job["records_completed"] = n_ok
        job["records_failed"] = n_err
    save_status(status, job_dir)
 # ---------------------------------------------------------------------------
 # CLI
 def _add_common_args(p: argparse.ArgumentParser) -> None:
    p.add_argument("--job", type=int, default=None, metavar="N",
                   help="Job number within the run (default: auto)")
    p.add_argument("--dir", default=None, metavar="DIR",
                   help="Job directory name or path (default: most recent)")
 def main() -> None:
    load_dotenv()
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        sys.exit("OPENAI_API_KEY not set. Create a .env file or export the variable.")
    parser = argparse.ArgumentParser(
-        description="Public comment batch analysis pipeline.",
+        description="Batch analysis job runner.",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=__doc__,
    )
    sub = parser.add_subparsers(dest="command", required=True)
-    p_submit = sub.add_parser("submit", help="Build and submit a batch job")
+    p_create = sub.add_parser("create", help="Create job directory from tokenizer report")
-    p_submit.add_argument("input", help="Path to scraped JSONL file")
+    p_create.add_argument("report", help="Path to report.json from tokenizer.py")
-    p_submit.add_argument("--model", default="gpt-4o", help="OpenAI model (default: gpt-4o)")
+    p_create.add_argument("--model", required=True, help="Model (e.g. gpt-4o-mini)")
    p_submit.add_argument(
        "--prompt",
        default=str(_DEFAULT_PROMPT_FILE),
        help="Path to system prompt file (default: analysis/prompt-1.txt)",
    )
    p_submit.add_argument(
        "--limit", type=int, default=None, metavar="N",
        help="Submit only the first N comments (useful for staying under token quota)",
    )
-    p_status = sub.add_parser("status", help="Check batch status")
+    p_submit = sub.add_parser("submit", help="Submit next eligible job")
-    p_status.add_argument("run_id", nargs="?", default=None,
+    _add_common_args(p_submit)
                          help="run_id from submit (default: most recent run)")
-    p_download = sub.add_parser("download", help="Download and normalize completed batch")
+    p_status = sub.add_parser("status", help="Check job status")
-    p_download.add_argument("run_id", nargs="?", default=None,
+    _add_common_args(p_status)
-                            help="run_id from submit (default: most recent run)")
+
    p_download = sub.add_parser("download", help="Download and normalize completed jobs")
    _add_common_args(p_download)
    args = parser.parse_args()
    if args.command == "create":
        cmd_create(args)
        return
    api_key = os.environ.get("OPENAI_API_KEY")
    if not api_key:
        sys.exit("OPENAI_API_KEY not set. Create a .env file or export the variable.")
    client = openai.OpenAI(api_key=api_key)
    if args.command == "submit":
--- a/analysis/gpt4o/tokenizer.py
+++ b/analysis/gpt4o/tokenizer.py
@@ -0,0 +1,141 @@
 #!/usr/bin/env python3
 """
 tokenizer.py — estimate token usage and cost for a batch analysis run.
 Usage:
    python analysis/gpt4o/tokenizer.py output/f452.jsonl [--prompt analysis/prompt-1.txt]
 Prints a per-model comparison table and writes report.json next to the input file.
 Run this before analysis_batch.py create.
 """
 import argparse
 import hashlib
 import json
 import math
 import sys
 from pathlib import Path
 sys.path.insert(0, str(Path(__file__).parent))
 import analysis_batch as _ab
 # Input pricing ($/1M tokens, batch API) — from docs/openai.md, updated 2026-05-05.
 # Add Anthropic/other models here when needed; only models with a LIMITS entry are reported.
 MODEL_PRICING: dict[str, float] = {
    "gpt-5.5":       2.50,
    "gpt-5.4":       1.25,
    "gpt-5.4-mini":  0.375,
    "gpt-5.4-nano":  0.10,
    "gpt-4o":        1.25,
    "gpt-4o-mini":   0.075,
    "gpt-o4-mini":   0.55,
 }
 def compute_report(
    comments: list[dict],
    forum: dict | None,
    prompt_hash: str,
    input_file: str,
    input_sha256: str,
    prompt_file: str,
 ) -> dict:
    """Compute token estimate and per-model job/cost/time breakdown."""
    # Use gpt-4o encoding as the canonical estimator (same for all current models)
    total_tokens = sum(
        _ab.estimate_tokens(_ab.build_messages(c, forum)[0], "gpt-4o")
        for c in comments
    )
    report: dict = {
        "prompt": prompt_file,
        "prompt_hash": prompt_hash,
        "input_file": input_file,
        "input_sha256": input_sha256,
        "total_comments": len(comments),
        "input_tokens": total_tokens,
    }
    for model, tpd in _ab.MODEL_LIMITS.items():
        effective_tpd = int(tpd * _ab._LIMIT_BUFFER)
        jobs = math.ceil(total_tokens / effective_tpd)
        cost = round(total_tokens / 1_000_000 * MODEL_PRICING.get(model, 0.0), 4)
        est_days = round(total_tokens / tpd, 2)
        report[model] = {"jobs": jobs, "cost_$": cost, "est_queue_days": est_days}
    return report
 def print_table(report: dict) -> None:
    """Print a human-readable model comparison table to stdout."""
    print(f"\nInput:    {report['input_file']}")
    print(f"Comments: {report['total_comments']:,}")
    print(f"Tokens:   {report['input_tokens']:,}")
    print(f"Prompt:   {report['prompt']}  (hash: {report['prompt_hash']})")
    print()
    # Cheapest model that fits in one job
    single_job_models = [m for m in _ab.MODEL_LIMITS if report.get(m, {}).get("jobs") == 1]
    best = (min(single_job_models, key=lambda m: report[m]["cost_$"])
            if single_job_models else None)
    print(f"{'Model':<15} {'Jobs':>5}  {'Cost ($)':>9}  {'Est days':>9}  {'Note'}")
    print("-" * 62)
    for model in _ab.MODEL_LIMITS:
        if model not in report or not isinstance(report[model], dict):
            continue
        m = report[model]
        note = "<-- recommended" if model == best else ""
        print(f"{model:<15} {m['jobs']:>5}  {m['cost_$']:>9.4f}  {m['est_queue_days']:>9.2f}  {note}")
    print()
 def main() -> None:
    _default_prompt = Path(__file__).parent.parent / "prompt-1.txt"
    parser = argparse.ArgumentParser(description="Estimate batch token usage and cost.")
    parser.add_argument("input", help="Scraped JSONL file")
    parser.add_argument(
        "--prompt",
        default=str(_default_prompt),
        help=f"System prompt file (default: {_default_prompt.name})",
    )
    args = parser.parse_args()
    input_path = Path(args.input)
    if not input_path.exists():
        sys.exit(f"File not found: {input_path}")
    prompt_path = Path(args.prompt)
    if not prompt_path.exists():
        sys.exit(f"Prompt file not found: {prompt_path}")
    prompt_text = prompt_path.read_text(encoding="utf-8").strip()
    prompt_hash = hashlib.sha256(prompt_text.encode("utf-8")).hexdigest()[:7]
    # Ensure build_messages uses the specified prompt
    _ab._load_prompt(prompt_path)
    forum, comments = _ab.load_items(input_path)
    if not comments:
        sys.exit("No comment items found.")
    if forum is None:
        print("Warning: no ForumItem — token estimates may be slightly low.", file=sys.stderr)
    input_sha256 = hashlib.sha256(input_path.read_bytes()).hexdigest()
    report = compute_report(
        comments, forum, prompt_hash,
        str(input_path), input_sha256, str(prompt_path),
    )
    print_table(report)
    out_path = input_path.parent / "report.json"
    out_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    print(f"Report written to: {out_path}")
    print(f"\nNext:  python analysis/gpt4o/analysis_batch.py create {out_path} --model <model>")
 if __name__ == "__main__":
    main()
--- a/docs/tasks.org
+++ b/docs/tasks.org
@@ -158,7 +158,7 @@ forum_id_input,comment_id,title,text,date,author,stance,stance_confidence,stance
 - tests: 23 passing (pytest tests/analysis_gpt4o_batch.py), 51 total across suite
 - datetime: [2026-05-06 Wed 08:55]
-* [ ] t1.2.3: batch job refactor
+* [X] t1.2.3: batch job refactor
 This task encompasses intent and fixes for 1.2.1 and 1.2.2.
 batch processing should  be a resumable job queue, not a one-shot script. the user should not need to remember offsets, completed chunks, failed batches, or which comments remain.
 ** Acceptance Criteria
@@ -200,6 +200,46 @@ batch processing should  be a resumable job queue, not a one-shot script. the us
   - resume from status.json
   - remaining-comment detection
 ** notes
 - analysis/gpt4o/tokenizer.py: new standalone script; imports analysis_batch for MODEL_LIMITS, estimate_tokens, build_messages. Reads input JSONL + prompt, computes per-model jobs/cost/time table, writes report.json to input file's directory. MODEL_PRICING dict lives here (not in analysis_batch).
 - analysis/gpt4o/analysis_batch.py: fully rewritten with four subcommands: create, submit, status, download. No longer uses REQUESTS_DIR / RAW_DIR / RUNS_DIR.
 - Job directories: analysis/gpt4o/jobs/<stem[:8]>-N/ (e.g. f452-1). Each run is self-contained: forum.jsonl, prompt.txt, report.json, jobN-input.jsonl, jobN-output-raw.jsonl, jobN-output.jsonl, jobN-errors.jsonl.
 - status.json: tracks all jobs with pending/submitted/in_progress/completed/failed states. Updated by submit, status, download.
 - _find_next_eligible_job: pure function for testability. Returns (next_pending_job, None) or (None, warning). Blocks submission if previous job is in_progress/submitted.
 - create: no API key required. Reads report.json, re-chunks comments, writes all jobN-input.jsonl files, writes status.json.
 - submit: uploads jobN-input.jsonl to Files API, creates batch, updates status.json to 'submitted'. Will not stack batches.
 - status: retrieves batch from OpenAI, updates status.json counts and status.
 - download: auto-runs status first, downloads output_file_id → jobN-output-raw.jsonl, error_file_id → jobN-errors.jsonl, normalizes → jobN-output.jsonl. Updates status.json.
 - tests/test_tokenizer.py: 15 tests for compute_report schema, cost/time calculation, MODEL_PRICING coverage, print_table output, report.json round-trip.
 *** usage
 #+begin_src sh
 # 1. estimate tokens and cost
 python analysis/gpt4o/tokenizer.py output/f452.jsonl --prompt analysis/prompt-1.txt
 # writes output/report.json
 # 2. create job directory (no api key needed)
 python analysis/gpt4o/analysis_batch.py create output/report.json --model gpt-4o-mini
 # creates analysis/gpt4o/jobs/f452-1/
 # 3. submit first job
 python analysis/gpt4o/analysis_batch.py submit
 # 4. check status (repeat until completed)
 python analysis/gpt4o/analysis_batch.py status
 # 5. download and normalize
 python analysis/gpt4o/analysis_batch.py download
 # 6. submit next job (if multi-job run), then repeat 4-5
 python analysis/gpt4o/analysis_batch.py submit
 #+end_src
 ** evidence
 - commit:
 - tests: passing (pytest tests/analysis_gpt4o_batch.py tests/test_tokenizer.py)
 - datetime: [2026-05-05 Tue]
 * === Backlog ===
 * [ ] X: analysis validation view
 create a lightweight validation script that joins raw comments to normalized analysis output and writes a human-reviewable csv.
--- a/tests/analysis_gpt4o_batch.py
+++ b/tests/analysis_gpt4o_batch.py
@@ -75,9 +75,24 @@ ANALYZED_AT = "2026-05-05T18:00:00+00:00"
 RUN_ID = "test-run-id-123"
 MODEL = "gpt-4o"
 # Minimal status.json for testing job logic
 def _make_status(jobs_override=None):
    jobs = jobs_override or [
        {"job_num": 1, "run_id": "r1", "status": "pending", "batch_id": None,
         "records_submitted": 60, "records_completed": None, "records_failed": None,
         "submitted_at": None, "completed_at": None},
    ]
    return {
        "model": "gpt-4o-mini", "prompt_hash": "abc1234",
        "input_file": "output/f452.jsonl", "input_sha256": "sha",
        "total_comments": 100, "input_tokens": 50_000,
        "est_queue_days": 0.025, "cost_$": 0.01,
        "total_jobs": len(jobs), "jobs": jobs,
    }
 # ---------------------------------------------------------------------------
-# Prompt versioning (batch reads the same prompt file)
+# Prompt versioning
 def test_prompt_version_is_7_hex_chars():
    assert len(bt.PROMPT_VERSION) == 7
@@ -206,52 +221,6 @@ def test_normalize_unknown_comment_id():
    assert record["input_title"] == ""
 # ---------------------------------------------------------------------------
 # Manifest
 def test_make_manifest_all_keys():
    m = bt.make_manifest(
        run_id=RUN_ID,
        input_filename="output/forum452.jsonl",
        input_sha256="abc123",
        model="gpt-4o",
        batch_id="batch_xyz",
        records_submitted=100,
        request_filename="analysis/gpt4o/requests/test-run-id-123.jsonl",
    )
    required = {
        "run_id", "input_filename", "input_sha256", "prompt_hash", "model",
        "batch_id", "records_submitted", "records_completed", "records_failed",
        "request_filename", "raw_output_filename", "normalized_output_filename",
        "created_at", "completed_at",
    }
    assert required == set(m.keys())
 def test_make_manifest_initial_nulls():
    m = bt.make_manifest(
        run_id=RUN_ID, input_filename="f", input_sha256="s",
        model="gpt-4o", batch_id="b", records_submitted=10, request_filename="r",
    )
    assert m["records_completed"] is None
    assert m["records_failed"] is None
    assert m["raw_output_filename"] is None
    assert m["normalized_output_filename"] is None
    assert m["completed_at"] is None
    assert m["prompt_hash"] == bt.PROMPT_VERSION
 def test_manifest_save_load_roundtrip(tmp_path, monkeypatch):
    monkeypatch.setattr(bt, "RUNS_DIR", tmp_path)
    m = bt.make_manifest(
        run_id=RUN_ID, input_filename="f", input_sha256="s",
        model="gpt-4o", batch_id="b", records_submitted=42, request_filename="r",
    )
    bt.save_manifest(m)
    loaded = bt.load_manifest(RUN_ID)
    assert loaded == m
 # ---------------------------------------------------------------------------
 # estimate_tokens
@@ -309,3 +278,112 @@ def test_chunk_preserves_all_comments(monkeypatch):
 def test_model_limits_has_required_models():
    for model in ("gpt-4o", "gpt-4o-mini", "gpt-5.4", "gpt-5.4-mini", "gpt-o4-mini"):
        assert model in bt.MODEL_LIMITS, f"{model} missing from MODEL_LIMITS"
 # ---------------------------------------------------------------------------
 # status.json helpers
 def test_status_save_load_roundtrip(tmp_path):
    status = _make_status()
    bt.save_status(status, tmp_path)
    loaded = bt.load_status(tmp_path)
    assert loaded == status
 # ---------------------------------------------------------------------------
 # _find_next_eligible_job
 def test_find_next_eligible_job_first_job_pending():
    jobs = _make_status()["jobs"]
    target, warning = bt._find_next_eligible_job(jobs)
    assert target["job_num"] == 1
    assert warning is None
 def test_find_next_eligible_job_after_completed():
    jobs = [
        {"job_num": 1, "status": "completed", "batch_id": "b1",
         "records_submitted": 60, "records_completed": 60, "records_failed": 0,
         "submitted_at": "t", "completed_at": "t", "run_id": "r1"},
        {"job_num": 2, "status": "pending", "batch_id": None,
         "records_submitted": 40, "records_completed": None, "records_failed": None,
         "submitted_at": None, "completed_at": None, "run_id": "r2"},
    ]
    target, warning = bt._find_next_eligible_job(jobs)
    assert target["job_num"] == 2
    assert warning is None
 def test_find_next_eligible_job_blocked_by_in_progress():
    jobs = [
        {"job_num": 1, "status": "in_progress", "batch_id": "b1",
         "records_submitted": 60, "records_completed": None, "records_failed": None,
         "submitted_at": "t", "completed_at": None, "run_id": "r1"},
        {"job_num": 2, "status": "pending", "batch_id": None,
         "records_submitted": 40, "records_completed": None, "records_failed": None,
         "submitted_at": None, "completed_at": None, "run_id": "r2"},
    ]
    target, warning = bt._find_next_eligible_job(jobs)
    assert target is None
    assert warning is not None
    assert "in_progress" in warning
 def test_find_next_eligible_job_all_completed():
    jobs = [
        {"job_num": 1, "status": "completed", "batch_id": "b1",
         "records_submitted": 60, "records_completed": 60, "records_failed": 0,
         "submitted_at": "t", "completed_at": "t", "run_id": "r1"},
    ]
    target, warning = bt._find_next_eligible_job(jobs)
    assert target is None
    assert warning is None
 def test_resume_from_status_json(tmp_path):
    """Reload a status.json with one completed job and find the next pending job."""
    jobs = [
        {"job_num": 1, "run_id": "r1", "status": "completed", "batch_id": "b1",
         "records_submitted": 60, "records_completed": 58, "records_failed": 2,
         "submitted_at": "2026-05-06T10:00:00+00:00", "completed_at": "2026-05-06T11:00:00+00:00"},
        {"job_num": 2, "run_id": "r2", "status": "pending", "batch_id": None,
         "records_submitted": 40, "records_completed": None, "records_failed": None,
         "submitted_at": None, "completed_at": None},
    ]
    bt.save_status(_make_status(jobs), tmp_path)
    loaded = bt.load_status(tmp_path)
    target, warning = bt._find_next_eligible_job(loaded["jobs"])
    assert target["job_num"] == 2
    assert warning is None
 # ---------------------------------------------------------------------------
 # normalize: out-of-order and duplicate custom_id
 def test_out_of_order_output_reconciled_by_custom_id():
    """Raw lines processed in any order are mapped to the correct comment."""
    c2 = {**COMMENT_ITEM, "comment_id": "99999", "title": "Second comment"}
    lookup = {COMMENT_ITEM["comment_id"]: COMMENT_ITEM, "99999": c2}
    line_for_99999 = {
        **RAW_SUCCESS_LINE,
        "custom_id": "comment_99999",
    }
    line_for_87914 = RAW_SUCCESS_LINE
    r1 = bt.normalize_output_line(line_for_99999, lookup, RUN_ID, ANALYZED_AT, MODEL, bt.PROMPT_VERSION)
    r2 = bt.normalize_output_line(line_for_87914, lookup, RUN_ID, ANALYZED_AT, MODEL, bt.PROMPT_VERSION)
    assert r1["comment_id"] == "99999"
    assert r1["input_title"] == "Second comment"
    assert r2["comment_id"] == "87914"
    assert r2["input_title"] == COMMENT_ITEM["title"]
 def test_duplicate_custom_id_both_produce_valid_records():
    """Two raw lines with the same custom_id each produce a valid record."""
    r1 = bt.normalize_output_line(RAW_SUCCESS_LINE, COMMENT_LOOKUP, RUN_ID, ANALYZED_AT, MODEL, bt.PROMPT_VERSION)
    r2 = bt.normalize_output_line(RAW_SUCCESS_LINE, COMMENT_LOOKUP, RUN_ID, ANALYZED_AT, MODEL, bt.PROMPT_VERSION)
    assert r1["comment_id"] == r2["comment_id"] == "87914"
    assert r1["error"] is None
    assert r2["error"] is None
--- a/tests/tokenizer.py
+++ b/tests/tokenizer.py
@@ -0,0 +1,201 @@
 """Unit tests for analysis/gpt4o/tokenizer.py — no real API calls."""
 import io
 import json
 import math
 import sys
 from pathlib import Path
 from unittest.mock import patch
 import pytest
 sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
 import tokenizer as tk
 import analysis_batch as ab
 # ---------------------------------------------------------------------------
 # Fixtures
 FORUM_ITEM = {
    "forum_id": "452",
    "reg_title": "Model Policies for Transgender Students",
    "reg_desc": "Guidance developed in response to HB 145.",
 }
 COMMENT_A = {
    "forum_id": "452",
    "comment_id": "100",
    "author": "Alice",
    "date": "2021-01-04T09:15:00",
    "title": "Support",
    "text": "I support this policy.",
 }
 COMMENT_B = {
    "forum_id": "452",
    "comment_id": "101",
    "author": "Bob",
    "date": "2021-01-05T10:00:00",
    "title": "Oppose",
    "text": "I oppose this policy.",
 }
 COMMENTS = [COMMENT_A, COMMENT_B]
 PROMPT_HASH = "abc1234"
 INPUT_FILE = "output/f452.jsonl"
 INPUT_SHA256 = "deadbeef" * 8
 PROMPT_FILE = "analysis/prompt-1.txt"
 def _make_report(total_tokens=10_000):
    return tk.compute_report(
        COMMENTS, FORUM_ITEM, PROMPT_HASH, INPUT_FILE, INPUT_SHA256, PROMPT_FILE
    )
 # ---------------------------------------------------------------------------
 # compute_report: required top-level keys
 def test_report_has_top_level_keys():
    report = _make_report()
    required = {"prompt", "prompt_hash", "input_file", "input_sha256",
                "total_comments", "input_tokens"}
    assert required.issubset(set(report.keys()))
 def test_report_metadata_values():
    report = _make_report()
    assert report["prompt"] == PROMPT_FILE
    assert report["prompt_hash"] == PROMPT_HASH
    assert report["input_file"] == INPUT_FILE
    assert report["input_sha256"] == INPUT_SHA256
    assert report["total_comments"] == 2
 def test_report_input_tokens_positive():
    report = _make_report()
    assert isinstance(report["input_tokens"], int)
    assert report["input_tokens"] > 0
 # ---------------------------------------------------------------------------
 # compute_report: per-model entries
 def test_report_has_per_model_keys():
    report = _make_report()
    for model in ab.MODEL_LIMITS:
        assert model in report, f"Model {model} missing from report"
        assert isinstance(report[model], dict)
 def test_report_per_model_has_required_fields():
    report = _make_report()
    for model in ab.MODEL_LIMITS:
        m = report[model]
        assert "jobs" in m
        assert "cost_$" in m
        assert "est_queue_days" in m
 def test_report_jobs_at_least_one():
    report = _make_report()
    for model in ab.MODEL_LIMITS:
        assert report[model]["jobs"] >= 1
 # ---------------------------------------------------------------------------
 # compute_report: calculation accuracy
 def test_cost_calculation():
    """cost_$ = total_tokens / 1M * pricing_rate"""
    report = _make_report()
    total = report["input_tokens"]
    for model in ab.MODEL_LIMITS:
        expected_cost = round(total / 1_000_000 * tk.MODEL_PRICING.get(model, 0.0), 4)
        assert report[model]["cost_$"] == pytest.approx(expected_cost, abs=1e-6)
 def test_est_queue_days_calculation():
    """est_queue_days = total_tokens / tpd (rounded to 2 decimal places)"""
    report = _make_report()
    total = report["input_tokens"]
    for model, tpd in ab.MODEL_LIMITS.items():
        expected = round(total / tpd, 2)
        assert report[model]["est_queue_days"] == pytest.approx(expected, abs=1e-4)
 def test_jobs_ceiling_division():
    """jobs = ceil(total_tokens / (tpd * _LIMIT_BUFFER))"""
    report = _make_report()
    total = report["input_tokens"]
    for model, tpd in ab.MODEL_LIMITS.items():
        effective = int(tpd * ab._LIMIT_BUFFER)
        expected = math.ceil(total / effective)
        assert report[model]["jobs"] == expected
 def test_more_comments_increases_tokens():
    """More comments → more input_tokens."""
    few = tk.compute_report([COMMENT_A], FORUM_ITEM, PROMPT_HASH, INPUT_FILE, INPUT_SHA256, PROMPT_FILE)
    many = tk.compute_report(COMMENTS, FORUM_ITEM, PROMPT_HASH, INPUT_FILE, INPUT_SHA256, PROMPT_FILE)
    assert many["input_tokens"] > few["input_tokens"]
 # ---------------------------------------------------------------------------
 # MODEL_PRICING coverage
 def test_model_pricing_has_required_models():
    for model in ("gpt-4o", "gpt-4o-mini", "gpt-5.4", "gpt-5.4-mini", "gpt-o4-mini"):
        assert model in tk.MODEL_PRICING, f"{model} missing from MODEL_PRICING"
 def test_model_pricing_values_positive():
    for model, price in tk.MODEL_PRICING.items():
        assert price > 0, f"{model} has non-positive price"
 # ---------------------------------------------------------------------------
 # print_table: runs without error, produces output
 def test_print_table_runs():
    report = _make_report()
    buf = io.StringIO()
    with patch("sys.stdout", buf):
        tk.print_table(report)
    output = buf.getvalue()
    assert "gpt-4o" in output
    assert "gpt-4o-mini" in output
 def test_print_table_shows_all_models():
    report = _make_report()
    buf = io.StringIO()
    with patch("sys.stdout", buf):
        tk.print_table(report)
    output = buf.getvalue()
    for model in ab.MODEL_LIMITS:
        assert model in output, f"{model} not shown in print_table output"
 def test_print_table_highlights_recommended():
    """When a single-job cheapest model exists, table marks it as recommended."""
    report = _make_report()
    buf = io.StringIO()
    with patch("sys.stdout", buf):
        tk.print_table(report)
    output = buf.getvalue()
    assert "recommended" in output
 # ---------------------------------------------------------------------------
 # report.json round-trip (write → read)
 def test_report_json_roundtrip(tmp_path):
    report = _make_report()
    out = tmp_path / "report.json"
    out.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    loaded = json.loads(out.read_text(encoding="utf-8"))
    assert loaded["total_comments"] == report["total_comments"]
    assert loaded["input_tokens"] == report["input_tokens"]
    assert loaded["gpt-4o-mini"]["jobs"] == report["gpt-4o-mini"]["jobs"]