completed openai batch work

2026-05-07 07:24:11 -04:00
parent 64a7a18721
commit f5d679808e
29 changed files with 36711 additions and 83 deletions
--- a/agents.md
+++ b/agents.md
@@ -43,3 +43,4 @@ Description and PM notes
 - project dir: `%userprofile%\projects\vath\`
 - python venv: `%userprofile%\projects\vath\venv\scripts\activate`
 - pytest (inside venv): `python -m pytest tests/`
+  - create tests without the `test_` prefix, e.g. `tests/tokenizer.py`, not `tests/test_tokenizer.py`
--- a/analysis/gpt4o/init.py
+++ b/analysis/gpt4o/init.py
--- a/analysis/jobs/f452-1/forum.jsonl
+++ b/analysis/jobs/f452-1/forum.jsonl
--- a/analysis/jobs/f452-1/job1-input.jsonl
+++ b/analysis/jobs/f452-1/job1-input.jsonl
--- a/analysis/jobs/f452-1/job1-output-raw.jsonl
+++ b/analysis/jobs/f452-1/job1-output-raw.jsonl
--- a/analysis/jobs/f452-1/job1-output.jsonl
+++ b/analysis/jobs/f452-1/job1-output.jsonl
--- a/analysis/jobs/f452-1/job2-input.jsonl
+++ b/analysis/jobs/f452-1/job2-input.jsonl
--- a/analysis/jobs/f452-1/job2-output-raw.jsonl
+++ b/analysis/jobs/f452-1/job2-output-raw.jsonl
--- a/analysis/jobs/f452-1/job2-output.jsonl
+++ b/analysis/jobs/f452-1/job2-output.jsonl
--- a/analysis/jobs/f452-1/job3-input.jsonl
+++ b/analysis/jobs/f452-1/job3-input.jsonl
--- a/analysis/jobs/f452-1/job3-output-raw.jsonl
+++ b/analysis/jobs/f452-1/job3-output-raw.jsonl
--- a/analysis/jobs/f452-1/job3-output.jsonl
+++ b/analysis/jobs/f452-1/job3-output.jsonl
--- a/analysis/jobs/f452-1/job4-input.jsonl
+++ b/analysis/jobs/f452-1/job4-input.jsonl
--- a/analysis/jobs/f452-1/job4-output-raw.jsonl
+++ b/analysis/jobs/f452-1/job4-output-raw.jsonl
--- a/analysis/jobs/f452-1/job4-output.jsonl
+++ b/analysis/jobs/f452-1/job4-output.jsonl
--- a/analysis/jobs/f452-1/prompt.txt
+++ b/analysis/jobs/f452-1/prompt.txt
@@ -0,0 +1,23 @@
+You are an expert policy analyst classifying public comments submitted to the Virginia Town Hall
+regulatory comment system. You will be given the text of a proposed regulation and a single
+public comment. Return ONLY a JSON object — no other text.
+
+Definitions:
+- stance: the commenter's position on whether the regulation should be adopted.
+  "support" = wants it approved (as-is or with changes);
+  "oppose"  = wants it rejected or substantially weakened;
+  "neutral" = takes no position, asks a question, or provides factual input only;
+  "unknown" = too vague, off-topic, or uninterpretable to classify.
+- tone: the emotional register of the writing, independent of stance.
+  "positive" = affirming, hopeful, appreciative;
+  "negative" = angry, fearful, alarmed, or contemptuous;
+  "neutral"  = matter-of-fact, procedural, or informational;
+  "mixed"    = contains both positive and negative emotional content;
+  "unclear"  = tone cannot be determined (e.g., a one-word comment).
+- stance_confidence: float 0.0-1.0, your confidence in the stance label.
+- stance_rationale: 1-3 sentences explaining the key evidence; quote specific phrases where possible.
+- tags: up to 5 short topic labels relevant to the comment's specific concerns (e.g.
+  "parental rights", "student safety", "privacy", "religious freedom", "LGBTQ+ inclusion",
+  "bullying prevention", "school sports", "bathroom access"). Empty array if none apply.
+
+Return exactly these keys: stance, stance_confidence, stance_rationale, tone, tags.
--- a/analysis/jobs/f452-1/report.json
+++ b/analysis/jobs/f452-1/report.json
@@ -0,0 +1,43 @@
+{
+  "prompt": "analysis\\prompt-1.txt",
+  "prompt_hash": "cb41250",
+  "input_file": "output\\f452.jsonl",
+  "input_sha256": "59dcc8b13cc2a386977a8b934c498c7e639b7e684a94ca1bfd10a14878670018",
+  "total_comments": 9083,
+  "input_tokens": 6397254,
+  "gpt-5.5": {
+    "jobs": 9,
+    "cost_$": 15.9931,
+    "est_queue_days": 7.11
+  },
+  "gpt-5.4": {
+    "jobs": 9,
+    "cost_$": 7.9966,
+    "est_queue_days": 7.11
+  },
+  "gpt-5.4-mini": {
+    "jobs": 4,
+    "cost_$": 2.399,
+    "est_queue_days": 3.2
+  },
+  "gpt-5.4-nano": {
+    "jobs": 40,
+    "cost_$": 0.6397,
+    "est_queue_days": 31.99
+  },
+  "gpt-4o": {
+    "jobs": 9,
+    "cost_$": 7.9966,
+    "est_queue_days": 7.11
+  },
+  "gpt-4o-mini": {
+    "jobs": 4,
+    "cost_$": 0.4798,
+    "est_queue_days": 3.2
+  },
+  "gpt-o4-mini": {
+    "jobs": 4,
+    "cost_$": 3.5185,
+    "est_queue_days": 3.2
+  }
+}
--- a/analysis/jobs/f452-1/status.json
+++ b/analysis/jobs/f452-1/status.json
@@ -0,0 +1,57 @@
+{
+  "model": "gpt-5.4-mini",
+  "prompt_hash": "cb41250",
+  "input_file": "output\\f452.jsonl",
+  "input_sha256": "59dcc8b13cc2a386977a8b934c498c7e639b7e684a94ca1bfd10a14878670018",
+  "total_comments": 9083,
+  "input_tokens": 6397254,
+  "est_queue_days": 3.2,
+  "cost_$": 2.399,
+  "total_jobs": 4,
+  "jobs": [
+    {
+      "job_num": 1,
+      "run_id": "76c97113-63aa-43db-8f84-9c60ebcbb105",
+      "status": "completed",
+      "batch_id": "batch_69fb9081639881909be0c40d86edd747",
+      "records_submitted": 2270,
+      "records_completed": 2270,
+      "records_failed": 0,
+      "submitted_at": "2026-05-06T19:03:28.949240+00:00",
+      "completed_at": "2026-05-06T20:09:14+00:00"
+    },
+    {
+      "job_num": 2,
+      "run_id": "b8f3b0bb-f155-4a5c-acce-f3504c0e09aa",
+      "status": "completed",
+      "batch_id": "batch_69fba02df7b481909e96afa1ee8879f5",
+      "records_submitted": 2274,
+      "records_completed": 2274,
+      "records_failed": 0,
+      "submitted_at": "2026-05-06T20:10:21.424330+00:00",
+      "completed_at": "2026-05-06T20:37:11+00:00"
+    },
+    {
+      "job_num": 3,
+      "run_id": "8d769f37-6beb-4a1b-87ee-3f66cdc6adc8",
+      "status": "completed",
+      "batch_id": "batch_69fba69a85488190977792b6f95b614b",
+      "records_submitted": 2282,
+      "records_completed": 2282,
+      "records_failed": 0,
+      "submitted_at": "2026-05-06T20:37:45.586815+00:00",
+      "completed_at": "2026-05-06T21:09:24+00:00"
+    },
+    {
+      "job_num": 4,
+      "run_id": "e6affbc2-ddc9-43a6-b8e9-d1f47e736283",
+      "status": "completed",
+      "batch_id": "batch_69fbe44565748190ad19f17ee3143f8d",
+      "records_submitted": 2257,
+      "records_completed": 2257,
+      "records_failed": 0,
+      "submitted_at": "2026-05-07T01:00:52.886953+00:00",
+      "completed_at": "2026-05-07T09:20:01+00:00"
+    }
+  ]
+}
--- a/analysis/gpt4o/analysis_batch.py
+++ b/analysis/gpt4o/analysis_batch.py
@@ -1,6 +1,6 @@
 #!/usr/bin/env python3
 """
-analysis_batch.py — OpenAI Batch API job runner
+openai_batch.py — OpenAI Batch API job runner

 Run tokenizer.py first to generate report.json, then:
    create   <report.json> --model <model>   — build job directory
@@ -8,7 +8,7 @@ Run tokenizer.py first to generate report.json, then:
    status   [--job N] [--dir DIR]           — check job status
    download [--job N] [--dir DIR]           — download + normalize completed jobs

-DIR is a name under analysis/gpt4o/jobs/ (default: most recently created).
+DIR is a name under analysis/jobs/ (default: most recently created).
 """

 import argparse
@@ -52,17 +52,24 @@ _MODEL_ENCODING: dict[str, str] = {
    "gpt-4o-mini":   "o200k_base",
    "gpt-o4-mini":   "o200k_base",
 }
-_LIMIT_BUFFER = 0.90
+_LIMIT_BUFFER = 0.80


 def estimate_tokens(messages: list[dict], model: str) -> int:
-    """Exact token count via tiktoken; falls back to chars/3 + 4 overhead per message."""
+    """Token count per OpenAI cookbook chat formula; falls back to chars/3."""
    try:
        import tiktoken
        enc = tiktoken.get_encoding(_MODEL_ENCODING.get(model, "o200k_base"))
-        return sum(4 + len(enc.encode(m["content"])) for m in messages)
+        # Per OpenAI cookbook for gpt-4o: 3 overhead per message + role + content;
+        # plus 3 tokens for the reply primer (<|start|>assistant<|message|>).
+        total = 3  # reply primer
+        for m in messages:
+            total += 3
+            total += len(enc.encode(m.get("role", "")))
+            total += len(enc.encode(m["content"]))
+        return total
    except ImportError:
-        return sum(4 + len(m["content"]) // 3 for m in messages)
+        return 3 + sum(3 + len(m["content"]) // 3 for m in messages)


 def chunk_comments_by_tokens(
@@ -91,7 +98,7 @@ def chunk_comments_by_tokens(
 # ---------------------------------------------------------------------------
 # Prompt

-_DEFAULT_PROMPT_FILE = Path(__file__).parent.parent / "prompt-1.txt"
+_DEFAULT_PROMPT_FILE = Path(__file__).parent / "prompt-1.txt"
 SYSTEM_PROMPT = _DEFAULT_PROMPT_FILE.read_text(encoding="utf-8").strip()
 PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]

@@ -375,7 +382,7 @@ def cmd_create(args) -> None:

    print(f"Created: {job_dir.name}")
    print(f"  {len(chunks)} job(s)  |  {len(comments)} comments  |  model: {args.model}")
-    print(f"\nNext:  python analysis/gpt4o/analysis_batch.py submit")
+    print(f"\nNext:  python analysis/openai_batch.py submit")


 # ---------------------------------------------------------------------------
@@ -431,7 +438,7 @@ def cmd_submit(args, client) -> None:
    save_status(status, job_dir)

    print(f"Job {n} submitted: {batch.id}  ({batch.status})")
-    print(f"  python analysis/gpt4o/analysis_batch.py status")
+    print(f"  python analysis/openai_batch.py status")


 # ---------------------------------------------------------------------------
--- a/analysis/gpt4o/analysis_realtime.py
+++ b/analysis/gpt4o/analysis_realtime.py
@@ -1,12 +1,12 @@
 #!/usr/bin/env python3
 """
-analysis/gpt4o/analysis-realtime.py — Synchronous GPT-4o pipeline for VA Townhall comments.
+analysis/openai_realtime.py — Synchronous GPT-4o pipeline for VA Townhall comments.

 Usage:
-    python analysis/gpt4o/analysis-realtime.py <input_jsonl> [--limit {5,10,20,50}] [--model MODEL]
+    python analysis/openai_realtime.py <input_jsonl> [--limit {5,10,20,50}] [--model MODEL]

 Output:
-    analysis/gpt4o/forum{id}_{scrape_ts}_{model}_{run_ts}.jsonl
+    analysis/forum{id}_{scrape_ts}_{model}_{run_ts}.jsonl
 """

 import argparse
@@ -30,7 +30,7 @@ except ImportError:
 # ---------------------------------------------------------------------------
 # Prompt — loaded from analysis/prompt-1.txt at import time

-_PROMPT_FILE = Path(__file__).parent.parent / "prompt-1.txt"
+_PROMPT_FILE = Path(__file__).parent / "prompt-1.txt"
 SYSTEM_PROMPT = _PROMPT_FILE.read_text(encoding="utf-8").strip()
 PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]

--- a/analysis/gpt4o/tokenizer.py
+++ b/analysis/gpt4o/tokenizer.py
@@ -3,10 +3,11 @@
 tokenizer.py — estimate token usage and cost for a batch analysis run.

 Usage:
-    python analysis/gpt4o/tokenizer.py output/f452.jsonl [--prompt analysis/prompt-1.txt]
+    python analysis/tokenizer.py output/f452.jsonl [--prompt analysis/prompt-1.txt]
+    python analysis/tokenizer.py analysis/jobs/f452-1/job1-input.jsonl  # count actual tokens in a job

-Prints a per-model comparison table and writes report.json next to the input file.
-Run this before analysis_batch.py create.
+Prints a per-model comparison table and writes reports/<stem>-report.json.
+Run this before openai_batch.py create.
 """

 import argparse
@@ -17,7 +18,7 @@ import sys
 from pathlib import Path

 sys.path.insert(0, str(Path(__file__).parent))
-import analysis_batch as _ab
+import openai_batch as _ab

 # Input pricing ($/1M tokens, batch API) — from docs/openai.md, updated 2026-05-05.
 # Add Anthropic/other models here when needed; only models with a LIMITS entry are reported.
@@ -66,6 +67,32 @@ def compute_report(
    return report


+def count_input_tokens(path: Path, model: str = "gpt-4o") -> dict:
+    """Count tokens in an existing job input JSONL (batch request format).
+
+    Each line must have body.messages (as written by build_batch_request_line).
+    Returns {"total_tokens": int, "total_requests": int, "min": int, "max": int, "mean": float}.
+    """
+    counts = []
+    with open(path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if not line:
+                continue
+            req = json.loads(line)
+            messages = req["body"]["messages"]
+            counts.append(_ab.estimate_tokens(messages, model))
+    if not counts:
+        return {"total_tokens": 0, "total_requests": 0, "min": 0, "max": 0, "mean": 0.0}
+    return {
+        "total_tokens": sum(counts),
+        "total_requests": len(counts),
+        "min": min(counts),
+        "max": max(counts),
+        "mean": round(sum(counts) / len(counts), 1),
+    }
+
+
 def print_table(report: dict) -> None:
    """Print a human-readable model comparison table to stdout."""
    print(f"\nInput:    {report['input_file']}")
@@ -90,11 +117,21 @@ def print_table(report: dict) -> None:
    print()


+def _is_job_input(path: Path) -> bool:
+    """Return True if this JSONL looks like a batch request file (has custom_id)."""
+    with open(path, encoding="utf-8") as f:
+        for line in f:
+            line = line.strip()
+            if line:
+                return "custom_id" in json.loads(line)
+    return False
+
+
 def main() -> None:
-    _default_prompt = Path(__file__).parent.parent / "prompt-1.txt"
+    _default_prompt = Path(__file__).parent / "prompt-1.txt"

    parser = argparse.ArgumentParser(description="Estimate batch token usage and cost.")
-    parser.add_argument("input", help="Scraped JSONL file")
+    parser.add_argument("input", help="Scraped JSONL or job input JSONL (jobN-input.jsonl)")
    parser.add_argument(
        "--prompt",
        default=str(_default_prompt),
@@ -106,6 +143,16 @@ def main() -> None:
    if not input_path.exists():
        sys.exit(f"File not found: {input_path}")

+    # --- Mode: count tokens in an existing job input file ---
+    if _is_job_input(input_path):
+        result = count_input_tokens(input_path)
+        print(f"\nJob input: {input_path.name}")
+        print(f"  Requests : {result['total_requests']:,}")
+        print(f"  Tokens   : {result['total_tokens']:,}")
+        print(f"  Per-req  : min={result['min']}  max={result['max']}  mean={result['mean']}")
+        return
+
+    # --- Mode: estimate from raw scrape file and write reports/<stem>-report.json ---
    prompt_path = Path(args.prompt)
    if not prompt_path.exists():
        sys.exit(f"Prompt file not found: {prompt_path}")
@@ -131,10 +178,12 @@ def main() -> None:

    print_table(report)

-    out_path = input_path.parent / "report.json"
+    reports_dir = Path(__file__).parent.parent / "reports"
+    reports_dir.mkdir(exist_ok=True)
+    out_path = reports_dir / f"{input_path.stem}-report.json"
    out_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
    print(f"Report written to: {out_path}")
-    print(f"\nNext:  python analysis/gpt4o/analysis_batch.py create {out_path} --model <model>")
+    print(f"\nNext:  python analysis/openai_batch.py create {out_path} --model <model>")


 if __name__ == "__main__":
--- a/docs/pipeline-1.2.3.svg
+++ b/docs/pipeline-1.2.3.svg
--- a/docs/pipeline-v1.2.3.drawio
+++ b/docs/pipeline-v1.2.3.drawio
@@ -1,9 +1,18 @@
 <mxfile host="app.diagrams.net">
  <diagram name="Page-1" id="0sW-Vs8X5usvYmJikUIv">
-    <mxGraphModel dx="2179" dy="1118" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
+    <mxGraphModel dx="1315" dy="798" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
      <root>
        <mxCell id="0" />
        <mxCell id="1" parent="0" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-61" parent="1" style="rounded=0;whiteSpace=wrap;html=1;" value="" vertex="1">
+          <mxGeometry height="90" width="190" x="1000" y="330" as="geometry" />
+        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-60" parent="1" style="rounded=0;whiteSpace=wrap;html=1;" value="" vertex="1">
+          <mxGeometry height="90" width="190" x="1010" y="340" as="geometry" />
+        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-59" parent="1" style="rounded=0;whiteSpace=wrap;html=1;" value="" vertex="1">
+          <mxGeometry height="90" width="190" x="1020" y="350" as="geometry" />
+        </mxCell>
        <mxCell id="mENAtx_syaeSO5uR6kG6-3" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=16.5;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-29">
          <mxGeometry relative="1" as="geometry">
            <mxPoint x="200" y="290" as="targetPoint" />
@@ -18,18 +27,18 @@
        <mxCell id="mENAtx_syaeSO5uR6kG6-5" parent="1" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" value="tokenizer" vertex="1">
          <mxGeometry height="60" width="120" x="400" y="170" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-6" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=center;verticalAlign=top;rounded=0;" value="gather forum data" vertex="1">
-          <mxGeometry height="60" width="120" x="20" y="240" as="geometry" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-6" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="&lt;div align=&quot;left&quot;&gt;- collect forum data&lt;/div&gt;" vertex="1">
+          <mxGeometry height="60" width="120" x="40" y="240" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-7" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="&lt;div&gt;tokenize forum,&lt;/div&gt;&lt;div&gt;generate report w/&lt;/div&gt;&lt;div&gt;recommendations&lt;/div&gt;" vertex="1">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-7" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="&lt;div&gt;- tokenize forum&lt;/div&gt;&lt;div&gt;- generate report w/&lt;/div&gt;&lt;div&gt;recommendations&lt;/div&gt;" vertex="1">
          <mxGeometry height="60" width="120" x="400" y="240" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-28" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-35">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-28" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-73">
          <mxGeometry relative="1" as="geometry">
-            <mxPoint x="910" y="270" as="targetPoint" />
+            <mxPoint x="953" y="240" as="targetPoint" />
          </mxGeometry>
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-19" parent="1" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" value="batch" vertex="1">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-19" parent="1" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" value="openai_batch" vertex="1">
          <mxGeometry height="60" width="120" x="720" y="170" as="geometry" />
        </mxCell>
        <mxCell id="mENAtx_syaeSO5uR6kG6-21" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=right;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="&lt;div&gt;--model&lt;/div&gt;&lt;div&gt;--limit&lt;/div&gt;" vertex="1">
@@ -38,11 +47,8 @@
        <mxCell id="mENAtx_syaeSO5uR6kG6-23" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=right;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="--forum" vertex="1">
          <mxGeometry height="60" width="120" x="-90" y="170" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-25" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=right;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="--prompt" vertex="1">
-          <mxGeometry height="60" width="120" x="270" y="210" as="geometry" />
-        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-26" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="&lt;div&gt;split job into batches&lt;/div&gt;&lt;div&gt;submit first batch&lt;/div&gt;&lt;div&gt;status of current batch&lt;/div&gt;&lt;div&gt;download batch artifacts&lt;/div&gt;" vertex="1">
-          <mxGeometry height="60" width="120" x="720" y="240" as="geometry" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-26" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="&lt;div&gt;- split job into batches&lt;/div&gt;&lt;div&gt;- submit first batch&lt;/div&gt;&lt;div&gt;- status of current batch&lt;/div&gt;&lt;div&gt;- download batch artifacts&lt;/div&gt;" vertex="1">
+          <mxGeometry height="70" width="140" x="720" y="240" as="geometry" />
        </mxCell>
        <mxCell id="mENAtx_syaeSO5uR6kG6-29" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="" vertex="1">
          <mxGeometry height="70" width="50" x="210" y="240" as="geometry" />
@@ -58,7 +64,7 @@
            </Array>
          </mxGeometry>
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-31" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;forum&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-31" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;&amp;lt;forumid&amp;gt;&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
          <mxGeometry height="70" width="50" x="230" y="260" as="geometry" />
        </mxCell>
        <mxCell id="mENAtx_syaeSO5uR6kG6-47" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-34" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=50;exitDy=43.5;exitPerimeter=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-19">
@@ -69,30 +75,42 @@
            </Array>
          </mxGeometry>
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-34" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;report&lt;/div&gt;&lt;div&gt;.json&lt;/div&gt;" vertex="1">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-34" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;&amp;lt;forumid&amp;gt;&lt;br&gt;-report&lt;/div&gt;&lt;div&gt;.json&lt;/div&gt;" vertex="1">
          <mxGeometry height="70" width="50" x="560" y="240" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-35" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="job.json" vertex="1">
-          <mxGeometry height="70" width="50" x="890" y="240" as="geometry" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-35" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;status&lt;/div&gt;&lt;div&gt;.json&lt;/div&gt;" vertex="1">
+          <mxGeometry height="70" width="50" x="913.25" y="360" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-41" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="" vertex="1">
-          <mxGeometry height="70" width="50" x="940" y="340" as="geometry" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-43" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;jobN-&lt;/div&gt;&lt;div&gt;output&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
+          <mxGeometry height="70" width="50" x="1090" y="360" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-42" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="" vertex="1">
-          <mxGeometry height="70" width="50" x="950" y="350" as="geometry" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-48" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;jobN-errors&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
+          <mxGeometry height="70" width="50" x="1150" y="360" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-43" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;batchN-&lt;/div&gt;&lt;div&gt;output-&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
-          <mxGeometry height="70" width="50" x="960" y="360" as="geometry" />
+        <mxCell id="mENAtx_syaeSO5uR6kG6-54" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;jobN-&lt;/div&gt;&lt;div&gt;input&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
+          <mxGeometry height="70" width="50" x="1030" y="360" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-48" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;errors&lt;/div&gt;&lt;div&gt;.jsonl&lt;/div&gt;" vertex="1">
-          <mxGeometry height="70" width="50" x="980" y="240" as="geometry" />
-        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-51" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=16.5;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-41">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-64" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-63" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=50;exitDy=43.5;exitPerimeter=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-5">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
-        <mxCell id="mENAtx_syaeSO5uR6kG6-53" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=16.5;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-48">
+        <mxCell id="mENAtx_syaeSO5uR6kG6-63" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="&lt;div&gt;prompt&lt;/div&gt;&lt;div&gt;.txt&lt;/div&gt;" vertex="1">
+          <mxGeometry height="70" width="50" x="270" y="90" as="geometry" />
+        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-67" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="create" vertex="1">
+          <mxGeometry height="20" width="120" x="850" y="170" as="geometry" />
+        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-71" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="&lt;div&gt;submit&lt;/div&gt;&lt;div&gt;&lt;br&gt;&lt;/div&gt;&lt;div&gt;status&lt;/div&gt;&lt;div&gt;download&lt;/div&gt;" vertex="1">
+          <mxGeometry height="60" width="120" x="1020" y="240" as="geometry" />
+        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-75" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-73" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" target="mENAtx_syaeSO5uR6kG6-35">
          <mxGeometry relative="1" as="geometry" />
        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-76" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-73" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-61">
+          <mxGeometry relative="1" as="geometry" />
+        </mxCell>
+        <mxCell id="mENAtx_syaeSO5uR6kG6-73" parent="1" style="image;aspect=fixed;perimeter=ellipsePerimeter;html=1;align=center;shadow=0;dashed=0;spacingTop=3;image=img/lib/active_directory/folder.svg;" value="&amp;lt;forumid&amp;gt;-N" vertex="1">
+          <mxGeometry height="50" width="36.5" x="920" y="240" as="geometry" />
+        </mxCell>
      </root>
    </mxGraphModel>
  </diagram>
--- a/docs/pipeline-v1.2.3.svg
+++ b/docs/pipeline-v1.2.3.svg
--- a/docs/tasks.org
+++ b/docs/tasks.org
@@ -201,44 +201,48 @@ batch processing should  be a resumable job queue, not a one-shot script. the us
   - remaining-comment detection

 ** notes
- analysis/gpt4o/tokenizer.py: new standalone script; imports analysis_batch for MODEL_LIMITS, estimate_tokens, build_messages. Reads input JSONL + prompt, computes per-model jobs/cost/time table, writes report.json to input file's directory. MODEL_PRICING dict lives here (not in analysis_batch).
- analysis/gpt4o/analysis_batch.py: fully rewritten with four subcommands: create, submit, status, download. No longer uses REQUESTS_DIR / RAW_DIR / RUNS_DIR.
- Job directories: analysis/gpt4o/jobs/<stem[:8]>-N/ (e.g. f452-1). Each run is self-contained: forum.jsonl, prompt.txt, report.json, jobN-input.jsonl, jobN-output-raw.jsonl, jobN-output.jsonl, jobN-errors.jsonl.
+- analysis/tokenizer.py: new standalone script; imports openai_batch for MODEL_LIMITS, estimate_tokens, build_messages. Reads input JSONL + prompt, computes per-model jobs/cost/time table, writes reports/<stem>-report.json. MODEL_PRICING dict lives here (not in openai_batch). Pass a jobN-input.jsonl to count actual tokens instead.
+- analysis/openai_batch.py: fully rewritten with four subcommands: create, submit, status, download. Job dirs at analysis/jobs/<stem[:8]>-N/.
+- Job directories: analysis/jobs/<stem[:8]>-N/ (e.g. f452-1). Each run is self-contained: forum.jsonl, prompt.txt, report.json, jobN-input.jsonl, jobN-output-raw.jsonl, jobN-output.jsonl, jobN-errors.jsonl.
 - status.json: tracks all jobs with pending/submitted/in_progress/completed/failed states. Updated by submit, status, download.
 - _find_next_eligible_job: pure function for testability. Returns (next_pending_job, None) or (None, warning). Blocks submission if previous job is in_progress/submitted.
 - create: no API key required. Reads report.json, re-chunks comments, writes all jobN-input.jsonl files, writes status.json.
 - submit: uploads jobN-input.jsonl to Files API, creates batch, updates status.json to 'submitted'. Will not stack batches.
 - status: retrieves batch from OpenAI, updates status.json counts and status.
 - download: auto-runs status first, downloads output_file_id → jobN-output-raw.jsonl, error_file_id → jobN-errors.jsonl, normalizes → jobN-output.jsonl. Updates status.json.
- tests/test_tokenizer.py: 15 tests for compute_report schema, cost/time calculation, MODEL_PRICING coverage, print_table output, report.json round-trip.
+- tests/tokenizer.py: 19 tests for compute_report schema, cost/time calculation, MODEL_PRICING coverage, print_table output, count_input_tokens, report.json round-trip.
+- Token limit buffer: _LIMIT_BUFFER=0.80 (20% headroom). Estimate uses the OpenAI cookbook chat formula (3 overhead tokens per message + 3-token reply primer). Verify a job file with: python analysis/tokenizer.py analysis/jobs/<dir>/jobN-input.jsonl

 *** usage
-#+begin_src sh
+#+begin_src powershell
 # 1. estimate tokens and cost
-python analysis/gpt4o/tokenizer.py output/f452.jsonl --prompt analysis/prompt-1.txt
-# writes output/report.json
+python analysis/tokenizer.py output/f452.jsonl --prompt analysis/prompt-1.txt
+# writes reports/f452-report.json

-# 2. create job directory (no api key needed)
-python analysis/gpt4o/analysis_batch.py create output/report.json --model gpt-4o-mini
-# creates analysis/gpt4o/jobs/f452-1/
+# 2. verify actual tokens in a job file (optional sanity check)
+python analysis/tokenizer.py analysis/jobs/f452-1/job1-input.jsonl

-# 3. submit first job
-python analysis/gpt4o/analysis_batch.py submit
+# 3. create job directory (no api key needed)
+python analysis/openai_batch.py create reports/f452-report.json --model gpt-5.4-mini
+# creates analysis/jobs/f452-1/

-# 4. check status (repeat until completed)
-python analysis/gpt4o/analysis_batch.py status
+# 4. submit first job
+python analysis/openai_batch.py submit

-# 5. download and normalize
-python analysis/gpt4o/analysis_batch.py download
+# 5. check status (repeat until completed)
+python analysis/openai_batch.py status

-# 6. submit next job (if multi-job run), then repeat 4-5
-python analysis/gpt4o/analysis_batch.py submit
+# 6. download and normalize
+python analysis/openai_batch.py download
+
+# 7. submit next job (if multi-job run), then repeat 5-6
+python analysis/openai_batch.py submit
 #+end_src

 ** evidence
 - commit:
- tests: passing (pytest tests/analysis_gpt4o_batch.py tests/test_tokenizer.py)
- datetime: [2026-05-05 Tue]
+- tests: passing (pytest tests/analysis_gpt4o_batch.py tests/analysis_gpt4o_realtime.py tests/tokenizer.py)
+- datetime: [2026-05-06 Wed]

 * === Backlog ===
 * [ ] X: analysis validation view
--- a/reports/f452-1.json
+++ b/reports/f452-1.json
@@ -0,0 +1,43 @@
+{
+  "prompt": "analysis\\prompt-1.txt",
+  "prompt_hash": "cb41250",
+  "input_file": "output\\f452.jsonl",
+  "input_sha256": "59dcc8b13cc2a386977a8b934c498c7e639b7e684a94ca1bfd10a14878670018",
+  "total_comments": 9083,
+  "input_tokens": 6397254,
+  "gpt-5.5": {
+    "jobs": 9,
+    "cost_$": 15.9931,
+    "est_queue_days": 7.11
+  },
+  "gpt-5.4": {
+    "jobs": 9,
+    "cost_$": 7.9966,
+    "est_queue_days": 7.11
+  },
+  "gpt-5.4-mini": {
+    "jobs": 4,
+    "cost_$": 2.399,
+    "est_queue_days": 3.2
+  },
+  "gpt-5.4-nano": {
+    "jobs": 40,
+    "cost_$": 0.6397,
+    "est_queue_days": 31.99
+  },
+  "gpt-4o": {
+    "jobs": 9,
+    "cost_$": 7.9966,
+    "est_queue_days": 7.11
+  },
+  "gpt-4o-mini": {
+    "jobs": 4,
+    "cost_$": 0.4798,
+    "est_queue_days": 3.2
+  },
+  "gpt-o4-mini": {
+    "jobs": 4,
+    "cost_$": 3.5185,
+    "est_queue_days": 3.2
+  }
+}
--- a/tests/analysis_gpt4o_batch.py
+++ b/tests/analysis_gpt4o_batch.py
@@ -1,4 +1,4 @@
-"""Unit tests for analysis/gpt4o/analysis_batch.py — no real API calls."""
+"""Unit tests for analysis/openai_batch.py — no real API calls."""

 import json
 import sys
@@ -7,8 +7,8 @@ from unittest.mock import MagicMock

 import pytest

-sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
-import analysis_batch as bt
+sys.path.insert(0, str(Path(__file__).parent.parent / "analysis"))
+import openai_batch as bt


 # ---------------------------------------------------------------------------
@@ -101,7 +101,7 @@ def test_prompt_version_is_7_hex_chars():

 def test_prompt_version_matches_realtime():
    """Both scripts must derive the same PROMPT_VERSION from the same file."""
-    import analysis_realtime as rt
+    import openai_realtime as rt
    assert bt.PROMPT_VERSION == rt.PROMPT_VERSION


@@ -242,7 +242,8 @@ def test_estimate_tokens_fallback_without_tiktoken(monkeypatch):
    monkeypatch.setitem(_sys.modules, "tiktoken", None)
    messages = [{"role": "user", "content": "x" * 300}]
    result = bt.estimate_tokens(messages, "gpt-4o")
-    assert result == 4 + 300 // 3
+    # fallback: 3 primer + (3 + 300//3) per message
+    assert result == 3 + (3 + 300 // 3)


 # ---------------------------------------------------------------------------
--- a/tests/analysis_gpt4o_realtime.py
+++ b/tests/analysis_gpt4o_realtime.py
@@ -1,4 +1,4 @@
-"""Unit tests for analysis/gpt4o/analysis_realtime.py — no real API calls."""
+"""Unit tests for analysis/openai_realtime.py — no real API calls."""

 import json
 import sys
@@ -7,8 +7,8 @@ from unittest.mock import MagicMock

 import pytest

-sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
-import analysis_realtime as rt
+sys.path.insert(0, str(Path(__file__).parent.parent / "analysis"))
+import openai_realtime as rt


 # ---------------------------------------------------------------------------
--- a/tests/tokenizer.py
+++ b/tests/tokenizer.py
@@ -1,4 +1,4 @@
-"""Unit tests for analysis/gpt4o/tokenizer.py — no real API calls."""
+"""Unit tests for analysis/tokenizer.py — no real API calls."""

 import io
 import json
@@ -9,9 +9,9 @@ from unittest.mock import patch

 import pytest

-sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
+sys.path.insert(0, str(Path(__file__).parent.parent / "analysis"))
 import tokenizer as tk
-import analysis_batch as ab
+import openai_batch as ab


 # ---------------------------------------------------------------------------
@@ -199,3 +199,52 @@ def test_report_json_roundtrip(tmp_path):
    assert loaded["total_comments"] == report["total_comments"]
    assert loaded["input_tokens"] == report["input_tokens"]
    assert loaded["gpt-4o-mini"]["jobs"] == report["gpt-4o-mini"]["jobs"]
+
+
+# ---------------------------------------------------------------------------
+# count_input_tokens
+
+def _make_job_input(tmp_path, comments, forum=None) -> Path:
+    """Write a batch request JSONL in the same format as job1-input.jsonl."""
+    p = tmp_path / "job1-input.jsonl"
+    with open(p, "w", encoding="utf-8") as f:
+        for c in comments:
+            f.write(json.dumps(ab.build_batch_request_line(c, forum, "gpt-4o-mini")) + "\n")
+    return p
+
+
+def test_count_input_tokens_matches_estimate(tmp_path):
+    """count_input_tokens on a freshly written job file equals the sum estimate_tokens produces."""
+    p = _make_job_input(tmp_path, COMMENTS, FORUM_ITEM)
+    result = tk.count_input_tokens(p, "gpt-4o-mini")
+    expected = sum(
+        ab.estimate_tokens(ab.build_messages(c, FORUM_ITEM)[0], "gpt-4o-mini")
+        for c in COMMENTS
+    )
+    assert result["total_tokens"] == expected
+    assert result["total_requests"] == len(COMMENTS)
+
+
+def test_count_input_tokens_fields(tmp_path):
+    p = _make_job_input(tmp_path, COMMENTS, FORUM_ITEM)
+    result = tk.count_input_tokens(p)
+    assert set(result.keys()) == {"total_tokens", "total_requests", "min", "max", "mean"}
+    assert result["min"] <= result["mean"] <= result["max"]
+    assert result["min"] > 0
+
+
+def test_count_input_tokens_empty_file(tmp_path):
+    p = tmp_path / "empty.jsonl"
+    p.write_text("", encoding="utf-8")
+    result = tk.count_input_tokens(p)
+    assert result["total_tokens"] == 0
+    assert result["total_requests"] == 0
+
+
+def test_count_input_tokens_includes_system_prompt(tmp_path):
+    """Token count must be higher than user-message-only text length / 3 (prompt adds tokens)."""
+    p = _make_job_input(tmp_path, [COMMENT_A], FORUM_ITEM)
+    result = tk.count_input_tokens(p)
+    user_chars = len(COMMENT_A.get("text", ""))
+    # system prompt alone is hundreds of tokens; total must exceed naive user-text estimate
+    assert result["total_tokens"] > user_chars // 3