completed openai batch work
This commit is contained in:
@@ -43,3 +43,4 @@ Description and PM notes
|
||||
- project dir: `%userprofile%\projects\vath\`
|
||||
- python venv: `%userprofile%\projects\vath\venv\scripts\activate`
|
||||
- pytest (inside venv): `python -m pytest tests/`
|
||||
- create tests without the `test_` prefix, e.g., `tests/tokenizer.py`, not `tests/test_tokenizer.py`
|
||||
|
||||
9084
analysis/jobs/f452-1/forum.jsonl
Normal file
9084
analysis/jobs/f452-1/forum.jsonl
Normal file
File diff suppressed because one or more lines are too long
2270
analysis/jobs/f452-1/job1-input.jsonl
Normal file
2270
analysis/jobs/f452-1/job1-input.jsonl
Normal file
File diff suppressed because one or more lines are too long
2270
analysis/jobs/f452-1/job1-output-raw.jsonl
Normal file
2270
analysis/jobs/f452-1/job1-output-raw.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2270
analysis/jobs/f452-1/job1-output.jsonl
Normal file
2270
analysis/jobs/f452-1/job1-output.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2274
analysis/jobs/f452-1/job2-input.jsonl
Normal file
2274
analysis/jobs/f452-1/job2-input.jsonl
Normal file
File diff suppressed because one or more lines are too long
2274
analysis/jobs/f452-1/job2-output-raw.jsonl
Normal file
2274
analysis/jobs/f452-1/job2-output-raw.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2274
analysis/jobs/f452-1/job2-output.jsonl
Normal file
2274
analysis/jobs/f452-1/job2-output.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2282
analysis/jobs/f452-1/job3-input.jsonl
Normal file
2282
analysis/jobs/f452-1/job3-input.jsonl
Normal file
File diff suppressed because one or more lines are too long
2282
analysis/jobs/f452-1/job3-output-raw.jsonl
Normal file
2282
analysis/jobs/f452-1/job3-output-raw.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2282
analysis/jobs/f452-1/job3-output.jsonl
Normal file
2282
analysis/jobs/f452-1/job3-output.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2257
analysis/jobs/f452-1/job4-input.jsonl
Normal file
2257
analysis/jobs/f452-1/job4-input.jsonl
Normal file
File diff suppressed because one or more lines are too long
2257
analysis/jobs/f452-1/job4-output-raw.jsonl
Normal file
2257
analysis/jobs/f452-1/job4-output-raw.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
2257
analysis/jobs/f452-1/job4-output.jsonl
Normal file
2257
analysis/jobs/f452-1/job4-output.jsonl
Normal file
File diff suppressed because it is too large
Load Diff
23
analysis/jobs/f452-1/prompt.txt
Normal file
23
analysis/jobs/f452-1/prompt.txt
Normal file
@@ -0,0 +1,23 @@
|
||||
You are an expert policy analyst classifying public comments submitted to the Virginia Town Hall
|
||||
regulatory comment system. You will be given the text of a proposed regulation and a single
|
||||
public comment. Return ONLY a JSON object — no other text.
|
||||
|
||||
Definitions:
|
||||
- stance: the commenter's position on whether the regulation should be adopted.
|
||||
"support" = wants it approved (as-is or with changes);
|
||||
"oppose" = wants it rejected or substantially weakened;
|
||||
"neutral" = takes no position, asks a question, or provides factual input only;
|
||||
"unknown" = too vague, off-topic, or uninterpretable to classify.
|
||||
- tone: the emotional register of the writing, independent of stance.
|
||||
"positive" = affirming, hopeful, appreciative;
|
||||
"negative" = angry, fearful, alarmed, or contemptuous;
|
||||
"neutral" = matter-of-fact, procedural, or informational;
|
||||
"mixed" = contains both positive and negative emotional content;
|
||||
"unclear" = tone cannot be determined (e.g., a one-word comment).
|
||||
- stance_confidence: float 0.0-1.0, your confidence in the stance label.
|
||||
- stance_rationale: 1-3 sentences explaining the key evidence; quote specific phrases where possible.
|
||||
- tags: up to 5 short topic labels relevant to the comment's specific concerns (e.g.
|
||||
"parental rights", "student safety", "privacy", "religious freedom", "LGBTQ+ inclusion",
|
||||
"bullying prevention", "school sports", "bathroom access"). Empty array if none apply.
|
||||
|
||||
Return exactly these keys: stance, stance_confidence, stance_rationale, tone, tags.
|
||||
43
analysis/jobs/f452-1/report.json
Normal file
43
analysis/jobs/f452-1/report.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"prompt": "analysis\\prompt-1.txt",
|
||||
"prompt_hash": "cb41250",
|
||||
"input_file": "output\\f452.jsonl",
|
||||
"input_sha256": "59dcc8b13cc2a386977a8b934c498c7e639b7e684a94ca1bfd10a14878670018",
|
||||
"total_comments": 9083,
|
||||
"input_tokens": 6397254,
|
||||
"gpt-5.5": {
|
||||
"jobs": 9,
|
||||
"cost_$": 15.9931,
|
||||
"est_queue_days": 7.11
|
||||
},
|
||||
"gpt-5.4": {
|
||||
"jobs": 9,
|
||||
"cost_$": 7.9966,
|
||||
"est_queue_days": 7.11
|
||||
},
|
||||
"gpt-5.4-mini": {
|
||||
"jobs": 4,
|
||||
"cost_$": 2.399,
|
||||
"est_queue_days": 3.2
|
||||
},
|
||||
"gpt-5.4-nano": {
|
||||
"jobs": 40,
|
||||
"cost_$": 0.6397,
|
||||
"est_queue_days": 31.99
|
||||
},
|
||||
"gpt-4o": {
|
||||
"jobs": 9,
|
||||
"cost_$": 7.9966,
|
||||
"est_queue_days": 7.11
|
||||
},
|
||||
"gpt-4o-mini": {
|
||||
"jobs": 4,
|
||||
"cost_$": 0.4798,
|
||||
"est_queue_days": 3.2
|
||||
},
|
||||
"gpt-o4-mini": {
|
||||
"jobs": 4,
|
||||
"cost_$": 3.5185,
|
||||
"est_queue_days": 3.2
|
||||
}
|
||||
}
|
||||
57
analysis/jobs/f452-1/status.json
Normal file
57
analysis/jobs/f452-1/status.json
Normal file
@@ -0,0 +1,57 @@
|
||||
{
|
||||
"model": "gpt-5.4-mini",
|
||||
"prompt_hash": "cb41250",
|
||||
"input_file": "output\\f452.jsonl",
|
||||
"input_sha256": "59dcc8b13cc2a386977a8b934c498c7e639b7e684a94ca1bfd10a14878670018",
|
||||
"total_comments": 9083,
|
||||
"input_tokens": 6397254,
|
||||
"est_queue_days": 3.2,
|
||||
"cost_$": 2.399,
|
||||
"total_jobs": 4,
|
||||
"jobs": [
|
||||
{
|
||||
"job_num": 1,
|
||||
"run_id": "76c97113-63aa-43db-8f84-9c60ebcbb105",
|
||||
"status": "completed",
|
||||
"batch_id": "batch_69fb9081639881909be0c40d86edd747",
|
||||
"records_submitted": 2270,
|
||||
"records_completed": 2270,
|
||||
"records_failed": 0,
|
||||
"submitted_at": "2026-05-06T19:03:28.949240+00:00",
|
||||
"completed_at": "2026-05-06T20:09:14+00:00"
|
||||
},
|
||||
{
|
||||
"job_num": 2,
|
||||
"run_id": "b8f3b0bb-f155-4a5c-acce-f3504c0e09aa",
|
||||
"status": "completed",
|
||||
"batch_id": "batch_69fba02df7b481909e96afa1ee8879f5",
|
||||
"records_submitted": 2274,
|
||||
"records_completed": 2274,
|
||||
"records_failed": 0,
|
||||
"submitted_at": "2026-05-06T20:10:21.424330+00:00",
|
||||
"completed_at": "2026-05-06T20:37:11+00:00"
|
||||
},
|
||||
{
|
||||
"job_num": 3,
|
||||
"run_id": "8d769f37-6beb-4a1b-87ee-3f66cdc6adc8",
|
||||
"status": "completed",
|
||||
"batch_id": "batch_69fba69a85488190977792b6f95b614b",
|
||||
"records_submitted": 2282,
|
||||
"records_completed": 2282,
|
||||
"records_failed": 0,
|
||||
"submitted_at": "2026-05-06T20:37:45.586815+00:00",
|
||||
"completed_at": "2026-05-06T21:09:24+00:00"
|
||||
},
|
||||
{
|
||||
"job_num": 4,
|
||||
"run_id": "e6affbc2-ddc9-43a6-b8e9-d1f47e736283",
|
||||
"status": "completed",
|
||||
"batch_id": "batch_69fbe44565748190ad19f17ee3143f8d",
|
||||
"records_submitted": 2257,
|
||||
"records_completed": 2257,
|
||||
"records_failed": 0,
|
||||
"submitted_at": "2026-05-07T01:00:52.886953+00:00",
|
||||
"completed_at": "2026-05-07T09:20:01+00:00"
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
analysis_batch.py — OpenAI Batch API job runner
|
||||
openai_batch.py — OpenAI Batch API job runner
|
||||
|
||||
Run tokenizer.py first to generate report.json, then:
|
||||
create <report.json> --model <model> — build job directory
|
||||
@@ -8,7 +8,7 @@ Run tokenizer.py first to generate report.json, then:
|
||||
status [--job N] [--dir DIR] — check job status
|
||||
download [--job N] [--dir DIR] — download + normalize completed jobs
|
||||
|
||||
DIR is a name under analysis/gpt4o/jobs/ (default: most recently created).
|
||||
DIR is a name under analysis/jobs/ (default: most recently created).
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -52,17 +52,24 @@ _MODEL_ENCODING: dict[str, str] = {
|
||||
"gpt-4o-mini": "o200k_base",
|
||||
"gpt-o4-mini": "o200k_base",
|
||||
}
|
||||
_LIMIT_BUFFER = 0.90
|
||||
_LIMIT_BUFFER = 0.80
|
||||
|
||||
|
||||
def estimate_tokens(messages: list[dict], model: str) -> int:
|
||||
"""Exact token count via tiktoken; falls back to chars/3 + 4 overhead per message."""
|
||||
"""Token count per OpenAI cookbook chat formula; without tiktoken, falls back to chars/3 plus the fixed per-message and reply-primer overhead."""
|
||||
try:
|
||||
import tiktoken
|
||||
enc = tiktoken.get_encoding(_MODEL_ENCODING.get(model, "o200k_base"))
|
||||
return sum(4 + len(enc.encode(m["content"])) for m in messages)
|
||||
# Per OpenAI cookbook for gpt-4o: 3 overhead per message + role + content;
|
||||
# plus 3 tokens for the reply primer (<|start|>assistant<|message|>).
|
||||
total = 3 # reply primer
|
||||
for m in messages:
|
||||
total += 3
|
||||
total += len(enc.encode(m.get("role", "")))
|
||||
total += len(enc.encode(m["content"]))
|
||||
return total
|
||||
except ImportError:
|
||||
return sum(4 + len(m["content"]) // 3 for m in messages)
|
||||
return 3 + sum(3 + len(m["content"]) // 3 for m in messages)
|
||||
|
||||
|
||||
def chunk_comments_by_tokens(
|
||||
@@ -91,7 +98,7 @@ def chunk_comments_by_tokens(
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt
|
||||
|
||||
_DEFAULT_PROMPT_FILE = Path(__file__).parent.parent / "prompt-1.txt"
|
||||
_DEFAULT_PROMPT_FILE = Path(__file__).parent / "prompt-1.txt"
|
||||
SYSTEM_PROMPT = _DEFAULT_PROMPT_FILE.read_text(encoding="utf-8").strip()
|
||||
PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]
|
||||
|
||||
@@ -375,7 +382,7 @@ def cmd_create(args) -> None:
|
||||
|
||||
print(f"Created: {job_dir.name}")
|
||||
print(f" {len(chunks)} job(s) | {len(comments)} comments | model: {args.model}")
|
||||
print(f"\nNext: python analysis/gpt4o/analysis_batch.py submit")
|
||||
print(f"\nNext: python analysis/openai_batch.py submit")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -431,7 +438,7 @@ def cmd_submit(args, client) -> None:
|
||||
save_status(status, job_dir)
|
||||
|
||||
print(f"Job {n} submitted: {batch.id} ({batch.status})")
|
||||
print(f" python analysis/gpt4o/analysis_batch.py status")
|
||||
print(f" python analysis/openai_batch.py status")
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1,12 +1,12 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
analysis/gpt4o/analysis-realtime.py — Synchronous GPT-4o pipeline for VA Townhall comments.
|
||||
analysis/openai_realtime.py — Synchronous GPT-4o pipeline for VA Townhall comments.
|
||||
|
||||
Usage:
|
||||
python analysis/gpt4o/analysis-realtime.py <input_jsonl> [--limit {5,10,20,50}] [--model MODEL]
|
||||
python analysis/openai_realtime.py <input_jsonl> [--limit {5,10,20,50}] [--model MODEL]
|
||||
|
||||
Output:
|
||||
analysis/gpt4o/forum{id}_{scrape_ts}_{model}_{run_ts}.jsonl
|
||||
analysis/forum{id}_{scrape_ts}_{model}_{run_ts}.jsonl
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -30,7 +30,7 @@ except ImportError:
|
||||
# ---------------------------------------------------------------------------
|
||||
# Prompt — loaded from analysis/prompt-1.txt at import time
|
||||
|
||||
_PROMPT_FILE = Path(__file__).parent.parent / "prompt-1.txt"
|
||||
_PROMPT_FILE = Path(__file__).parent / "prompt-1.txt"
|
||||
SYSTEM_PROMPT = _PROMPT_FILE.read_text(encoding="utf-8").strip()
|
||||
PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]
|
||||
|
||||
@@ -3,10 +3,11 @@
|
||||
tokenizer.py — estimate token usage and cost for a batch analysis run.
|
||||
|
||||
Usage:
|
||||
python analysis/gpt4o/tokenizer.py output/f452.jsonl [--prompt analysis/prompt-1.txt]
|
||||
python analysis/tokenizer.py output/f452.jsonl [--prompt analysis/prompt-1.txt]
|
||||
python analysis/tokenizer.py analysis/jobs/f452-1/job1-input.jsonl # count actual tokens in a job
|
||||
|
||||
Prints a per-model comparison table and writes report.json next to the input file.
|
||||
Run this before analysis_batch.py create.
|
||||
Prints a per-model comparison table and writes reports/<stem>-report.json.
|
||||
Run this before openai_batch.py create.
|
||||
"""
|
||||
|
||||
import argparse
|
||||
@@ -17,7 +18,7 @@ import sys
|
||||
from pathlib import Path
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
import analysis_batch as _ab
|
||||
import openai_batch as _ab
|
||||
|
||||
# Input pricing ($/1M tokens, batch API) — from docs/openai.md, updated 2026-05-05.
|
||||
# Add Anthropic/other models here when needed; only models with a LIMITS entry are reported.
|
||||
@@ -66,6 +67,32 @@ def compute_report(
|
||||
return report
|
||||
|
||||
|
||||
def count_input_tokens(path: Path, model: str = "gpt-4o") -> dict:
|
||||
"""Count tokens in an existing job input JSONL (batch request format).
|
||||
|
||||
Each line must have body.messages (as written by build_batch_request_line).
|
||||
Returns {"total_tokens": int, "total_requests": int, "min": int, "max": int, "mean": float}.
|
||||
"""
|
||||
counts = []
|
||||
with open(path, encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
req = json.loads(line)
|
||||
messages = req["body"]["messages"]
|
||||
counts.append(_ab.estimate_tokens(messages, model))
|
||||
if not counts:
|
||||
return {"total_tokens": 0, "total_requests": 0, "min": 0, "max": 0, "mean": 0.0}
|
||||
return {
|
||||
"total_tokens": sum(counts),
|
||||
"total_requests": len(counts),
|
||||
"min": min(counts),
|
||||
"max": max(counts),
|
||||
"mean": round(sum(counts) / len(counts), 1),
|
||||
}
|
||||
|
||||
|
||||
def print_table(report: dict) -> None:
|
||||
"""Print a human-readable model comparison table to stdout."""
|
||||
print(f"\nInput: {report['input_file']}")
|
||||
@@ -90,11 +117,21 @@ def print_table(report: dict) -> None:
|
||||
print()
|
||||
|
||||
|
||||
def _is_job_input(path: Path) -> bool:
|
||||
"""Return True if the first non-blank line of this JSONL has a custom_id key (i.e. it looks like a batch request file)."""
|
||||
with open(path, encoding="utf-8") as f:
|
||||
for line in f:
|
||||
line = line.strip()
|
||||
if line:
|
||||
return "custom_id" in json.loads(line)
|
||||
return False
|
||||
|
||||
|
||||
def main() -> None:
|
||||
_default_prompt = Path(__file__).parent.parent / "prompt-1.txt"
|
||||
_default_prompt = Path(__file__).parent / "prompt-1.txt"
|
||||
|
||||
parser = argparse.ArgumentParser(description="Estimate batch token usage and cost.")
|
||||
parser.add_argument("input", help="Scraped JSONL file")
|
||||
parser.add_argument("input", help="Scraped JSONL or job input JSONL (jobN-input.jsonl)")
|
||||
parser.add_argument(
|
||||
"--prompt",
|
||||
default=str(_default_prompt),
|
||||
@@ -106,6 +143,16 @@ def main() -> None:
|
||||
if not input_path.exists():
|
||||
sys.exit(f"File not found: {input_path}")
|
||||
|
||||
# --- Mode: count tokens in an existing job input file ---
|
||||
if _is_job_input(input_path):
|
||||
result = count_input_tokens(input_path)
|
||||
print(f"\nJob input: {input_path.name}")
|
||||
print(f" Requests : {result['total_requests']:,}")
|
||||
print(f" Tokens : {result['total_tokens']:,}")
|
||||
print(f" Per-req : min={result['min']} max={result['max']} mean={result['mean']}")
|
||||
return
|
||||
|
||||
# --- Mode: estimate from raw scrape file and write reports/<stem>-report.json ---
|
||||
prompt_path = Path(args.prompt)
|
||||
if not prompt_path.exists():
|
||||
sys.exit(f"Prompt file not found: {prompt_path}")
|
||||
@@ -131,10 +178,12 @@ def main() -> None:
|
||||
|
||||
print_table(report)
|
||||
|
||||
out_path = input_path.parent / "report.json"
|
||||
reports_dir = Path(__file__).parent.parent / "reports"
|
||||
reports_dir.mkdir(exist_ok=True)
|
||||
out_path = reports_dir / f"{input_path.stem}-report.json"
|
||||
out_path.write_text(json.dumps(report, indent=2, ensure_ascii=False), encoding="utf-8")
|
||||
print(f"Report written to: {out_path}")
|
||||
print(f"\nNext: python analysis/gpt4o/analysis_batch.py create {out_path} --model <model>")
|
||||
print(f"\nNext: python analysis/openai_batch.py create {out_path} --model <model>")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
File diff suppressed because one or more lines are too long
|
Before Width: | Height: | Size: 130 KiB |
@@ -1,9 +1,18 @@
|
||||
<mxfile host="app.diagrams.net">
|
||||
<diagram name="Page-1" id="0sW-Vs8X5usvYmJikUIv">
|
||||
<mxGraphModel dx="2179" dy="1118" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
|
||||
<mxGraphModel dx="1315" dy="798" grid="1" gridSize="10" guides="1" tooltips="1" connect="1" arrows="1" fold="1" page="0" pageScale="1" pageWidth="850" pageHeight="1100" math="0" shadow="0">
|
||||
<root>
|
||||
<mxCell id="0" />
|
||||
<mxCell id="1" parent="0" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-61" parent="1" style="rounded=0;whiteSpace=wrap;html=1;" value="" vertex="1">
|
||||
<mxGeometry height="90" width="190" x="1000" y="330" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-60" parent="1" style="rounded=0;whiteSpace=wrap;html=1;" value="" vertex="1">
|
||||
<mxGeometry height="90" width="190" x="1010" y="340" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-59" parent="1" style="rounded=0;whiteSpace=wrap;html=1;" value="" vertex="1">
|
||||
<mxGeometry height="90" width="190" x="1020" y="350" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-3" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-1" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=16.5;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-29">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<mxPoint x="200" y="290" as="targetPoint" />
|
||||
@@ -18,18 +27,18 @@
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-5" parent="1" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" value="tokenizer" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="400" y="170" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-6" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=center;verticalAlign=top;rounded=0;" value="gather forum data" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="20" y="240" as="geometry" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-6" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="<div align="left">- collect forum data</div>" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="40" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-7" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="<div>tokenize forum,</div><div>generate report w/</div><div>recommendations</div>" vertex="1">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-7" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="<div>- tokenize forum</div><div>- generate report w/</div><div>recommendations</div>" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="400" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-28" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-35">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-28" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-73">
|
||||
<mxGeometry relative="1" as="geometry">
|
||||
<mxPoint x="910" y="270" as="targetPoint" />
|
||||
<mxPoint x="953" y="240" as="targetPoint" />
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-19" parent="1" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" value="batch" vertex="1">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-19" parent="1" style="shape=process;whiteSpace=wrap;html=1;backgroundOutline=1;" value="openai_batch" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="720" y="170" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-21" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=right;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="<div>--model</div><div>--limit</div>" vertex="1">
|
||||
@@ -38,11 +47,8 @@
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-23" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=right;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="--forum" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="-90" y="170" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-25" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=right;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="--prompt" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="270" y="210" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-26" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="<div>split job into batches</div><div>submit first batch</div><div>status of current batch</div><div>download batch artifacts</div>" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="720" y="240" as="geometry" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-26" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;" value="<div>- split job into batches</div><div>- submit first batch</div><div>- status of current batch</div><div>- download batch artifacts</div>" vertex="1">
|
||||
<mxGeometry height="70" width="140" x="720" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-29" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="210" y="240" as="geometry" />
|
||||
@@ -58,7 +64,7 @@
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-31" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>forum</div><div>.jsonl</div>" vertex="1">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-31" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>&lt;forumid&gt;</div><div>.jsonl</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="230" y="260" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-47" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-34" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=50;exitDy=43.5;exitPerimeter=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-19">
|
||||
@@ -69,30 +75,42 @@
|
||||
</Array>
|
||||
</mxGeometry>
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-34" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>report</div><div>.json</div>" vertex="1">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-34" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div><br></div><div>&lt;forumid&gt;<br>-report</div><div>.json</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="560" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-35" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="job.json" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="890" y="240" as="geometry" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-35" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>status</div><div>.json</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="913.25" y="360" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-41" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="940" y="340" as="geometry" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-43" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>jobN-</div><div>output</div><div>.jsonl</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="1090" y="360" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-42" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="950" y="350" as="geometry" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-48" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>jobN-errors</div><div>.jsonl</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="1150" y="360" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-43" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>batchN-</div><div>output-</div><div>.jsonl</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="960" y="360" as="geometry" />
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-54" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>jobN-</div><div>input</div><div>.jsonl</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="1030" y="360" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-48" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>errors</div><div>.jsonl</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="980" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-51" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=16.5;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-41">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-64" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-63" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=0;exitY=0;exitDx=50;exitDy=43.5;exitPerimeter=0;entryX=0;entryY=0.5;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-5">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-53" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-19" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;exitX=1;exitY=0.5;exitDx=0;exitDy=0;entryX=0;entryY=0;entryDx=16.5;entryDy=0;entryPerimeter=0;" target="mENAtx_syaeSO5uR6kG6-48">
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-63" parent="1" style="shape=note;whiteSpace=wrap;html=1;backgroundOutline=1;darkOpacity=0.05;size=17;" value="<div>prompt</div><div>.txt</div>" vertex="1">
|
||||
<mxGeometry height="70" width="50" x="270" y="90" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-67" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="create" vertex="1">
|
||||
<mxGeometry height="20" width="120" x="850" y="170" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-71" parent="1" style="text;html=1;whiteSpace=wrap;strokeColor=none;fillColor=none;align=left;verticalAlign=top;rounded=0;fontFamily=Courier New;" value="<div>submit</div><div><br></div><div>status</div><div>download</div>" vertex="1">
|
||||
<mxGeometry height="60" width="120" x="1020" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-75" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-73" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;" target="mENAtx_syaeSO5uR6kG6-35">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-76" edge="1" parent="1" source="mENAtx_syaeSO5uR6kG6-73" style="edgeStyle=orthogonalEdgeStyle;rounded=0;orthogonalLoop=1;jettySize=auto;html=1;entryX=0.5;entryY=0;entryDx=0;entryDy=0;" target="mENAtx_syaeSO5uR6kG6-61">
|
||||
<mxGeometry relative="1" as="geometry" />
|
||||
</mxCell>
|
||||
<mxCell id="mENAtx_syaeSO5uR6kG6-73" parent="1" style="image;aspect=fixed;perimeter=ellipsePerimeter;html=1;align=center;shadow=0;dashed=0;spacingTop=3;image=img/lib/active_directory/folder.svg;" value="&lt;forumid&gt;-N" vertex="1">
|
||||
<mxGeometry height="50" width="36.5" x="920" y="240" as="geometry" />
|
||||
</mxCell>
|
||||
</root>
|
||||
</mxGraphModel>
|
||||
</diagram>
|
||||
|
||||
4
docs/pipeline-v1.2.3.svg
Normal file
4
docs/pipeline-v1.2.3.svg
Normal file
File diff suppressed because one or more lines are too long
|
After Width: | Height: | Size: 170 KiB |
@@ -201,44 +201,48 @@ batch processing should be a resumable job queue, not a one-shot script. the us
|
||||
- remaining-comment detection
|
||||
|
||||
** notes
|
||||
- analysis/gpt4o/tokenizer.py: new standalone script; imports analysis_batch for MODEL_LIMITS, estimate_tokens, build_messages. Reads input JSONL + prompt, computes per-model jobs/cost/time table, writes report.json to input file's directory. MODEL_PRICING dict lives here (not in analysis_batch).
|
||||
- analysis/gpt4o/analysis_batch.py: fully rewritten with four subcommands: create, submit, status, download. No longer uses REQUESTS_DIR / RAW_DIR / RUNS_DIR.
|
||||
- Job directories: analysis/gpt4o/jobs/<stem[:8]>-N/ (e.g. f452-1). Each run is self-contained: forum.jsonl, prompt.txt, report.json, jobN-input.jsonl, jobN-output-raw.jsonl, jobN-output.jsonl, jobN-errors.jsonl.
|
||||
- analysis/tokenizer.py: new standalone script; imports openai_batch for MODEL_LIMITS, estimate_tokens, build_messages. Reads input JSONL + prompt, computes per-model jobs/cost/time table, writes reports/<stem>-report.json. MODEL_PRICING dict lives here (not in openai_batch). Pass a jobN-input.jsonl to count actual tokens instead.
|
||||
- analysis/openai_batch.py: fully rewritten with four subcommands: create, submit, status, download. Job dirs at analysis/jobs/<stem[:8]>-N/.
|
||||
- Job directories: analysis/jobs/<stem[:8]>-N/ (e.g. f452-1). Each run is self-contained: forum.jsonl, prompt.txt, report.json, jobN-input.jsonl, jobN-output-raw.jsonl, jobN-output.jsonl, jobN-errors.jsonl.
|
||||
- status.json: tracks all jobs with pending/submitted/in_progress/completed/failed states. Updated by submit, status, download.
|
||||
- _find_next_eligible_job: pure function for testability. Returns (next_pending_job, None) or (None, warning). Blocks submission if previous job is in_progress/submitted.
|
||||
- create: no API key required. Reads report.json, re-chunks comments, writes all jobN-input.jsonl files, writes status.json.
|
||||
- submit: uploads jobN-input.jsonl to Files API, creates batch, updates status.json to 'submitted'. Will not stack batches.
|
||||
- status: retrieves batch from OpenAI, updates status.json counts and status.
|
||||
- download: auto-runs status first, downloads output_file_id → jobN-output-raw.jsonl, error_file_id → jobN-errors.jsonl, normalizes → jobN-output.jsonl. Updates status.json.
|
||||
- tests/test_tokenizer.py: 15 tests for compute_report schema, cost/time calculation, MODEL_PRICING coverage, print_table output, report.json round-trip.
|
||||
- tests/tokenizer.py: 19 tests for compute_report schema, cost/time calculation, MODEL_PRICING coverage, print_table output, count_input_tokens, report.json round-trip.
|
||||
- Token limit buffer: _LIMIT_BUFFER=0.80 (20% headroom). Estimate uses OpenAI cookbook chat formula (role tokens + 3-token reply primer). Verify a job file with: python analysis/tokenizer.py analysis/jobs/<dir>/jobN-input.jsonl
|
||||
|
||||
*** usage
|
||||
#+begin_src sh
|
||||
#+begin_src powershell
|
||||
# 1. estimate tokens and cost
|
||||
python analysis/gpt4o/tokenizer.py output/f452.jsonl --prompt analysis/prompt-1.txt
|
||||
# writes output/report.json
|
||||
python analysis/tokenizer.py output/f452.jsonl --prompt analysis/prompt-1.txt
|
||||
# writes reports/f452-report.json
|
||||
|
||||
# 2. create job directory (no api key needed)
|
||||
python analysis/gpt4o/analysis_batch.py create output/report.json --model gpt-4o-mini
|
||||
# creates analysis/gpt4o/jobs/f452-1/
|
||||
# 2. verify actual tokens in a job file (optional sanity check)
|
||||
python analysis/tokenizer.py analysis/jobs/f452-1/job1-input.jsonl
|
||||
|
||||
# 3. submit first job
|
||||
python analysis/gpt4o/analysis_batch.py submit
|
||||
# 3. create job directory (no api key needed)
|
||||
python analysis/openai_batch.py create reports/f452-report.json --model gpt-5.4-mini
|
||||
# creates analysis/jobs/f452-1/
|
||||
|
||||
# 4. check status (repeat until completed)
|
||||
python analysis/gpt4o/analysis_batch.py status
|
||||
# 4. submit first job
|
||||
python analysis/openai_batch.py submit
|
||||
|
||||
# 5. download and normalize
|
||||
python analysis/gpt4o/analysis_batch.py download
|
||||
# 5. check status (repeat until completed)
|
||||
python analysis/openai_batch.py status
|
||||
|
||||
# 6. submit next job (if multi-job run), then repeat 4-5
|
||||
python analysis/gpt4o/analysis_batch.py submit
|
||||
# 6. download and normalize
|
||||
python analysis/openai_batch.py download
|
||||
|
||||
# 7. submit next job (if multi-job run), then repeat 5-6
|
||||
python analysis/openai_batch.py submit
|
||||
#+end_src
|
||||
|
||||
** evidence
|
||||
- commit:
|
||||
- tests: passing (pytest tests/analysis_gpt4o_batch.py tests/test_tokenizer.py)
|
||||
- datetime: [2026-05-05 Tue]
|
||||
- tests: passing (pytest tests/openai_batch.py tests/openai_realtime.py tests/tokenizer.py)
|
||||
- datetime: [2026-05-06 Wed]
|
||||
|
||||
* === Backlog ===
|
||||
* [ ] X: analysis validation view
|
||||
|
||||
43
reports/f452-1.json
Normal file
43
reports/f452-1.json
Normal file
@@ -0,0 +1,43 @@
|
||||
{
|
||||
"prompt": "analysis\\prompt-1.txt",
|
||||
"prompt_hash": "cb41250",
|
||||
"input_file": "output\\f452.jsonl",
|
||||
"input_sha256": "59dcc8b13cc2a386977a8b934c498c7e639b7e684a94ca1bfd10a14878670018",
|
||||
"total_comments": 9083,
|
||||
"input_tokens": 6397254,
|
||||
"gpt-5.5": {
|
||||
"jobs": 9,
|
||||
"cost_$": 15.9931,
|
||||
"est_queue_days": 7.11
|
||||
},
|
||||
"gpt-5.4": {
|
||||
"jobs": 9,
|
||||
"cost_$": 7.9966,
|
||||
"est_queue_days": 7.11
|
||||
},
|
||||
"gpt-5.4-mini": {
|
||||
"jobs": 4,
|
||||
"cost_$": 2.399,
|
||||
"est_queue_days": 3.2
|
||||
},
|
||||
"gpt-5.4-nano": {
|
||||
"jobs": 40,
|
||||
"cost_$": 0.6397,
|
||||
"est_queue_days": 31.99
|
||||
},
|
||||
"gpt-4o": {
|
||||
"jobs": 9,
|
||||
"cost_$": 7.9966,
|
||||
"est_queue_days": 7.11
|
||||
},
|
||||
"gpt-4o-mini": {
|
||||
"jobs": 4,
|
||||
"cost_$": 0.4798,
|
||||
"est_queue_days": 3.2
|
||||
},
|
||||
"gpt-o4-mini": {
|
||||
"jobs": 4,
|
||||
"cost_$": 3.5185,
|
||||
"est_queue_days": 3.2
|
||||
}
|
||||
}
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Unit tests for analysis/gpt4o/analysis_batch.py — no real API calls."""
|
||||
"""Unit tests for analysis/openai_batch.py — no real API calls."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
@@ -7,8 +7,8 @@ from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
|
||||
import analysis_batch as bt
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis"))
|
||||
import openai_batch as bt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -101,7 +101,7 @@ def test_prompt_version_is_7_hex_chars():
|
||||
|
||||
def test_prompt_version_matches_realtime():
|
||||
"""Both scripts must derive the same PROMPT_VERSION from the same file."""
|
||||
import analysis_realtime as rt
|
||||
import openai_realtime as rt
|
||||
assert bt.PROMPT_VERSION == rt.PROMPT_VERSION
|
||||
|
||||
|
||||
@@ -242,7 +242,8 @@ def test_estimate_tokens_fallback_without_tiktoken(monkeypatch):
|
||||
monkeypatch.setitem(_sys.modules, "tiktoken", None)
|
||||
messages = [{"role": "user", "content": "x" * 300}]
|
||||
result = bt.estimate_tokens(messages, "gpt-4o")
|
||||
assert result == 4 + 300 // 3
|
||||
# fallback: 3 primer + (3 + 300//3) per message
|
||||
assert result == 3 + (3 + 300 // 3)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Unit tests for analysis/gpt4o/analysis_realtime.py — no real API calls."""
|
||||
"""Unit tests for analysis/openai_realtime.py — no real API calls."""
|
||||
|
||||
import json
|
||||
import sys
|
||||
@@ -7,8 +7,8 @@ from unittest.mock import MagicMock
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
|
||||
import analysis_realtime as rt
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis"))
|
||||
import openai_realtime as rt
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -1,4 +1,4 @@
|
||||
"""Unit tests for analysis/gpt4o/tokenizer.py — no real API calls."""
|
||||
"""Unit tests for analysis/tokenizer.py — no real API calls."""
|
||||
|
||||
import io
|
||||
import json
|
||||
@@ -9,9 +9,9 @@ from unittest.mock import patch
|
||||
|
||||
import pytest
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis" / "gpt4o"))
|
||||
sys.path.insert(0, str(Path(__file__).parent.parent / "analysis"))
|
||||
import tokenizer as tk
|
||||
import analysis_batch as ab
|
||||
import openai_batch as ab
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
@@ -199,3 +199,52 @@ def test_report_json_roundtrip(tmp_path):
|
||||
assert loaded["total_comments"] == report["total_comments"]
|
||||
assert loaded["input_tokens"] == report["input_tokens"]
|
||||
assert loaded["gpt-4o-mini"]["jobs"] == report["gpt-4o-mini"]["jobs"]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# count_input_tokens
|
||||
|
||||
def _make_job_input(tmp_path, comments, forum=None) -> Path:
    """Create ``job1-input.jsonl`` under *tmp_path* and return its path.

    One batch request line is written per comment, each built by
    ``ab.build_batch_request_line`` for the ``gpt-4o-mini`` model, so the
    file mirrors the layout of a real job1-input.jsonl.
    """
    job_file = tmp_path / "job1-input.jsonl"
    with open(job_file, "w", encoding="utf-8") as handle:
        handle.writelines(
            json.dumps(ab.build_batch_request_line(comment, forum, "gpt-4o-mini")) + "\n"
            for comment in comments
        )
    return job_file
|
||||
|
||||
|
||||
def test_count_input_tokens_matches_estimate(tmp_path):
    """A freshly written job file counts to the same total as summing estimate_tokens."""
    job_file = _make_job_input(tmp_path, COMMENTS, FORUM_ITEM)
    counted = tk.count_input_tokens(job_file, "gpt-4o-mini")

    # Rebuild the expected total independently, one comment at a time.
    expected_total = 0
    for comment in COMMENTS:
        messages = ab.build_messages(comment, FORUM_ITEM)[0]
        expected_total += ab.estimate_tokens(messages, "gpt-4o-mini")

    assert counted["total_tokens"] == expected_total
    assert counted["total_requests"] == len(COMMENTS)
|
||||
|
||||
|
||||
def test_count_input_tokens_fields(tmp_path):
    """The result exposes exactly the five summary keys, with sane min/mean/max ordering."""
    expected_keys = {"total_tokens", "total_requests", "min", "max", "mean"}
    stats = tk.count_input_tokens(_make_job_input(tmp_path, COMMENTS, FORUM_ITEM))

    assert set(stats.keys()) == expected_keys
    assert stats["min"] <= stats["mean"] <= stats["max"]
    # Every request carries at least some tokens.
    assert stats["min"] > 0
|
||||
|
||||
|
||||
def test_count_input_tokens_empty_file(tmp_path):
    """An empty JSONL file yields zero tokens and zero requests."""
    empty_file = tmp_path / "empty.jsonl"
    empty_file.write_text("", encoding="utf-8")

    stats = tk.count_input_tokens(empty_file)

    assert stats["total_tokens"] == 0
    assert stats["total_requests"] == 0
|
||||
|
||||
|
||||
def test_count_input_tokens_includes_system_prompt(tmp_path):
    """Total tokens must exceed the naive chars // 3 estimate of the user text alone."""
    job_file = _make_job_input(tmp_path, [COMMENT_A], FORUM_ITEM)
    stats = tk.count_input_tokens(job_file)

    # The system prompt alone is hundreds of tokens, so the total has to beat
    # an estimate derived only from the comment's own text.
    naive_user_estimate = len(COMMENT_A.get("text", "")) // 3
    assert stats["total_tokens"] > naive_user_estimate
|
||||
|
||||
Reference in New Issue
Block a user