618 lines
21 KiB
Python
618 lines
21 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
analysis_batch.py — OpenAI Batch API job runner
|
|
|
|
Run tokenizer.py first to generate report.json, then:
|
|
create <report.json> --model <model> — build job directory
|
|
submit [--job N] [--dir DIR] — submit next eligible job
|
|
status [--job N] [--dir DIR] — check job status
|
|
download [--job N] [--dir DIR] — download + normalize completed jobs
|
|
|
|
DIR is a name under analysis/gpt4o/jobs/ (default: most recently created).
|
|
"""
|
|
|
|
import argparse
|
|
import hashlib
|
|
import json
|
|
import os
|
|
import shutil
|
|
import sys
|
|
import uuid
|
|
from datetime import datetime, timezone
|
|
from pathlib import Path
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
try:
|
|
import openai
|
|
except ImportError:
|
|
sys.exit("openai package not installed. Run: pip install openai")
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Model limits and token estimation
|
|
|
|
# Max enqueued tokens across ALL concurrent batches (docs/openai.md, 2026-05-05).
|
|
# Org-tier limits may be lower; use --job to limit submission size if needed.
|
|
MODEL_LIMITS: dict[str, int] = {
|
|
"gpt-5.5": 900_000,
|
|
"gpt-5.4": 900_000,
|
|
"gpt-5.4-mini": 2_000_000,
|
|
"gpt-5.4-nano": 200_000,
|
|
"gpt-4o": 900_000,
|
|
"gpt-4o-mini": 2_000_000,
|
|
"gpt-o4-mini": 2_000_000,
|
|
}
|
|
_DEFAULT_TOKEN_LIMIT = 900_000
|
|
_MODEL_ENCODING: dict[str, str] = {
|
|
"gpt-5.5": "o200k_base",
|
|
"gpt-5.4": "o200k_base",
|
|
"gpt-5.4-mini": "o200k_base",
|
|
"gpt-5.4-nano": "o200k_base",
|
|
"gpt-4o": "o200k_base",
|
|
"gpt-4o-mini": "o200k_base",
|
|
"gpt-o4-mini": "o200k_base",
|
|
}
|
|
_LIMIT_BUFFER = 0.90
|
|
|
|
|
|
def estimate_tokens(messages: list[dict], model: str) -> int:
|
|
"""Exact token count via tiktoken; falls back to chars/3 + 4 overhead per message."""
|
|
try:
|
|
import tiktoken
|
|
enc = tiktoken.get_encoding(_MODEL_ENCODING.get(model, "o200k_base"))
|
|
return sum(4 + len(enc.encode(m["content"])) for m in messages)
|
|
except ImportError:
|
|
return sum(4 + len(m["content"]) // 3 for m in messages)
|
|
|
|
|
|
def chunk_comments_by_tokens(
|
|
comments: list[dict], forum: dict | None, model: str
|
|
) -> list[list[dict]]:
|
|
"""Greedy bin-pack comments into chunks that fit under the model TPD limit."""
|
|
token_limit = int(MODEL_LIMITS.get(model, _DEFAULT_TOKEN_LIMIT) * _LIMIT_BUFFER)
|
|
chunks: list[list[dict]] = []
|
|
current: list[dict] = []
|
|
current_tokens = 0
|
|
for comment in comments:
|
|
messages, _ = build_messages(comment, forum)
|
|
tokens = estimate_tokens(messages, model)
|
|
if current and current_tokens + tokens > token_limit:
|
|
chunks.append(current)
|
|
current = [comment]
|
|
current_tokens = tokens
|
|
else:
|
|
current.append(comment)
|
|
current_tokens += tokens
|
|
if current:
|
|
chunks.append(current)
|
|
return chunks
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Prompt
|
|
|
|
_DEFAULT_PROMPT_FILE = Path(__file__).parent.parent / "prompt-1.txt"
|
|
SYSTEM_PROMPT = _DEFAULT_PROMPT_FILE.read_text(encoding="utf-8").strip()
|
|
PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]
|
|
|
|
|
|
def _load_prompt(path: Path) -> None:
|
|
global SYSTEM_PROMPT, PROMPT_VERSION
|
|
SYSTEM_PROMPT = path.read_text(encoding="utf-8").strip()
|
|
PROMPT_VERSION = hashlib.sha256(SYSTEM_PROMPT.encode("utf-8")).hexdigest()[:7]
|
|
|
|
|
|
USER_TEMPLATE = """\
|
|
## Proposed Regulation
|
|
Title: {reg_title}
|
|
Description: {reg_desc}
|
|
|
|
---
|
|
|
|
## Public Comment
|
|
Comment ID: {comment_id}
|
|
Title: {comment_title}
|
|
Body:
|
|
{comment_text}
|
|
|
|
---
|
|
Classify this comment per the instructions. Return only JSON.\
|
|
"""
|
|
|
|
MAX_COMMENT_CHARS = 6000
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Directories
|
|
|
|
_SCRIPT_DIR = Path(__file__).parent
|
|
JOBS_DIR = _SCRIPT_DIR / "jobs"
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Core functions (importable for tests)
|
|
|
|
|
|
def load_items(path: Path) -> tuple[dict | None, list[dict]]:
|
|
"""Read a scraped JSONL. Returns (forum_item_or_None, [comment_items])."""
|
|
forum = None
|
|
comments = []
|
|
with open(path, encoding="utf-8") as f:
|
|
for line in f:
|
|
line = line.strip()
|
|
if not line:
|
|
continue
|
|
item = json.loads(line)
|
|
if "comment_id" in item:
|
|
comments.append(item)
|
|
elif "reg_title" in item:
|
|
forum = item
|
|
return forum, comments
|
|
|
|
|
|
def custom_id_from(comment_id: str) -> str:
|
|
return f"comment_{comment_id}"
|
|
|
|
|
|
def parse_custom_id(custom_id: str) -> str:
|
|
return custom_id.removeprefix("comment_")
|
|
|
|
|
|
def build_messages(comment: dict, forum: dict | None) -> tuple[list, bool]:
|
|
"""Build OpenAI messages for one comment. Returns (messages, truncated)."""
|
|
reg_title = (forum or {}).get("reg_title", "[unknown]")
|
|
reg_desc = (forum or {}).get("reg_desc", "[unknown]")
|
|
body = (comment.get("text") or "").strip()
|
|
truncated = False
|
|
if not body:
|
|
body = "[No body text provided]"
|
|
elif len(body) > MAX_COMMENT_CHARS:
|
|
body = body[:MAX_COMMENT_CHARS] + "... [truncated]"
|
|
truncated = True
|
|
user_text = USER_TEMPLATE.format(
|
|
reg_title=reg_title,
|
|
reg_desc=reg_desc,
|
|
comment_id=comment.get("comment_id", ""),
|
|
comment_title=comment.get("title", ""),
|
|
comment_text=body,
|
|
)
|
|
return [
|
|
{"role": "system", "content": SYSTEM_PROMPT},
|
|
{"role": "user", "content": user_text},
|
|
], truncated
|
|
|
|
|
|
def build_batch_request_line(comment: dict, forum: dict | None, model: str) -> dict:
|
|
messages, _ = build_messages(comment, forum)
|
|
return {
|
|
"custom_id": custom_id_from(comment["comment_id"]),
|
|
"method": "POST",
|
|
"url": "/v1/chat/completions",
|
|
"body": {
|
|
"model": model,
|
|
"messages": messages,
|
|
"response_format": {"type": "json_object"},
|
|
"temperature": 0.0,
|
|
},
|
|
}
|
|
|
|
|
|
def normalize_output_line(
|
|
raw_line: dict,
|
|
comment_lookup: dict,
|
|
run_id: str,
|
|
analyzed_at: str,
|
|
model: str,
|
|
prompt_version: str,
|
|
) -> dict:
|
|
"""Convert one raw batch output line into a normalized analysis record."""
|
|
comment_id = parse_custom_id(raw_line.get("custom_id", ""))
|
|
comment = comment_lookup.get(comment_id, {})
|
|
base = {
|
|
"run_id": run_id,
|
|
"forum_id": comment.get("forum_id", ""),
|
|
"comment_id": comment_id,
|
|
"analyzed_at": analyzed_at,
|
|
"model": model,
|
|
"prompt_version": prompt_version,
|
|
"input_title": comment.get("title", ""),
|
|
"truncated": len(comment.get("text") or "") > MAX_COMMENT_CHARS,
|
|
}
|
|
if raw_line.get("error"):
|
|
err = raw_line["error"]
|
|
err_msg = err.get("message", str(err)) if isinstance(err, dict) else str(err)
|
|
return {**base, "stance": None, "stance_confidence": None,
|
|
"stance_rationale": None, "tone": None, "tags": None, "error": err_msg}
|
|
response = raw_line.get("response") or {}
|
|
if response.get("status_code") != 200:
|
|
return {**base, "stance": None, "stance_confidence": None,
|
|
"stance_rationale": None, "tone": None, "tags": None,
|
|
"error": f"status {response.get('status_code')}"}
|
|
try:
|
|
content = response["body"]["choices"][0]["message"]["content"]
|
|
data = json.loads(content)
|
|
keys = ("stance", "stance_confidence", "stance_rationale", "tone", "tags")
|
|
parsed = {k: data.get(k) for k in keys}
|
|
return {**base, **parsed, "error": None}
|
|
except Exception as exc:
|
|
return {**base, "stance": None, "stance_confidence": None,
|
|
"stance_rationale": None, "tone": None, "tags": None, "error": str(exc)}
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Job directory management
|
|
|
|
|
|
def _next_job_dir(stem: str) -> Path:
|
|
base = stem[:8]
|
|
i = 1
|
|
while (JOBS_DIR / f"{base}-{i}").exists():
|
|
i += 1
|
|
return JOBS_DIR / f"{base}-{i}"
|
|
|
|
|
|
def _latest_job_dir() -> Path:
|
|
if not JOBS_DIR.exists():
|
|
sys.exit(f"No jobs directory found. Run 'create' first.")
|
|
status_files = list(JOBS_DIR.glob("*/status.json"))
|
|
if not status_files:
|
|
sys.exit(f"No jobs found in {JOBS_DIR}. Run 'create' first.")
|
|
return max(status_files, key=lambda p: p.stat().st_mtime).parent
|
|
|
|
|
|
def _resolve_job_dir(args) -> Path:
|
|
if getattr(args, "dir", None):
|
|
d = Path(args.dir)
|
|
if not d.is_absolute():
|
|
d = JOBS_DIR / d
|
|
if not d.exists():
|
|
sys.exit(f"Job directory not found: {d}")
|
|
return d
|
|
return _latest_job_dir()
|
|
|
|
|
|
def load_status(job_dir: Path) -> dict:
|
|
return json.loads((job_dir / "status.json").read_text(encoding="utf-8"))
|
|
|
|
|
|
def save_status(status: dict, job_dir: Path) -> None:
|
|
(job_dir / "status.json").write_text(
|
|
json.dumps(status, indent=2, ensure_ascii=False), encoding="utf-8"
|
|
)
|
|
|
|
|
|
def _find_next_eligible_job(jobs: list[dict]) -> tuple[dict | None, str | None]:
|
|
"""Return (next_pending_job, None) or (None, warning_message).
|
|
|
|
A job is eligible when it is 'pending' and either it is the first job
|
|
or its predecessor is 'completed'.
|
|
"""
|
|
for j in jobs:
|
|
if j["status"] != "pending":
|
|
continue
|
|
if j["job_num"] == 1:
|
|
return j, None
|
|
prev = next(p for p in jobs if p["job_num"] == j["job_num"] - 1)
|
|
if prev["status"] == "completed":
|
|
return j, None
|
|
if prev["status"] in ("submitted", "in_progress", "validating", "finalizing"):
|
|
return None, (
|
|
f"Job {prev['job_num']} is '{prev['status']}'. "
|
|
f"Wait for it to complete before submitting job {j['job_num']}."
|
|
)
|
|
return None, None
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommand: create
|
|
|
|
|
|
def cmd_create(args) -> None:
|
|
report_path = Path(args.report)
|
|
if not report_path.exists():
|
|
sys.exit(f"Report not found: {report_path}")
|
|
|
|
report = json.loads(report_path.read_text(encoding="utf-8"))
|
|
|
|
if args.model not in report or not isinstance(report[args.model], dict):
|
|
available = [k for k in report if isinstance(report.get(k), dict)]
|
|
sys.exit(f"Model '{args.model}' not in report. Available: {', '.join(available)}")
|
|
|
|
prompt_path = Path(report["prompt"])
|
|
if not prompt_path.exists():
|
|
sys.exit(f"Prompt file not found: {prompt_path}")
|
|
_load_prompt(prompt_path)
|
|
|
|
input_path = Path(report["input_file"])
|
|
if not input_path.exists():
|
|
sys.exit(f"Input file not found: {input_path}")
|
|
forum, comments = load_items(input_path)
|
|
if not comments:
|
|
sys.exit("No comment items found in input file.")
|
|
|
|
chunks = chunk_comments_by_tokens(comments, forum, args.model)
|
|
|
|
stem = input_path.stem[:8]
|
|
job_dir = _next_job_dir(stem)
|
|
JOBS_DIR.mkdir(parents=True, exist_ok=True)
|
|
job_dir.mkdir()
|
|
|
|
shutil.copy2(input_path, job_dir / "forum.jsonl")
|
|
shutil.copy2(prompt_path, job_dir / "prompt.txt")
|
|
shutil.copy2(report_path, job_dir / "report.json")
|
|
|
|
jobs_meta = []
|
|
for i, chunk in enumerate(chunks, start=1):
|
|
req_path = job_dir / f"job{i}-input.jsonl"
|
|
with open(req_path, "w", encoding="utf-8") as f:
|
|
for comment in chunk:
|
|
f.write(json.dumps(build_batch_request_line(comment, forum, args.model),
|
|
ensure_ascii=False) + "\n")
|
|
jobs_meta.append({
|
|
"job_num": i,
|
|
"run_id": str(uuid.uuid4()),
|
|
"status": "pending",
|
|
"batch_id": None,
|
|
"records_submitted": len(chunk),
|
|
"records_completed": None,
|
|
"records_failed": None,
|
|
"submitted_at": None,
|
|
"completed_at": None,
|
|
})
|
|
|
|
model_info = report[args.model]
|
|
status = {
|
|
"model": args.model,
|
|
"prompt_hash": report["prompt_hash"],
|
|
"input_file": str(input_path),
|
|
"input_sha256": report["input_sha256"],
|
|
"total_comments": report["total_comments"],
|
|
"input_tokens": report["input_tokens"],
|
|
"est_queue_days": model_info["est_queue_days"],
|
|
"cost_$": model_info["cost_$"],
|
|
"total_jobs": len(chunks),
|
|
"jobs": jobs_meta,
|
|
}
|
|
save_status(status, job_dir)
|
|
|
|
print(f"Created: {job_dir.name}")
|
|
print(f" {len(chunks)} job(s) | {len(comments)} comments | model: {args.model}")
|
|
print(f"\nNext: python analysis/gpt4o/analysis_batch.py submit")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommand: submit
|
|
|
|
|
|
def cmd_submit(args, client) -> None:
|
|
job_dir = _resolve_job_dir(args)
|
|
status = load_status(job_dir)
|
|
jobs = status["jobs"]
|
|
|
|
if args.job:
|
|
target = next((j for j in jobs if j["job_num"] == args.job), None)
|
|
if target is None:
|
|
sys.exit(f"Job {args.job} not found in {job_dir.name}.")
|
|
if target["status"] != "pending":
|
|
sys.exit(f"Job {args.job} is already '{target['status']}' — cannot resubmit.")
|
|
if target["job_num"] > 1:
|
|
prev = next(p for p in jobs if p["job_num"] == target["job_num"] - 1)
|
|
if prev["status"] != "completed":
|
|
sys.exit(
|
|
f"Cannot submit job {target['job_num']}: "
|
|
f"job {prev['job_num']} is '{prev['status']}' (must be 'completed')."
|
|
)
|
|
else:
|
|
target, warning = _find_next_eligible_job(jobs)
|
|
if warning:
|
|
print(warning, file=sys.stderr)
|
|
sys.exit(1)
|
|
if target is None:
|
|
all_done = all(j["status"] == "completed" for j in jobs)
|
|
print("All jobs completed." if all_done else "No pending jobs eligible for submission.")
|
|
return
|
|
|
|
n = target["job_num"]
|
|
req_path = job_dir / f"job{n}-input.jsonl"
|
|
print(f"Submitting job {n}/{status['total_jobs']} ({target['records_submitted']} comments) ...",
|
|
file=sys.stderr)
|
|
|
|
with open(req_path, "rb") as f:
|
|
uploaded = client.files.create(file=f, purpose="batch")
|
|
|
|
batch = client.batches.create(
|
|
input_file_id=uploaded.id,
|
|
endpoint="/v1/chat/completions",
|
|
completion_window="24h",
|
|
metadata={"run_id": target["run_id"], "job_dir": job_dir.name},
|
|
)
|
|
|
|
target["status"] = "submitted"
|
|
target["batch_id"] = batch.id
|
|
target["submitted_at"] = datetime.now(timezone.utc).isoformat()
|
|
save_status(status, job_dir)
|
|
|
|
print(f"Job {n} submitted: {batch.id} ({batch.status})")
|
|
print(f" python analysis/gpt4o/analysis_batch.py status")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommand: status
|
|
|
|
|
|
def cmd_status(args, client) -> None:
|
|
job_dir = _resolve_job_dir(args)
|
|
status = load_status(job_dir)
|
|
jobs = status["jobs"]
|
|
|
|
job_filter = getattr(args, "job", None)
|
|
|
|
for job in jobs:
|
|
if job_filter is not None and job["job_num"] != job_filter:
|
|
continue
|
|
if not job["batch_id"]:
|
|
continue
|
|
if job["status"] in ("completed", "failed", "expired", "cancelled", "pending"):
|
|
continue
|
|
batch = client.batches.retrieve(job["batch_id"])
|
|
counts = batch.request_counts
|
|
if batch.status == "completed":
|
|
job["status"] = "completed"
|
|
if batch.completed_at:
|
|
job["completed_at"] = datetime.fromtimestamp(
|
|
batch.completed_at, tz=timezone.utc
|
|
).isoformat()
|
|
elif batch.status in ("failed", "expired", "cancelled"):
|
|
job["status"] = batch.status
|
|
else:
|
|
job["status"] = batch.status
|
|
job["records_completed"] = counts.completed
|
|
job["records_failed"] = counts.failed
|
|
|
|
save_status(status, job_dir)
|
|
|
|
target_jobs = jobs if not job_filter else [j for j in jobs if j["job_num"] == job_filter]
|
|
print(f"Dir: {job_dir.name} | Model: {status['model']} | {status['total_jobs']} job(s)")
|
|
print(f"{'Job':<5} {'Status':<14} {'Records':>12} {'Submitted':<20} {'Completed':<20}")
|
|
print("-" * 76)
|
|
for j in target_jobs:
|
|
rec = (f"{j['records_completed']}/{j['records_submitted']}"
|
|
if j["records_completed"] is not None else f"-/{j['records_submitted']}")
|
|
sub = (j["submitted_at"] or "-")[:19]
|
|
done = (j["completed_at"] or "-")[:19]
|
|
print(f"{j['job_num']:<5} {j['status']:<14} {rec:>12} {sub:<20} {done:<20}")
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# Subcommand: download
|
|
|
|
|
|
def cmd_download(args, client) -> None:
|
|
job_dir = _resolve_job_dir(args)
|
|
|
|
# Refresh status before deciding what to download
|
|
cmd_status(args, client)
|
|
status = load_status(job_dir)
|
|
jobs = status["jobs"]
|
|
|
|
job_filter = getattr(args, "job", None)
|
|
if job_filter:
|
|
candidates = [j for j in jobs if j["job_num"] == job_filter]
|
|
else:
|
|
candidates = [
|
|
j for j in jobs
|
|
if j["status"] == "completed"
|
|
and not (job_dir / f"job{j['job_num']}-output.jsonl").exists()
|
|
]
|
|
|
|
if not candidates:
|
|
print("No completed jobs pending download.", file=sys.stderr)
|
|
return
|
|
|
|
_, all_comments = load_items(job_dir / "forum.jsonl")
|
|
comment_lookup = {c["comment_id"]: c for c in all_comments}
|
|
|
|
for job in candidates:
|
|
n = job["job_num"]
|
|
|
|
if job["status"] != "completed":
|
|
print(f"Job {n} not yet completed ('{job['status']}'), skipping.", file=sys.stderr)
|
|
continue
|
|
|
|
batch = client.batches.retrieve(job["batch_id"])
|
|
|
|
if not batch.output_file_id:
|
|
print(f"Job {n}: no output file available from OpenAI.", file=sys.stderr)
|
|
continue
|
|
|
|
raw_text = client.files.content(batch.output_file_id).text
|
|
raw_path = job_dir / f"job{n}-output-raw.jsonl"
|
|
raw_path.write_text(raw_text, encoding="utf-8")
|
|
print(f"Job {n} raw → {raw_path.name}", file=sys.stderr)
|
|
|
|
if batch.error_file_id:
|
|
err_text = client.files.content(batch.error_file_id).text
|
|
err_path = job_dir / f"job{n}-errors.jsonl"
|
|
err_path.write_text(err_text, encoding="utf-8")
|
|
n_err_lines = sum(1 for line in err_text.splitlines() if line.strip())
|
|
print(f"Job {n} errors → {err_path.name} ({n_err_lines} lines)", file=sys.stderr)
|
|
|
|
completed_at = job.get("completed_at") or datetime.now(timezone.utc).isoformat()
|
|
norm_path = job_dir / f"job{n}-output.jsonl"
|
|
n_ok = n_err = 0
|
|
with open(norm_path, "w", encoding="utf-8") as out:
|
|
for line in raw_text.splitlines():
|
|
if not line.strip():
|
|
continue
|
|
record = normalize_output_line(
|
|
json.loads(line), comment_lookup,
|
|
job["run_id"], completed_at,
|
|
status["model"], status["prompt_hash"],
|
|
)
|
|
out.write(json.dumps(record, ensure_ascii=False) + "\n")
|
|
if record["error"]:
|
|
n_err += 1
|
|
else:
|
|
n_ok += 1
|
|
|
|
print(f"Job {n} normalized → {norm_path.name} ({n_ok} ok, {n_err} errors)", file=sys.stderr)
|
|
job["records_completed"] = n_ok
|
|
job["records_failed"] = n_err
|
|
|
|
save_status(status, job_dir)
|
|
|
|
|
|
# ---------------------------------------------------------------------------
|
|
# CLI
|
|
|
|
|
|
def _add_common_args(p: argparse.ArgumentParser) -> None:
|
|
p.add_argument("--job", type=int, default=None, metavar="N",
|
|
help="Job number within the run (default: auto)")
|
|
p.add_argument("--dir", default=None, metavar="DIR",
|
|
help="Job directory name or path (default: most recent)")
|
|
|
|
|
|
def main() -> None:
|
|
load_dotenv()
|
|
|
|
parser = argparse.ArgumentParser(
|
|
description="Batch analysis job runner.",
|
|
formatter_class=argparse.RawDescriptionHelpFormatter,
|
|
epilog=__doc__,
|
|
)
|
|
sub = parser.add_subparsers(dest="command", required=True)
|
|
|
|
p_create = sub.add_parser("create", help="Create job directory from tokenizer report")
|
|
p_create.add_argument("report", help="Path to report.json from tokenizer.py")
|
|
p_create.add_argument("--model", required=True, help="Model (e.g. gpt-4o-mini)")
|
|
|
|
p_submit = sub.add_parser("submit", help="Submit next eligible job")
|
|
_add_common_args(p_submit)
|
|
|
|
p_status = sub.add_parser("status", help="Check job status")
|
|
_add_common_args(p_status)
|
|
|
|
p_download = sub.add_parser("download", help="Download and normalize completed jobs")
|
|
_add_common_args(p_download)
|
|
|
|
args = parser.parse_args()
|
|
|
|
if args.command == "create":
|
|
cmd_create(args)
|
|
return
|
|
|
|
api_key = os.environ.get("OPENAI_API_KEY")
|
|
if not api_key:
|
|
sys.exit("OPENAI_API_KEY not set. Create a .env file or export the variable.")
|
|
client = openai.OpenAI(api_key=api_key)
|
|
|
|
if args.command == "submit":
|
|
cmd_submit(args, client)
|
|
elif args.command == "status":
|
|
cmd_status(args, client)
|
|
elif args.command == "download":
|
|
cmd_download(args, client)
|
|
|
|
|
|
if __name__ == "__main__":
|
|
main()
|