add usajobs.py cli with full api, filter, display, and export pipeline

milestones 1-6 complete: fetch/cache from data.usajobs.gov, local filters for pay plan/grade/salary/location, rich table output, questionary selection prompt, and org-mode export. key field mappings resolved from live api inspection (JobGrade[0].Code for pay plan, UserArea.Details for grades and clearance, city-part location matching due to api returning full state names). Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
2026-05-18 15:17:24 -04:00
parent 8344025a51
commit 3405023e64
5 changed files with 901 additions and 0 deletions
--- a/usajobs.py
+++ b/usajobs.py
@@ -0,0 +1,542 @@
+#!/usr/bin/env python3
+import hashlib
+import json
+import os
+import re
+import sys
+from datetime import datetime
+from pathlib import Path
+
+import click
+import questionary
+import requests
+from dotenv import load_dotenv
+from questionary import Choice
+from rich.console import Console
+from rich.table import Table
+
+# Pull variables from a local .env file (if one exists) into the process
+# environment before any credentials are read.
+load_dotenv()
+
+# Shared rich console; render_table() prints through it.
+console = Console()
+# Search endpoint requested by fetch_page().
+API_URL = "https://data.usajobs.gov/api/search"
+
+
+# ---------------------------------------------------------------------------
+# credentials
+# ---------------------------------------------------------------------------
+
+def get_credentials() -> tuple[str, str]:
+    """Return the ``(email, key)`` pair read from the environment.
+
+    Reads ``USAJOBS_EMAIL`` and ``USAJOBS_KEY``. If either one is unset or
+    empty, an error naming the missing variable(s) is written to stderr and
+    the process exits with status 1.
+    """
+    names = ("USAJOBS_EMAIL", "USAJOBS_KEY")
+    values = {name: os.environ.get(name) for name in names}
+    absent = [name for name in names if not values[name]]
+    if not absent:
+        return values["USAJOBS_EMAIL"], values["USAJOBS_KEY"]
+
+    click.echo(f"Error: missing environment variable(s): {', '.join(absent)}", err=True)
+    click.echo("Add them to your .env file or export them before running.", err=True)
+    sys.exit(1)
+
+
+# ---------------------------------------------------------------------------
+# api layer
+# ---------------------------------------------------------------------------
+
+def build_params(
+    location: str | None,
+    radius: int | None,
+    series: tuple[str, ...],
+    clearance: tuple[str, ...],
+    pay_plans: tuple[str, ...],
+) -> dict:
+    """Build the query-string parameters for a search request.
+
+    The base query always asks for full records, 500 results per page, sorted
+    by open date descending. Each optional filter is added only when a value
+    was supplied; multi-valued filters are joined with ``;``.
+
+    Args:
+        location: Value for ``LocationName``; skipped when empty or ``None``.
+        radius: Value for ``Radius``; skipped when ``None``.
+        series: Occupational series codes, sent as ``JobCategoryCode``.
+        clearance: Clearance level codes, sent as ``SecurityClearanceRequired``.
+        pay_plans: Pay plan codes, upper-cased and sent as ``PayPlanCode``.
+
+    Returns:
+        A new dict of request parameters (without ``Page``).
+    """
+    params: dict = {
+        "Fields": "Full",
+        "ResultsPerPage": 500,
+        "SortField": "OpenDate",
+        "SortDirection": "Desc",
+    }
+    if location:
+        params["LocationName"] = location
+    if radius is not None:
+        params["Radius"] = radius
+    if series:
+        params["JobCategoryCode"] = ";".join(series)
+    if clearance:
+        # The Search API names this filter SecurityClearanceRequired; the
+        # earlier "SecurityClearances" guess is not a documented parameter.
+        params["SecurityClearanceRequired"] = ";".join(str(c) for c in clearance)
+    if pay_plans:
+        # NOTE(review): PayPlanCode has not been confirmed against the API
+        # reference. The local pay-plan filter in passes_filters() is applied
+        # as well, so results stay correct if the server ignores it.
+        params["PayPlanCode"] = ";".join(p.upper() for p in pay_plans)
+    return params
+
+
+def _cache_path(cache_dir: Path, params: dict, page: int) -> Path:
+    """Return the cache file path for one page of one query.
+
+    The file name is the first 16 hex characters of the SHA-256 of the sorted
+    parameter items plus the page number, followed by ``_p<page>.json``.
+    """
+    fingerprint = f"{sorted(params.items())}|p{page}"
+    short_hash = hashlib.sha256(fingerprint.encode()).hexdigest()[:16]
+    return cache_dir.joinpath(f"{short_hash}_p{page}.json")
+
+
+def fetch_page(
+    params: dict,
+    page: int,
+    credentials: tuple[str, str],
+    cache_dir: Path,
+    offline: bool,
+) -> dict:
+    """Return the decoded JSON for one result page, using the disk cache.
+
+    The cache directory is created if needed. A cached file for this
+    ``(params, page)`` pair is returned as-is without any request. Otherwise
+    the page is requested from ``API_URL``, written to the cache, and returned.
+
+    Raises:
+        click.ClickException: *offline* is true and the page is not cached.
+        requests.HTTPError: the API answered with an error status.
+    """
+    cache_dir.mkdir(parents=True, exist_ok=True)
+    cached = _cache_path(cache_dir, params, page)
+
+    # A cached page always wins, whether or not we are offline.
+    if cached.exists():
+        return json.loads(cached.read_text(encoding="utf-8"))
+
+    if offline:
+        raise click.ClickException(f"Offline mode: no cache found for page {page} ({cached.name})")
+
+    user_agent, api_key = credentials
+    request_headers = {
+        "Host": "data.usajobs.gov",
+        "User-Agent": user_agent,
+        "Authorization-Key": api_key,
+    }
+    query = dict(params)
+    query["Page"] = page
+
+    response = requests.get(API_URL, params=query, headers=request_headers, timeout=30)
+    response.raise_for_status()
+    payload = response.json()
+    # Only successful responses reach this point, so errors are never cached.
+    cached.write_text(json.dumps(payload, indent=2), encoding="utf-8")
+    return payload
+
+
+def fetch_all(
+    params: dict,
+    limit: int,
+    credentials: tuple[str, str],
+    cache_dir: Path,
+    offline: bool,
+    debug: bool,
+) -> list[dict]:
+    """Fetch consecutive result pages and return at most *limit* raw items.
+
+    Paging starts at page 1 and stops when *limit* items have been gathered,
+    a page comes back with no items, or the running total reaches the count
+    the API reports in ``SearchResultCountAll``. With *debug* set, per-page
+    progress and a final summary are echoed.
+    """
+    jobs: list[dict] = []
+    page_no = 0
+    while len(jobs) < limit:
+        page_no += 1
+        payload = fetch_page(params, page_no, credentials, cache_dir, offline)
+        search_result = payload.get("SearchResult", {})
+        batch = search_result.get("SearchResultItems", [])
+        if not batch:
+            break
+        jobs += batch
+        reported_total = int(search_result.get("SearchResultCountAll", 0))
+        if debug:
+            click.echo(
+                f"[debug] page {page_no}: got {len(batch)}, running total {len(jobs)}, "
+                f"api reports {reported_total} total"
+            )
+        # Nothing left on the server side: no point requesting another page.
+        if reported_total <= len(jobs):
+            break
+    if debug:
+        click.echo(f"[debug] fetch complete: {len(jobs)} raw jobs")
+    return jobs[:limit]
+
+
+# ---------------------------------------------------------------------------
+# normalization
+# ---------------------------------------------------------------------------
+
+def _strip_html(text: str) -> str:
+    """Remove ``<...>`` tags from *text* and trim surrounding whitespace.
+
+    A falsy *text* (``None`` or ``""``) yields an empty string.
+    """
+    source = text if text else ""
+    without_tags = re.sub(r"<[^>]+>", "", source)
+    return without_tags.strip()
+
+
+def _to_int(val) -> int | None:
+    """Convert *val* to an int, or return ``None`` when that is not possible.
+
+    The value goes through ``float`` first, so numeric strings such as
+    ``"150000.0"`` are accepted and fractions are truncated toward zero.
+    A result of zero is reported as ``None`` (callers treat 0 as "no value").
+    Unparseable input, ``None``, NaN and infinities all yield ``None``.
+    """
+    try:
+        result = int(float(val))
+    except (TypeError, ValueError, OverflowError):
+        # TypeError: None / non-numeric types; ValueError: bad strings and NaN;
+        # OverflowError: int() of an infinite float.
+        return None
+    return result or None
+
+
+def normalize_job(raw: dict) -> dict:
+    """Flatten one raw search result item into the dict used by the rest of the CLI.
+
+    *raw* is either a result item wrapping a ``MatchedObjectDescriptor`` or the
+    descriptor itself. Absent numeric fields become ``None`` and absent text
+    fields become ``""``.
+
+    Returns:
+        A dict with the keys ``document_id``, ``title``, ``agency``,
+        ``department``, ``pay_plan``, ``low_grade``, ``high_grade``,
+        ``salary_min``, ``salary_max``, ``location``, ``close_date``,
+        ``travel``, ``clearance``, ``clearance_text_match``, ``url`` and
+        ``raw_posting_text``.
+    """
+    mod = raw.get("MatchedObjectDescriptor") or raw
+    # Tolerate UserArea / Details being absent or null instead of crashing.
+    details = (mod.get("UserArea") or {}).get("Details") or {}
+
+    # pay plan — lives in JobGrade[0].Code (e.g. "GS", "GG")
+    job_grade = (mod.get("JobGrade") or [{}])[0] or {}
+    pay_plan: str | None = job_grade.get("Code") or None
+    if pay_plan:
+        pay_plan = pay_plan.upper()
+
+    # grades
+    low_grade = _to_int(details.get("LowGrade") or mod.get("JobGradeLow"))
+    high_grade = _to_int(details.get("HighGrade") or mod.get("JobGradeHigh"))
+
+    # salary — only the first remuneration entry is used
+    salary_min = salary_max = None
+    remuneration = mod.get("PositionRemuneration") or []
+    if remuneration:
+        first_pay = remuneration[0] or {}
+        salary_min = _to_int(first_pay.get("MinimumRange"))
+        salary_max = _to_int(first_pay.get("MaximumRange"))
+
+    # location — every listed location is kept, so the local city filter in
+    # passes_filters() can match a posting on any of its duty stations
+    location = _join_location_names(mod.get("PositionLocation") or [])
+
+    # url — prefer the first apply link, fall back to the posting page
+    apply_uris = mod.get("ApplyURI") or []
+    url = apply_uris[0] if apply_uris else mod.get("PositionURI", "")
+
+    # clearance — shape TBD; store raw text for now
+    clearance_raw = details.get("SecurityClearance") or details.get("Clearances") or ""
+    if isinstance(clearance_raw, list):
+        clearance_raw = "; ".join(str(x) for x in clearance_raw)
+
+    # close date — trim to YYYY-MM-DD
+    close_date = (mod.get("ApplicationCloseDate") or "")[:10]
+
+    return {
+        "document_id": raw.get("MatchedObjectId") or mod.get("MatchedObjectId", ""),
+        "title": mod.get("PositionTitle", ""),
+        "agency": mod.get("OrganizationName", ""),
+        "department": mod.get("DepartmentName", ""),
+        "pay_plan": pay_plan,
+        "low_grade": low_grade,
+        "high_grade": high_grade,
+        "salary_min": salary_min,
+        "salary_max": salary_max,
+        "location": location,
+        "close_date": close_date,
+        "travel": details.get("TravelPercentage") or details.get("Travel") or "",
+        "clearance": clearance_raw,
+        "clearance_text_match": clearance_raw,
+        "url": url,
+        "raw_posting_text": _collect_posting_text(details),
+    }
+
+
+def _join_location_names(locations: list) -> str:
+    """Join the distinct, non-empty ``LocationName`` values with ``"; "``."""
+    names = ((entry or {}).get("LocationName") or "" for entry in locations)
+    # dict.fromkeys drops repeats while keeping first-seen order.
+    return "; ".join(dict.fromkeys(name for name in names if name))
+
+
+def _collect_posting_text(details: dict) -> str:
+    """Assemble the free-text sections of a posting into one plain-text blob."""
+    # (heading, candidate keys) — the first candidate key with content wins.
+    section_keys = [
+        ("Summary", ["JobSummary"]),
+        ("Duties", ["MajorDuties", "Duties"]),
+        ("Requirements", ["Requirements"]),
+        ("Qualifications", ["Qualifications"]),
+        ("Evaluations", ["Evaluations"]),
+        ("Other Information", ["OtherInformation", "OtherInfo"]),
+        ("Key Requirements", ["KeyRequirements"]),
+    ]
+    parts: list[str] = []
+    for heading, keys in section_keys:
+        for k in keys:
+            content = details.get(k)
+            if not content:
+                continue
+            if isinstance(content, list):
+                content = "\n".join(str(x) for x in content)
+            parts.append(f"{heading}\n{_strip_html(content)}")
+            break
+    return "\n\n".join(parts)
+
+
+# ---------------------------------------------------------------------------
+# filtering
+# ---------------------------------------------------------------------------
+
+def passes_filters(
+    job: dict,
+    pay_plans: tuple[str, ...],
+    grade_min: int | None,
+    grade_max: int | None,
+    salary_min_k: int | None,
+    location: str | None,
+) -> bool:
+    """Return ``True`` when a normalized *job* survives every local filter.
+
+    Each filter is lenient about missing data: a job whose relevant field is
+    ``None`` or empty is never rejected by that filter.
+
+    * pay plan: case-insensitive membership in *pay_plans*.
+    * grades: the job's low grade must not be below *grade_min* and its high
+      grade must not exceed *grade_max*.
+    * salary: the job's maximum salary (or, if unknown, its minimum) must
+      reach ``salary_min_k * 1000``.
+    * location: the text before the first comma of *location* must occur,
+      case-insensitively, in the job's location.
+    """
+    plan = job["pay_plan"] if pay_plans else None
+    if plan is not None:
+        allowed = {p.upper() for p in pay_plans}
+        if plan.upper() not in allowed:
+            return False
+
+    low = job["low_grade"] if grade_min is not None else None
+    if low is not None and low < grade_min:
+        return False
+
+    high = job["high_grade"] if grade_max is not None else None
+    if high is not None and high > grade_max:
+        return False
+
+    if salary_min_k is not None:
+        floor = salary_min_k * 1000
+        # Compare against the top of the range; fall back to the bottom only
+        # when no maximum is known.
+        comparable = job["salary_max"]
+        if comparable is None:
+            comparable = job["salary_min"]
+        if comparable is not None and comparable < floor:
+            return False
+
+    job_location = job["location"] if location else ""
+    if job_location:
+        # match on the city part only ("Washington, DC" → "washington")
+        # because the API returns full names like "Washington, District of Columbia"
+        city = location.partition(",")[0].strip().lower()
+        return city in job_location.lower()
+
+    return True
+
+
+# ---------------------------------------------------------------------------
+# display
+# ---------------------------------------------------------------------------
+
+def _fmt_salary(sal_min: int | None, sal_max: int | None) -> str:
+    """Format a salary range in whole thousands, e.g. ``"$120k-$160k"``.
+
+    Returns ``"n/a"`` when the minimum is unknown, and just the minimum when
+    the maximum is missing or zero.
+    """
+    if sal_min is None:
+        return "n/a"
+    pieces = [f"${sal_min // 1000}k"]
+    if sal_max:
+        pieces.append(f"${sal_max // 1000}k")
+    return "-".join(pieces)
+
+
+def _fmt_grade(pay_plan: str | None, low: int | None, high: int | None) -> str:
+    """Format a pay plan and grade range, e.g. ``"GS-13/15"`` or ``"GS-13"``.
+
+    With no low grade the bare pay plan is returned, or ``"n/a"`` when that is
+    missing too. With a grade but no pay plan only the grade part is returned,
+    so the result never starts with a dangling dash.
+    """
+    pp = (pay_plan or "").upper()
+    if low is None:
+        return pp or "n/a"
+    # Show "low/high" only for a real range; a single grade stands alone.
+    grades = f"{low}/{high}" if high is not None and high != low else str(low)
+    return f"{pp}-{grades}" if pp else grades
+
+
+def _trunc(s: str, n: int) -> str:
+    """Return *s* cut to at most *n* characters, ending in ``...`` when shortened.
+
+    A falsy *s* is treated as the empty string.
+    """
+    text = s or ""
+    if len(text) > n:
+        return text[: n - 3] + "..."
+    return text
+
+
+def render_table(jobs: list[dict]) -> None:
+    """Print *jobs* as a numbered table on the shared console.
+
+    Prints a yellow "no matches" notice instead when *jobs* is empty.
+    """
+    if not jobs:
+        console.print("[yellow]No jobs matched your filters.[/yellow]")
+        return
+
+    # (header, add_column keyword arguments), in display order.
+    columns = [
+        ("#", {"style": "dim", "width": 4}),
+        ("Title", {"min_width": 28}),
+        ("Agency", {"min_width": 16}),
+        ("Grade", {"width": 9}),
+        ("Salary", {"width": 14}),
+        ("Location", {"min_width": 16}),
+        ("Closes", {"width": 11}),
+        ("Clearance", {"min_width": 12}),
+        ("URL", {}),
+    ]
+    table = Table(show_header=True, header_style="bold cyan", box=None, pad_edge=False)
+    for header, options in columns:
+        table.add_column(header, **options)
+
+    for row_no, job in enumerate(jobs, start=1):
+        cells = [
+            str(row_no),
+            _trunc(job["title"], 50),
+            _trunc(job["agency"], 22),
+            _fmt_grade(job["pay_plan"], job["low_grade"], job["high_grade"]),
+            _fmt_salary(job["salary_min"], job["salary_max"]),
+            _trunc(job["location"], 20),
+            job["close_date"] or "",
+            _trunc(job["clearance"] or "", 16),
+            job["url"] or "",
+        ]
+        table.add_row(*cells)
+
+    console.print(table)
+
+
+def compact_job_label(job: dict, idx: int) -> str:
+    """Build the one-line, pipe-separated picker label for *job*.
+
+    Layout: ``[idx] agency | grade | salary | location | title`` with the
+    first four fields padded to fixed widths.
+    """
+    agency = _trunc(job["agency"], 20)
+    grade = _fmt_grade(job["pay_plan"], job["low_grade"], job["high_grade"])
+    salary = _fmt_salary(job["salary_min"], job["salary_max"])
+    where = _trunc(job["location"], 18)
+    title = _trunc(job["title"], 55)
+    return f"[{idx:>3}] {agency:<20} | {grade:<8} | {salary:<14} | {where:<18} | {title}"
+
+
+# ---------------------------------------------------------------------------
+# selection
+# ---------------------------------------------------------------------------
+
+def choose_jobs(jobs: list[dict], select_all: bool = False) -> list[dict]:
+    """Show a checkbox prompt and return the jobs the user marked.
+
+    Args:
+        jobs: Normalized jobs to offer, in display order.
+        select_all: Pre-check every entry when true.
+
+    Returns:
+        The marked jobs, or an empty list when nothing was marked or the
+        prompt was cancelled.
+    """
+    # Choices are keyed by 1-based position, not document_id: an id can be
+    # empty or repeated, and keying on it would collapse such jobs into one
+    # entry and export the wrong posting. Positions are always unique, and
+    # starting at 1 keeps every choice value truthy.
+    choices = [
+        Choice(
+            title=compact_job_label(job, idx),
+            value=idx,
+            checked=select_all,
+        )
+        for idx, job in enumerate(jobs, start=1)
+    ]
+    selected_positions = questionary.checkbox(
+        "mark jobs to export",
+        choices=choices,
+        instruction="space=mark/unmark, enter=export, ctrl-c=cancel",
+        use_jk_keys=True,
+        use_emacs_keys=True,
+    ).ask()
+    if not selected_positions:
+        return []
+    return [jobs[position - 1] for position in selected_positions]
+
+
+# ---------------------------------------------------------------------------
+# export
+# ---------------------------------------------------------------------------
+
+def _shorten_title(title: str) -> str:
+    """Tame shouting titles and cap the length at 80 characters.
+
+    Any run of three or more whitespace-separated all-caps words (two or more
+    letters each) is rewritten with each word capitalized and single spaces
+    between them. The result is cut to 80 characters and stripped.
+    """
+    def _title_case_run(match: re.Match) -> str:
+        run = match.group(0)
+        tokens = run.split()
+        if len(tokens) < 3:
+            return run
+        return " ".join(token.capitalize() for token in tokens)
+
+    result = re.sub(r"(?:[A-Z]{2,}\s+){2,}[A-Z]{2,}", _title_case_run, title)
+    return result[:80].strip()
+
+
+def _location_slug(location: str) -> str:
+    """Turn a location into a lowercase, dash-separated file-name fragment.
+
+    Characters other than word characters, whitespace and ``-`` are dropped;
+    whitespace runs become a single ``-``. An empty result becomes
+    ``"unknown"``.
+    """
+    cleaned = re.sub(r"[^\w\s-]", "", location.lower()).strip()
+    slug = re.sub(r"\s+", "-", cleaned)
+    return slug if slug else "unknown"
+
+
+def _filters_slug(
+    series: tuple,
+    pay_plans: tuple,
+    grade_min: int | None,
+    grade_max: int | None,
+    salary_min_k: int | None,
+) -> str:
+    """Summarize the active filters as a file-name fragment.
+
+    Segments, joined with ``_``: the series codes joined with ``-``; the
+    lower-cased pay plans run together, followed by the grade (or
+    ``min-max`` range) when a grade bound was given; and ``salary<N>``.
+    Grade bounds are only reflected when pay plans are present. Returns
+    ``"all"`` when no segment applies.
+    """
+    segments: list[str] = []
+
+    if series:
+        segments.append("-".join(series))
+
+    if pay_plans:
+        plan_part = "".join(p.lower() for p in pay_plans)
+        if grade_min is None and grade_max is None:
+            segments.append(plan_part)
+        else:
+            low_txt = str(grade_min or "")
+            high_txt = str(grade_max or "")
+            # Equal bounds collapse to a single grade; otherwise "low-high".
+            grade_part = low_txt if grade_min == grade_max else f"{low_txt}-{high_txt}"
+            segments.append(plan_part + grade_part)
+
+    if salary_min_k:
+        segments.append(f"salary{salary_min_k}")
+
+    return "_".join(segments) or "all"
+
+
+def make_output_path(
+    out: str | None,
+    out_dir: str,
+    location: str | None,
+    series: tuple,
+    pay_plans: tuple,
+    grade_min: int | None,
+    grade_max: int | None,
+    salary_min_k: int | None,
+) -> Path:
+    """Decide where the export file goes.
+
+    An explicit *out* is returned unchanged as a ``Path``. Otherwise *out_dir*
+    is created if needed and the file is named
+    ``usajobs_<location>_<filters>_<YYYYmmdd-HHMM>.org`` inside it.
+    """
+    if out:
+        return Path(out)
+
+    target_dir = Path(out_dir)
+    target_dir.mkdir(parents=True, exist_ok=True)
+    name_parts = [
+        "usajobs",
+        _location_slug(location or ""),
+        _filters_slug(series, pay_plans, grade_min, grade_max, salary_min_k),
+        datetime.now().strftime("%Y%m%d-%H%M"),
+    ]
+    return target_dir / ("_".join(name_parts) + ".org")
+
+
+def export_org(jobs: list[dict], path: Path) -> None:
+    """Write *jobs* to *path* as org-mode entries, one ``**`` heading per job.
+
+    Each entry has a property drawer (agency, grade, close date), a few plain
+    ``key: value`` lines, and a ``*** posting`` child holding the posting
+    text. Missing text fields are written as ``unknown``. The parent
+    directory of *path* is created if it does not exist, and an existing file
+    is overwritten.
+    """
+    lines: list[str] = []
+    for job in jobs:
+        title = _shorten_title(job["title"])
+        url = job["url"] or ""
+        grade = _fmt_grade(job["pay_plan"], job["low_grade"], job["high_grade"])
+        salary = _fmt_salary(job["salary_min"], job["salary_max"])
+
+        # Without a target the link would come out as the malformed
+        # "[[][link]]", so the heading carries a link only when a URL exists.
+        heading = f"** {title} [[{url}][link]]" if url else f"** {title}"
+
+        lines += [
+            heading,
+            ":properties:",
+            f":agency: {job['agency'] or 'unknown'}",
+            f":grade: {grade}",
+            f":close_date: {job['close_date'] or 'unknown'}",
+            ":end:",
+            "",
+            f"salary: {salary}",
+            f"location: {job['location'] or 'unknown'}",
+            f"travel: {job['travel'] or 'unknown'}",
+            f"clearance: {job['clearance'] or 'unknown'}",
+            "",
+            "*** posting",
+            job["raw_posting_text"] or "",
+            "",
+        ]
+    # An explicit output path may point into a directory that does not exist
+    # yet; create it rather than failing after the user made a selection.
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text("\n".join(lines), encoding="utf-8")
+
+
+# ---------------------------------------------------------------------------
+# cli
+# ---------------------------------------------------------------------------
+
+# Root click command group; the `search` subcommand below attaches to it via
+# @cli.command(). Documented with comments on purpose: click would show a
+# docstring here as the group's --help text.
+@click.group()
+def cli() -> None:
+    pass
+
+
+@cli.command()
+@click.option("--location", default=None, help="Location name (e.g. 'Washington, DC')")
+@click.option("--radius", default=None, type=int, help="Search radius in miles")
+@click.option("--series", multiple=True, help="Occupational series code, repeatable")
+@click.option("--clearance", multiple=True, help="Clearance level code, repeatable")
+@click.option("--pay-plan", "pay_plans", multiple=True, default=("GS", "GG"), show_default=True)
+@click.option("--grade-min", default=None, type=int, help="Min grade (local filter)")
+@click.option("--grade-max", default=None, type=int, help="Max grade (local filter)")
+@click.option("--salary-min", "salary_min_k", default=None, type=int,
+              help="Min salary in thousands, e.g. 150 = $150,000 (local filter)")
+@click.option("--limit", default=100, show_default=True, help="Max jobs to fetch")
+@click.option("--out-dir", default="exports", show_default=True)
+@click.option("--out", default=None, help="Explicit output path (overrides --out-dir)")
+@click.option("--cache-dir", default=".cache/usajobs", show_default=True)
+@click.option("--interactive/--no-interactive", default=True, show_default=True)
+@click.option("--select-all", is_flag=True, help="Preselect all jobs in picker")
+@click.option("--dry-run", is_flag=True, help="Show export list without writing")
+@click.option("--offline", is_flag=True, help="Read from cache only, no network")
+@click.option("--debug", is_flag=True, help="Print params and filter counts")
+def search(
+    location, radius, series, clearance, pay_plans,
+    grade_min, grade_max, salary_min_k,
+    limit, out_dir, out, cache_dir,
+    interactive, select_all, dry_run, offline, debug,
+) -> None:
+    """Search USAJOBS, filter the results locally, and export selected postings to an org file."""
+    # Pipeline: build params -> fetch (cached) -> normalize -> local filter ->
+    # table -> optional picker -> export (or dry-run listing).
+
+    # Offline runs are served entirely from the cache and never send a
+    # request, so they must not fail just because credentials are absent.
+    credentials = ("", "") if offline else get_credentials()
+    params = build_params(location, radius, series, clearance, pay_plans)
+
+    if debug:
+        click.echo(f"[debug] api params: {json.dumps(params, indent=2)}")
+
+    raw_jobs = fetch_all(params, limit, credentials, Path(cache_dir), offline, debug)
+    jobs = [normalize_job(r) for r in raw_jobs]
+
+    if debug:
+        click.echo(f"[debug] before local filter: {len(jobs)}")
+
+    jobs = [j for j in jobs if passes_filters(j, pay_plans, grade_min, grade_max, salary_min_k, location)]
+
+    if debug:
+        click.echo(f"[debug] after local filter: {len(jobs)}")
+
+    render_table(jobs)
+
+    if not jobs:
+        return
+
+    # Non-interactive runs export everything that passed the filters.
+    selected = choose_jobs(jobs, select_all=select_all) if interactive else jobs
+
+    if not selected:
+        click.echo("Nothing selected. Exiting without writing.")
+        return
+
+    if dry_run:
+        click.echo(f"[dry-run] would export {len(selected)} job(s):")
+        for j in selected:
+            click.echo(f"  {_trunc(j['title'], 70)} — {j['agency']}")
+        return
+
+    path = make_output_path(out, out_dir, location, series, pay_plans, grade_min, grade_max, salary_min_k)
+    export_org(selected, path)
+    click.echo(f"Exported {len(selected)} job(s) -> {path}")
+
+
+# Run the click group only when this file is executed as a script, not when
+# it is imported.
+if __name__ == "__main__":
+    cli()