#!/usr/bin/env python3
"""Search the USAJOBS API, filter postings locally, and export picks to Org.

Pipeline implemented by the ``search`` command:

1. build query parameters from the CLI options,
2. fetch result pages (each page is cached on disk as JSON),
3. normalize every raw posting into a flat dict,
4. apply local filters (pay plan, grade, salary, location),
5. render a table, optionally let the user pick jobs interactively,
6. write the selected jobs to an Org-mode file.
"""

import hashlib
import json
import os
import re
import sys
from datetime import datetime
from pathlib import Path

import click
import questionary
import requests
from dotenv import load_dotenv
from questionary import Choice
from rich.console import Console
from rich.table import Table

load_dotenv()

console = Console()

API_URL = "https://data.usajobs.gov/api/search"

# Matches the start of any line that Org would parse as a headline
# (one or more stars followed by a space, at column 0).
_ORG_HEADLINE_RE = re.compile(r"^(?=\*+ )", re.MULTILINE)


# ---------------------------------------------------------------------------
# credentials
# ---------------------------------------------------------------------------

def get_credentials() -> tuple[str, str]:
    """Return ``(email, key)`` read from USAJOBS_EMAIL / USAJOBS_KEY.

    Prints an error naming every missing variable and exits with status 1
    when either one is unset or empty.
    """
    email = os.environ.get("USAJOBS_EMAIL")
    key = os.environ.get("USAJOBS_KEY")
    missing = [v for v, val in [("USAJOBS_EMAIL", email), ("USAJOBS_KEY", key)] if not val]
    if missing:
        click.echo(f"Error: missing environment variable(s): {', '.join(missing)}", err=True)
        click.echo("Add them to your .env file or export them before running.", err=True)
        sys.exit(1)
    return email, key


# ---------------------------------------------------------------------------
# api layer
# ---------------------------------------------------------------------------

def build_params(
    location: str | None,
    radius: int | None,
    series: tuple[str, ...],
    clearance: tuple[str, ...],
    pay_plans: tuple[str, ...],
) -> dict:
    """Translate CLI filters into the query-parameter dict sent to the API.

    Multi-valued filters are joined with ``;``. Pay plan codes are
    upper-cased. Filters that are empty/None are omitted entirely.
    """
    # NOTE: JobCategoryCode and SecurityClearances param names are best guesses
    # pending verification against a live response — update after first real call.
    # NOTE(review): the published Search API parameter list appears to use
    # SecurityClearanceRequired (and JobGradeCode for pay plans) — TODO confirm
    # before relying on server-side filtering; names are left unchanged here.
    params: dict = {
        "Fields": "Full",
        "ResultsPerPage": 500,
        "SortField": "OpenDate",
        "SortDirection": "Desc",
    }
    if location:
        params["LocationName"] = location
    if radius is not None:
        params["Radius"] = radius
    if series:
        params["JobCategoryCode"] = ";".join(series)
    if clearance:
        params["SecurityClearances"] = ";".join(str(c) for c in clearance)
    if pay_plans:
        params["PayPlanCode"] = ";".join(p.upper() for p in pay_plans)
    return params


def _cache_path(cache_dir: Path, params: dict, page: int) -> Path:
    """Return the cache file path for one (params, page) combination.

    The file name is the first 16 hex chars of a SHA-256 over the sorted
    parameter items plus the page number, followed by ``_p<page>.json``.
    """
    key_src = str(sorted(params.items())) + f"|p{page}"
    digest = hashlib.sha256(key_src.encode()).hexdigest()[:16]
    return cache_dir / f"{digest}_p{page}.json"


def fetch_page(
    params: dict,
    page: int,
    credentials: tuple[str, str],
    cache_dir: Path,
    offline: bool,
) -> dict:
    """Return the decoded JSON for one result page, using the disk cache.

    A readable cache file always wins. Otherwise the page is requested from
    the API and written to the cache.

    Raises:
        click.ClickException: in offline mode when no usable cache exists.
        requests.HTTPError: when the API answers with an error status.
    """
    cache_dir.mkdir(parents=True, exist_ok=True)
    path = _cache_path(cache_dir, params, page)

    cache_problem = "no cache found"
    if path.exists():
        try:
            return json.loads(path.read_text(encoding="utf-8"))
        except json.JSONDecodeError:
            # A truncated/corrupt file (e.g. an interrupted write) is treated
            # as a cache miss so one bad file cannot break every later run.
            cache_problem = "cache file is corrupt"

    if offline:
        raise click.ClickException(f"Offline mode: {cache_problem} for page {page} ({path.name})")

    email, key = credentials
    resp = requests.get(
        API_URL,
        params={**params, "Page": page},
        headers={
            "Host": "data.usajobs.gov",
            "User-Agent": email,
            "Authorization-Key": key,
        },
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()

    # Write to a sibling temp file and rename, so readers never observe a
    # half-written cache entry.
    tmp_path = path.with_name(path.name + ".tmp")
    tmp_path.write_text(json.dumps(data, indent=2), encoding="utf-8")
    tmp_path.replace(path)
    return data


def fetch_all(
    params: dict,
    limit: int,
    credentials: tuple[str, str],
    cache_dir: Path,
    offline: bool,
    debug: bool,
) -> list[dict]:
    """Fetch pages until ``limit`` items are collected or results run out.

    Paging stops when a page is empty, when the running total reaches the
    count the API reports in ``SearchResultCountAll``, or when ``limit`` is
    reached. At most ``limit`` raw items are returned.
    """
    collected: list[dict] = []
    page = 1
    while len(collected) < limit:
        data = fetch_page(params, page, credentials, cache_dir, offline)
        result = data.get("SearchResult") or {}
        items = result.get("SearchResultItems") or []
        if not items:
            break
        collected.extend(items)
        total_available = int(result.get("SearchResultCountAll") or 0)
        if debug:
            click.echo(
                f"[debug] page {page}: got {len(items)}, running total {len(collected)}, "
                f"api reports {total_available} total"
            )
        if len(collected) >= total_available:
            break
        page += 1
    if debug:
        click.echo(f"[debug] fetch complete: {len(collected)} raw jobs")
    return collected[:limit]


# ---------------------------------------------------------------------------
# normalization
# ---------------------------------------------------------------------------

def _strip_html(text: str) -> str:
    """Remove anything that looks like an HTML tag and trim whitespace."""
    return re.sub(r"<[^>]+>", "", text or "").strip()


def _to_int(val) -> int | None:
    """Convert ``val`` to int via float; return None if impossible or zero.

    Zero is mapped to None on purpose so that "0" placeholders are treated
    the same as missing values by the filters and formatters.
    """
    try:
        result = int(float(val))
    except (TypeError, ValueError, OverflowError):
        # OverflowError: int(float("inf")) — treat like any other bad value.
        return None
    return result if result else None


def normalize_job(raw: dict) -> dict:
    """Flatten one raw search result item into the dict used downstream.

    Reads from ``raw["MatchedObjectDescriptor"]`` (falling back to ``raw``
    itself) and its ``UserArea.Details`` sub-dict. Missing fields become
    ``""`` or ``None``; nothing here raises for absent keys.
    """
    mod = raw.get("MatchedObjectDescriptor", raw)
    # "or {}" also covers keys that are present but null.
    user_area = mod.get("UserArea") or {}
    details = user_area.get("Details") or {}

    # pay plan — lives in JobGrade[0].Code (e.g. "GS", "GG")
    job_grade = (mod.get("JobGrade") or [{}])[0]
    pay_plan: str | None = job_grade.get("Code") or None
    if pay_plan:
        pay_plan = pay_plan.upper()

    # grades
    low_grade = _to_int(details.get("LowGrade") or mod.get("JobGradeLow"))
    high_grade = _to_int(details.get("HighGrade") or mod.get("JobGradeHigh"))

    # salary
    salary_min = salary_max = None
    remuneration = mod.get("PositionRemuneration") or []
    if remuneration:
        r = remuneration[0]
        salary_min = _to_int(r.get("MinimumRange"))
        salary_max = _to_int(r.get("MaximumRange"))

    # location — join all location names if multiple, so the local location
    # filter can match a posting on any of its duty stations
    location_names: list[str] = []
    for loc in mod.get("PositionLocation") or []:
        name = (loc.get("LocationName") or "").strip()
        if name and name not in location_names:
            location_names.append(name)
    location = "; ".join(location_names)

    # url
    apply_uris = mod.get("ApplyURI") or []
    url = apply_uris[0] if apply_uris else mod.get("PositionURI", "")

    # clearance — shape TBD; store raw text for now
    clearance_raw = details.get("SecurityClearance") or details.get("Clearances") or ""
    if isinstance(clearance_raw, list):
        clearance_raw = "; ".join(str(x) for x in clearance_raw)

    # close date — trim to YYYY-MM-DD
    close_date = (mod.get("ApplicationCloseDate") or "")[:10]

    # raw posting text: for each heading, use the first candidate key that
    # has content
    section_keys = [
        ("Summary", ["JobSummary"]),
        ("Duties", ["MajorDuties", "Duties"]),
        ("Requirements", ["Requirements"]),
        ("Qualifications", ["Qualifications"]),
        ("Evaluations", ["Evaluations"]),
        ("Other Information", ["OtherInformation", "OtherInfo"]),
        ("Key Requirements", ["KeyRequirements"]),
    ]
    parts: list[str] = []
    for heading, keys in section_keys:
        for k in keys:
            content = details.get(k)
            if content:
                if isinstance(content, list):
                    content = "\n".join(str(x) for x in content)
                parts.append(f"{heading}\n{_strip_html(content)}")
                break

    return {
        "document_id": raw.get("MatchedObjectId") or mod.get("MatchedObjectId", ""),
        "title": mod.get("PositionTitle", ""),
        "agency": mod.get("OrganizationName", ""),
        "department": mod.get("DepartmentName", ""),
        "pay_plan": pay_plan,
        "low_grade": low_grade,
        "high_grade": high_grade,
        "salary_min": salary_min,
        "salary_max": salary_max,
        "location": location,
        "close_date": close_date,
        "travel": details.get("TravelPercentage") or details.get("Travel") or "",
        "clearance": clearance_raw,
        "clearance_text_match": clearance_raw,
        "url": url,
        "raw_posting_text": "\n\n".join(parts),
    }


# ---------------------------------------------------------------------------
# filtering
# ---------------------------------------------------------------------------

def passes_filters(
    job: dict,
    pay_plans: tuple[str, ...],
    grade_min: int | None,
    grade_max: int | None,
    salary_min_k: int | None,
    location: str | None,
) -> bool:
    """Return True when a normalized job survives every local filter.

    Each filter is skipped when the job lacks the corresponding field, so
    postings with missing data are kept rather than silently dropped.
    ``salary_min_k`` is in thousands of dollars and is compared against the
    job's maximum salary, or its minimum when no maximum is known.
    """
    if pay_plans and job["pay_plan"] is not None:
        if job["pay_plan"].upper() not in {p.upper() for p in pay_plans}:
            return False

    if grade_min is not None and job["low_grade"] is not None:
        if job["low_grade"] < grade_min:
            return False

    if grade_max is not None and job["high_grade"] is not None:
        if job["high_grade"] > grade_max:
            return False

    if salary_min_k is not None:
        threshold = salary_min_k * 1000
        if job["salary_max"] is not None:
            if job["salary_max"] < threshold:
                return False
        elif job["salary_min"] is not None:
            if job["salary_min"] < threshold:
                return False

    if location and job["location"]:
        # match on the city part only ("Washington, DC" → "washington")
        # because the API returns full names like "Washington, District of Columbia"
        city = location.split(",")[0].strip().lower()
        if city not in job["location"].lower():
            return False

    return True


# ---------------------------------------------------------------------------
# display
# ---------------------------------------------------------------------------

def _fmt_salary(sal_min: int | None, sal_max: int | None) -> str:
    """Format a salary range as ``$120k-$150k``; ``n/a`` without a minimum."""
    if sal_min is None:
        return "n/a"
    lo = f"${sal_min // 1000}k"
    if sal_max:
        return f"{lo}-${sal_max // 1000}k"
    return lo


def _fmt_grade(pay_plan: str | None, low: int | None, high: int | None) -> str:
    """Format pay plan and grade range, e.g. ``GS-12/13`` or ``GS-12``.

    Without a low grade only the pay plan (or ``n/a``) is shown. Without a
    pay plan the grade is shown bare, with no leading dash.
    """
    pp = (pay_plan or "").upper()
    if low is None:
        return pp or "n/a"
    prefix = f"{pp}-" if pp else ""
    if high is not None and high != low:
        return f"{prefix}{low}/{high}"
    return f"{prefix}{low}"


def _trunc(s: str, n: int) -> str:
    """Return ``s`` cut to at most ``n`` chars, ending in ``...`` if cut."""
    s = s or ""
    return s if len(s) <= n else s[: n - 3] + "..."


def render_table(jobs: list[dict]) -> None:
    """Print the jobs as a rich table, or a notice when the list is empty."""
    if not jobs:
        console.print("[yellow]No jobs matched your filters.[/yellow]")
        return

    table = Table(show_header=True, header_style="bold cyan", box=None, pad_edge=False)
    table.add_column("#", style="dim", width=4)
    table.add_column("Title", min_width=28)
    table.add_column("Agency", min_width=16)
    table.add_column("Grade", width=9)
    table.add_column("Salary", width=14)
    table.add_column("Location", min_width=16)
    table.add_column("Closes", width=11)
    table.add_column("Clearance", min_width=12)
    table.add_column("URL")

    for idx, job in enumerate(jobs, start=1):
        table.add_row(
            str(idx),
            _trunc(job["title"], 50),
            _trunc(job["agency"], 22),
            _fmt_grade(job["pay_plan"], job["low_grade"], job["high_grade"]),
            _fmt_salary(job["salary_min"], job["salary_max"]),
            _trunc(job["location"], 20),
            job["close_date"] or "",
            _trunc(job["clearance"] or "", 16),
            job["url"] or "",
        )

    console.print(table)


def compact_job_label(job: dict, idx: int) -> str:
    """Return the fixed-width one-line label shown for a job in the picker."""
    grade = _fmt_grade(job["pay_plan"], job["low_grade"], job["high_grade"])
    salary = _fmt_salary(job["salary_min"], job["salary_max"])
    return (
        f"[{idx:>3}] {_trunc(job['agency'], 20):<20} | "
        f"{grade:<8} | {salary:<14} | "
        f"{_trunc(job['location'], 18):<18} | "
        f"{_trunc(job['title'], 55)}"
    )


# ---------------------------------------------------------------------------
# selection
# ---------------------------------------------------------------------------

def choose_jobs(jobs: list[dict], select_all: bool = False) -> list[dict]:
    """Show a checkbox picker and return the jobs the user marked.

    Returns an empty list when nothing is marked or the prompt yields no
    answer. ``select_all`` pre-checks every entry.
    """
    # Choices carry the job's 1-based list position rather than its
    # document_id: IDs can be empty or repeated, and keying on them would
    # collapse distinct jobs and export the wrong one.
    choices = [
        Choice(
            title=compact_job_label(job, idx),
            value=idx,
            checked=select_all,
        )
        for idx, job in enumerate(jobs, start=1)
    ]

    selected_positions = questionary.checkbox(
        "mark jobs to export",
        choices=choices,
        instruction="space=mark/unmark, enter=export, ctrl-c=cancel",
        use_jk_keys=True,
        use_emacs_keys=True,
    ).ask()

    if not selected_positions:
        return []

    return [jobs[pos - 1] for pos in selected_positions]


# ---------------------------------------------------------------------------
# export
# ---------------------------------------------------------------------------

def _shorten_title(title: str) -> str:
    """Capitalize runs of 3+ ALL-CAPS words and cap the title at 80 chars."""

    def _lower_long_caps(m: re.Match) -> str:
        # The pattern only ever matches three or more words, so no length
        # check is needed here.
        return " ".join(w.capitalize() for w in m.group(0).split())

    shortened = re.sub(r"(?:[A-Z]{2,}\s+){2,}[A-Z]{2,}", _lower_long_caps, title)
    return shortened[:80].strip()


def _location_slug(location: str) -> str:
    """Return a lowercase, dash-separated file-name slug, or ``unknown``."""
    s = re.sub(r"[^\w\s-]", "", location.lower())
    return re.sub(r"\s+", "-", s.strip()) or "unknown"


def _filters_slug(
    series: tuple,
    pay_plans: tuple,
    grade_min: int | None,
    grade_max: int | None,
    salary_min_k: int | None,
) -> str:
    """Summarize the active filters as a file-name fragment, or ``all``."""
    parts: list[str] = []
    if series:
        parts.append("-".join(series))
    if pay_plans:
        pp = "".join(p.lower() for p in pay_plans)
        lo, hi = grade_min, grade_max
        if lo is not None or hi is not None:
            suffix = str(lo or "") if lo == hi else f"{lo or ''}-{hi or ''}"
            parts.append(f"{pp}{suffix}")
        else:
            parts.append(pp)
    if salary_min_k:
        parts.append(f"salary{salary_min_k}")
    return "_".join(parts) or "all"


def make_output_path(
    out: str | None,
    out_dir: str,
    location: str | None,
    series: tuple,
    pay_plans: tuple,
    grade_min: int | None,
    grade_max: int | None,
    salary_min_k: int | None,
) -> Path:
    """Return the export path, creating its parent directory if needed.

    An explicit ``out`` is used verbatim. Otherwise a name of the form
    ``usajobs_<location>_<filters>_<YYYYmmdd-HHMM>.org`` is generated
    inside ``out_dir``.
    """
    if out:
        path = Path(out)
        # Create the parent as well, matching what is done for out_dir, so
        # an explicit path in a new directory does not fail at write time.
        path.parent.mkdir(parents=True, exist_ok=True)
        return path
    exports = Path(out_dir)
    exports.mkdir(parents=True, exist_ok=True)
    loc_slug = _location_slug(location or "")
    filt_slug = _filters_slug(series, pay_plans, grade_min, grade_max, salary_min_k)
    ts = datetime.now().strftime("%Y%m%d-%H%M")
    return exports / f"usajobs_{loc_slug}_{filt_slug}_{ts}.org"


def _escape_org_body(text: str) -> str:
    """Indent lines that Org would otherwise parse as headlines."""
    return _ORG_HEADLINE_RE.sub(" ", text)


def export_org(jobs: list[dict], path: Path) -> None:
    """Write the jobs to ``path`` as Org entries (one ``**`` heading each).

    Each entry has a property drawer (agency, grade, close date), a few
    plain ``key: value`` lines, and a ``*** posting`` child holding the raw
    posting text.
    """
    lines: list[str] = []
    for job in jobs:
        title = _shorten_title(job["title"])
        url = job["url"] or ""
        grade = _fmt_grade(job["pay_plan"], job["low_grade"], job["high_grade"])
        salary = _fmt_salary(job["salary_min"], job["salary_max"])
        lines += [
            f"** {title} [[{url}][link]]",
            ":properties:",
            f":agency: {job['agency'] or 'unknown'}",
            f":grade: {grade}",
            f":close_date: {job['close_date'] or 'unknown'}",
            ":end:",
            "",
            f"salary: {salary}",
            f"location: {job['location'] or 'unknown'}",
            f"travel: {job['travel'] or 'unknown'}",
            f"clearance: {job['clearance'] or 'unknown'}",
            "",
            "*** posting",
            # Posting text may contain "* bullet" lines; left at column 0
            # they would become top-level headlines and break the outline.
            _escape_org_body(job["raw_posting_text"] or ""),
            "",
        ]
    path.write_text("\n".join(lines), encoding="utf-8")


# ---------------------------------------------------------------------------
# cli
# ---------------------------------------------------------------------------

@click.group()
def cli() -> None:
    """Command group for the USAJOBS helper."""


@cli.command()
@click.option("--location", default=None, help="Location name (e.g. 'Washington, DC')")
@click.option("--radius", default=None, type=int, help="Search radius in miles")
@click.option("--series", multiple=True, help="Occupational series code, repeatable")
@click.option("--clearance", multiple=True, help="Clearance level code, repeatable")
@click.option("--pay-plan", "pay_plans", multiple=True, default=("GS", "GG"), show_default=True)
@click.option("--grade-min", default=None, type=int, help="Min grade (local filter)")
@click.option("--grade-max", default=None, type=int, help="Max grade (local filter)")
@click.option("--salary-min", "salary_min_k", default=None, type=int,
              help="Min salary in thousands, e.g. 150 = $150,000 (local filter)")
@click.option("--limit", default=100, show_default=True, help="Max jobs to fetch")
@click.option("--out-dir", default="exports", show_default=True)
@click.option("--out", default=None, help="Explicit output path (overrides --out-dir)")
@click.option("--cache-dir", default=".cache/usajobs", show_default=True)
@click.option("--interactive/--no-interactive", default=True, show_default=True)
@click.option("--select-all", is_flag=True, help="Preselect all jobs in picker")
@click.option("--dry-run", is_flag=True, help="Show export list without writing")
@click.option("--offline", is_flag=True, help="Read from cache only, no network")
@click.option("--debug", is_flag=True, help="Print params and filter counts")
def search(
    location,
    radius,
    series,
    clearance,
    pay_plans,
    grade_min,
    grade_max,
    salary_min_k,
    limit,
    out_dir,
    out,
    cache_dir,
    interactive,
    select_all,
    dry_run,
    offline,
    debug,
) -> None:
    """Search USAJOBS, filter locally, and export selected jobs to Org."""
    # Offline runs only ever read the cache and never send the credentials,
    # so do not force the user to configure them.
    credentials = ("", "") if offline else get_credentials()
    params = build_params(location, radius, series, clearance, pay_plans)

    if debug:
        click.echo(f"[debug] api params: {json.dumps(params, indent=2)}")

    raw_jobs = fetch_all(params, limit, credentials, Path(cache_dir), offline, debug)
    jobs = [normalize_job(r) for r in raw_jobs]

    if debug:
        click.echo(f"[debug] before local filter: {len(jobs)}")

    jobs = [
        j
        for j in jobs
        if passes_filters(j, pay_plans, grade_min, grade_max, salary_min_k, location)
    ]

    if debug:
        click.echo(f"[debug] after local filter: {len(jobs)}")

    render_table(jobs)

    if not jobs:
        return

    if not interactive:
        selected = jobs
    else:
        selected = choose_jobs(jobs, select_all=select_all)

    if not selected:
        click.echo("Nothing selected. Exiting without writing.")
        return

    if dry_run:
        click.echo(f"[dry-run] would export {len(selected)} job(s):")
        for j in selected:
            click.echo(f"  {_trunc(j['title'], 70)} — {j['agency']}")
        return

    path = make_output_path(
        out, out_dir, location, series, pay_plans, grade_min, grade_max, salary_min_k
    )
    export_org(selected, path)
    click.echo(f"Exported {len(selected)} job(s) -> {path}")


if __name__ == "__main__":
    cli()