Added auth token headers and dotenv config so the Costco scrape succeeds across 36 months of receipts

This commit is contained in:
2026-03-16 13:46:22 -04:00
parent de0c276a24
commit a48a3c8396
2 changed files with 46 additions and 10 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,9 @@
import os
import csv
import json
import time
import re
from dotenv import load_dotenv
from calendar import monthrange
from datetime import datetime, timedelta
from pathlib import Path
@@ -208,8 +211,16 @@ ITEM_FIELDS = [
]
def build_headers():
def load_config():
    """Collect the Costco auth settings from environment variables.

    ``load_dotenv()`` is called first, then each setting is read with
    ``os.getenv``. Values are whitespace-stripped, and a variable that is
    not set yields an empty string.

    Returns:
        dict: keys ``authorization``, ``client_id`` and
        ``client_identifier``, each mapped to a (possibly empty) string.
    """
    load_dotenv()
    # Config key -> name of the environment variable that supplies it.
    env_names = {
        "authorization": "COSTCO_X_AUTHORIZATION",
        "client_id": "COSTCO_X_WCS_CLIENTID",
        "client_identifier": "COSTCO_CLIENT_IDENTIFIER",
    }
    return {
        key: os.getenv(env_name, "").strip()
        for key, env_name in env_names.items()
    }
def build_headers(config):
headers = {
"accept": "*/*",
"content-type": "application/json-patch+json",
"costco.service": "restOrders",
@@ -221,12 +232,18 @@ def build_headers():
"Gecko/20100101 Firefox/148.0"
),
}
if config["authorization"]:
headers["costco-x-authorization"] = config["authorization"]
if config["client_id"]:
headers["costco-x-wcs-clientId"] = config["client_id"]
if config["client_identifier"]:
headers["client-identifier"] = config["client_identifier"]
return headers
def build_session():
def build_session(config):
session = requests.Session()
session.cookies.update(browser_cookie3.firefox(domain_name="costco.com"))
session.headers.update(build_headers())
session.cookies.update(browser_cookie3.firefox(domain_name=".costco.com"))
session.headers.update(build_headers(config))
return session
@@ -254,6 +271,8 @@ def graphql_post(session, query, variables):
raise RuntimeError("failed to fetch Costco GraphQL payload")
def safe_filename(value):
    """Return ``value`` as a string that is safe to use as a file name.

    Each run of characters that cannot appear in a file name is replaced by
    a single ``-``. That covers the Windows-reserved punctuation
    ``< > : " / \\ | ? *`` and the ASCII control characters 0x00-0x1F
    (also rejected by Windows; NUL is rejected on POSIX as well).

    Args:
        value: Any object; it is converted with ``str()`` first.

    Returns:
        str: The sanitized text. No extension is appended.
    """
    # Control characters share the class with the reserved punctuation so a
    # mixed run still collapses into one "-".
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]+', "-", str(value))
def summary_receipts(payload):
    """Return the receipts found in a ``receiptsWithCounts`` summary payload.

    Looks up ``payload["data"]["receiptsWithCounts"]["receipts"]``. A level
    that is missing or explicitly null is treated as empty, so this returns
    ``[]`` instead of raising ``AttributeError`` or returning ``None``.

    Args:
        payload: Decoded JSON response (a dict), or ``None``.

    Returns:
        The ``receipts`` value, or ``[]`` when it is absent or null.
    """
    # ``.get(key, {})`` only covers a missing key; ``or {}`` also covers a
    # key that is present with a null value (e.g. ``"data": null``).
    data = (payload or {}).get("data") or {}
    receipts_with_counts = data.get("receiptsWithCounts") or {}
    return receipts_with_counts.get("receipts") or []
@@ -423,7 +442,7 @@ def flatten_costco_data(summary_payload, detail_payloads, raw_dir):
for row in summary_row.get("couponArray", []) or []
if row.get("upcnumberCoupon")
}
raw_order_path = raw_dir / f"{receipt_id or order_id}.json"
raw_order_path = raw_dir / f"{safe_filename(receipt_id or order_id)}.json"
orders.append(
{
@@ -568,16 +587,23 @@ def write_csv(path, rows, fieldnames):
)
@click.option(
"--months-back",
default=3,
default=36,
show_default=True,
type=int,
help="How many months of receipts to enumerate back from today.",
)
def main(outdir, document_type, document_sub_type, window_days, months_back):
outdir = Path(outdir)
raw_dir = outdir / "raw"
raw_dir = outdir / "raw"
try:
session = build_session()
config = load_config()
click.echo(
"auth headers present: "
f"authorization={bool(config['authorization'])}, "
f"client_id={bool(config['client_id'])}, "
f"client_identifier={bool(config['client_identifier'])}"
)
session = build_session(config)
except Exception as exc:
raise click.ClickException(
f"failed to load Costco Firefox cookies: {exc}"
@@ -607,7 +633,7 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
{"barcode": barcode, "documentType": "warehouse"},
)
detail_payloads.append(detail_payload)
write_json(raw_dir / f"{receipt_id}.json", detail_payload)
write_json(raw_dir / f"{safe_filename(receipt_id)}.json", detail_payload)
orders, items = flatten_costco_data(summary_payload, detail_payloads, raw_dir)
write_csv(outdir / "orders.csv", orders, ORDER_FIELDS)
@@ -617,3 +643,4 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
if __name__ == "__main__":
main()