Added auth token headers and dotenv config loading so Costco scrapes succeed; raised the default --months-back from 3 to 36 months

This commit is contained in:
2026-03-16 13:46:22 -04:00
parent de0c276a24
commit a48a3c8396
2 changed files with 46 additions and 10 deletions

File diff suppressed because one or more lines are too long

View File

@@ -1,6 +1,9 @@
import os
import csv import csv
import json import json
import time import time
import re
from dotenv import load_dotenv
from calendar import monthrange from calendar import monthrange
from datetime import datetime, timedelta from datetime import datetime, timedelta
from pathlib import Path from pathlib import Path
@@ -208,8 +211,16 @@ ITEM_FIELDS = [
] ]
def build_headers(): def load_config():
load_dotenv()
return { return {
"authorization": os.getenv("COSTCO_X_AUTHORIZATION", "").strip(),
"client_id": os.getenv("COSTCO_X_WCS_CLIENTID", "").strip(),
"client_identifier": os.getenv("COSTCO_CLIENT_IDENTIFIER", "").strip(),
}
def build_headers(config):
headers = {
"accept": "*/*", "accept": "*/*",
"content-type": "application/json-patch+json", "content-type": "application/json-patch+json",
"costco.service": "restOrders", "costco.service": "restOrders",
@@ -221,12 +232,18 @@ def build_headers():
"Gecko/20100101 Firefox/148.0" "Gecko/20100101 Firefox/148.0"
), ),
} }
if config["authorization"]:
headers["costco-x-authorization"] = config["authorization"]
if config["client_id"]:
headers["costco-x-wcs-clientId"] = config["client_id"]
if config["client_identifier"]:
headers["client-identifier"] = config["client_identifier"]
return headers
def build_session(config):
def build_session():
session = requests.Session() session = requests.Session()
session.cookies.update(browser_cookie3.firefox(domain_name="costco.com")) session.cookies.update(browser_cookie3.firefox(domain_name=".costco.com"))
session.headers.update(build_headers()) session.headers.update(build_headers(config))
return session return session
@@ -254,6 +271,8 @@ def graphql_post(session, query, variables):
raise RuntimeError("failed to fetch Costco GraphQL payload") raise RuntimeError("failed to fetch Costco GraphQL payload")
def safe_filename(value):
return re.sub(r'[<>:"/\\|?*]+', "-", str(value))
def summary_receipts(payload): def summary_receipts(payload):
return payload.get("data", {}).get("receiptsWithCounts", {}).get("receipts", []) return payload.get("data", {}).get("receiptsWithCounts", {}).get("receipts", [])
@@ -423,7 +442,7 @@ def flatten_costco_data(summary_payload, detail_payloads, raw_dir):
for row in summary_row.get("couponArray", []) or [] for row in summary_row.get("couponArray", []) or []
if row.get("upcnumberCoupon") if row.get("upcnumberCoupon")
} }
raw_order_path = raw_dir / f"{receipt_id or order_id}.json" raw_order_path = raw_dir / f"{safe_filename(receipt_id or order_id)}.json"
orders.append( orders.append(
{ {
@@ -568,7 +587,7 @@ def write_csv(path, rows, fieldnames):
) )
@click.option( @click.option(
"--months-back", "--months-back",
default=3, default=36,
show_default=True, show_default=True,
type=int, type=int,
help="How many months of receipts to enumerate back from today.", help="How many months of receipts to enumerate back from today.",
@@ -577,7 +596,14 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
outdir = Path(outdir) outdir = Path(outdir)
raw_dir = outdir / "raw" raw_dir = outdir / "raw"
try: try:
session = build_session() config = load_config()
click.echo(
"auth headers present: "
f"authorization={bool(config['authorization'])}, "
f"client_id={bool(config['client_id'])}, "
f"client_identifier={bool(config['client_identifier'])}"
)
session = build_session(config)
except Exception as exc: except Exception as exc:
raise click.ClickException( raise click.ClickException(
f"failed to load Costco Firefox cookies: {exc}" f"failed to load Costco Firefox cookies: {exc}"
@@ -607,7 +633,7 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
{"barcode": barcode, "documentType": "warehouse"}, {"barcode": barcode, "documentType": "warehouse"},
) )
detail_payloads.append(detail_payload) detail_payloads.append(detail_payload)
write_json(raw_dir / f"{receipt_id}.json", detail_payload) write_json(raw_dir / f"{safe_filename(receipt_id)}.json", detail_payload)
orders, items = flatten_costco_data(summary_payload, detail_payloads, raw_dir) orders, items = flatten_costco_data(summary_payload, detail_payloads, raw_dir)
write_csv(outdir / "orders.csv", orders, ORDER_FIELDS) write_csv(outdir / "orders.csv", orders, ORDER_FIELDS)
@@ -617,3 +643,4 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
if __name__ == "__main__": if __name__ == "__main__":
main() main()