Added token and dotenv support so the Costco scrape succeeds; default look-back is now 36 months
This commit is contained in:
@@ -1,6 +1,9 @@
|
||||
import os
|
||||
import csv
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
from dotenv import load_dotenv
|
||||
from calendar import monthrange
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
@@ -208,8 +211,16 @@ ITEM_FIELDS = [
|
||||
]
|
||||
|
||||
|
||||
def build_headers():
|
||||
def load_config():
    """Collect the Costco request credentials from the environment.

    ``load_dotenv()`` is invoked first (python-dotenv; presumably so that
    values from a local ``.env`` file are visible to ``os.getenv``).

    Returns:
        dict: keys ``"authorization"``, ``"client_id"`` and
        ``"client_identifier"``. Each value is the corresponding
        environment variable with surrounding whitespace stripped, or an
        empty string when the variable is not set.
    """
    load_dotenv()
    # Config key -> environment variable that supplies it.
    env_names = {
        "authorization": "COSTCO_X_AUTHORIZATION",
        "client_id": "COSTCO_X_WCS_CLIENTID",
        "client_identifier": "COSTCO_CLIENT_IDENTIFIER",
    }
    return {
        key: os.getenv(env_name, "").strip()
        for key, env_name in env_names.items()
    }
|
||||
|
||||
def build_headers(config):
|
||||
headers = {
|
||||
"accept": "*/*",
|
||||
"content-type": "application/json-patch+json",
|
||||
"costco.service": "restOrders",
|
||||
@@ -221,12 +232,18 @@ def build_headers():
|
||||
"Gecko/20100101 Firefox/148.0"
|
||||
),
|
||||
}
|
||||
if config["authorization"]:
|
||||
headers["costco-x-authorization"] = config["authorization"]
|
||||
if config["client_id"]:
|
||||
headers["costco-x-wcs-clientId"] = config["client_id"]
|
||||
if config["client_identifier"]:
|
||||
headers["client-identifier"] = config["client_identifier"]
|
||||
return headers
|
||||
|
||||
|
||||
def build_session(config):
    """Create a ``requests`` session primed with Firefox cookies and headers.

    Args:
        config: mapping passed straight through to ``build_headers``.

    Returns:
        requests.Session: session whose cookie jar has been updated with the
        Firefox cookies returned for ``.costco.com`` and whose default
        headers have been updated with ``build_headers(config)``.
    """
    # Cookies are read before the headers are built, matching the original
    # order of operations.
    firefox_cookies = browser_cookie3.firefox(domain_name=".costco.com")
    request_headers = build_headers(config)

    session = requests.Session()
    session.cookies.update(firefox_cookies)
    session.headers.update(request_headers)
    return session
|
||||
|
||||
|
||||
@@ -254,6 +271,8 @@ def graphql_post(session, query, variables):
|
||||
|
||||
raise RuntimeError("failed to fetch Costco GraphQL payload")
|
||||
|
||||
def safe_filename(value):
    """Return ``value`` as text that is safe to use as a single file name.

    Each run of characters that cannot appear in a file name is collapsed
    into one ``-``. That covers ``< > : " / \\ | ? *`` and, additionally,
    the ASCII control characters 0x00-0x1F (NUL is rejected by POSIX file
    APIs and the whole range is rejected on Windows).

    Args:
        value: any object; it is converted with ``str()`` first.

    Returns:
        str: the sanitised text. No extension is added.
    """
    return re.sub(r'[<>:"/\\|?*\x00-\x1f]+', "-", str(value))
|
||||
|
||||
def summary_receipts(payload):
    """Return the receipt list from a ``receiptsWithCounts`` response payload.

    Reads ``payload["data"]["receiptsWithCounts"]["receipts"]``. A level that
    is missing *or* explicitly null is treated as empty, so a response such
    as ``{"data": null, "errors": [...]}`` yields ``[]`` instead of raising
    ``AttributeError``.

    Args:
        payload: decoded JSON response (a dict).

    Returns:
        list: the receipts, or ``[]`` when none are present.
    """
    # `.get(key, {})` only covers a missing key; `or {}` also covers a key
    # that is present with a null value.
    data = payload.get("data") or {}
    receipts_with_counts = data.get("receiptsWithCounts") or {}
    return receipts_with_counts.get("receipts") or []
|
||||
@@ -423,7 +442,7 @@ def flatten_costco_data(summary_payload, detail_payloads, raw_dir):
|
||||
for row in summary_row.get("couponArray", []) or []
|
||||
if row.get("upcnumberCoupon")
|
||||
}
|
||||
raw_order_path = raw_dir / f"{receipt_id or order_id}.json"
|
||||
raw_order_path = raw_dir / f"{safe_filename(receipt_id or order_id)}.json"
|
||||
|
||||
orders.append(
|
||||
{
|
||||
@@ -568,16 +587,23 @@ def write_csv(path, rows, fieldnames):
|
||||
)
|
||||
@click.option(
|
||||
"--months-back",
|
||||
default=3,
|
||||
default=36,
|
||||
show_default=True,
|
||||
type=int,
|
||||
help="How many months of receipts to enumerate back from today.",
|
||||
)
|
||||
def main(outdir, document_type, document_sub_type, window_days, months_back):
|
||||
outdir = Path(outdir)
|
||||
raw_dir = outdir / "raw"
|
||||
raw_dir = outdir / "raw"
|
||||
try:
|
||||
session = build_session()
|
||||
config = load_config()
|
||||
click.echo(
|
||||
"auth headers present: "
|
||||
f"authorization={bool(config['authorization'])}, "
|
||||
f"client_id={bool(config['client_id'])}, "
|
||||
f"client_identifier={bool(config['client_identifier'])}"
|
||||
)
|
||||
session = build_session(config)
|
||||
except Exception as exc:
|
||||
raise click.ClickException(
|
||||
f"failed to load Costco Firefox cookies: {exc}"
|
||||
@@ -607,7 +633,7 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
|
||||
{"barcode": barcode, "documentType": "warehouse"},
|
||||
)
|
||||
detail_payloads.append(detail_payload)
|
||||
write_json(raw_dir / f"{receipt_id}.json", detail_payload)
|
||||
write_json(raw_dir / f"{safe_filename(receipt_id)}.json", detail_payload)
|
||||
|
||||
orders, items = flatten_costco_data(summary_payload, detail_payloads, raw_dir)
|
||||
write_csv(outdir / "orders.csv", orders, ORDER_FIELDS)
|
||||
@@ -617,3 +643,4 @@ def main(outdir, document_type, document_sub_type, window_days, months_back):
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user