Simplify browser session bootstrap
This commit is contained in:
107
scrape_costco.py
107
scrape_costco.py
@@ -3,14 +3,19 @@ import csv
|
||||
import json
|
||||
import time
|
||||
import re
|
||||
from pathlib import Path
|
||||
from calendar import monthrange
|
||||
from datetime import datetime, timedelta
|
||||
from pathlib import Path
|
||||
from dotenv import load_dotenv
|
||||
import click
|
||||
from curl_cffi import requests
|
||||
|
||||
from retailer_sessions import load_costco_session
|
||||
from browser_session import (
|
||||
find_firefox_profile_dir,
|
||||
load_firefox_cookies,
|
||||
read_firefox_local_storage,
|
||||
read_firefox_webapps_store,
|
||||
)
|
||||
|
||||
# GraphQL endpoint that the scraper sends its order queries to.
BASE_URL = "https://ecom-api.costco.com/ebusiness/order/v1/orders/graphql"
# Retailer identifier used by this scraper.
RETAILER = "costco"
|
||||
@@ -210,6 +215,18 @@ ITEM_FIELDS = [
|
||||
"is_coupon_line",
|
||||
]
|
||||
|
||||
# Origin passed to the Firefox storage readers when looking up Costco data.
COSTCO_STORAGE_ORIGIN = "costco.com"
# Storage key (and request header name) that carries the auth token.
COSTCO_AUTH_STORAGE_KEY = "costco-x-authorization"
# Storage key of a JSON blob that may bundle the auth token and client ids.
COSTCO_HEADERS_BLOB_KEY = "headers"
|
||||
|
||||
def load_config():
    """Return the Costco client ids configured through the environment.

    Calls ``load_dotenv()`` first, then reads ``COSTCO_X_WCS_CLIENTID`` and
    ``COSTCO_CLIENT_IDENTIFIER``. A variable that is unset yields an empty
    string; surrounding whitespace is stripped.

    Returns:
        dict with the keys ``"client_id"`` and ``"client_identifier"``.
    """
    load_dotenv()
    return {
        "client_id": os.getenv("COSTCO_X_WCS_CLIENTID", "").strip(),
        "client_identifier": os.getenv("COSTCO_CLIENT_IDENTIFIER", "").strip(),
    }
|
||||
|
||||
|
||||
def build_headers(auth_headers):
|
||||
headers = {
|
||||
"accept": "*/*",
|
||||
@@ -226,11 +243,52 @@ def build_headers(auth_headers):
|
||||
headers.update(auth_headers)
|
||||
return headers
|
||||
|
||||
def build_session(retailer_session):
|
||||
|
||||
def load_costco_browser_headers(profile_dir, client_id, client_identifier):
    """Build the Costco auth headers from data stored in a Firefox profile.

    The auth token is looked up under ``COSTCO_AUTH_STORAGE_KEY``, first in
    the mapping returned by ``read_firefox_local_storage`` and then in the
    one returned by ``read_firefox_webapps_store``. If neither has it, the
    JSON blob stored under ``COSTCO_HEADERS_BLOB_KEY`` is parsed and used as
    a fallback for the token and for any client id the caller left empty.

    Args:
        profile_dir: Firefox profile directory handed to the storage readers.
        client_id: Value for ``costco-x-wcs-clientId``; when empty, the blob
            value is used (only if the blob fallback is taken).
        client_identifier: Value for ``client-identifier``; same fallback
            rule as ``client_id``.

    Returns:
        dict with the three request headers ``costco-x-authorization``,
        ``costco-x-wcs-clientId`` and ``client-identifier``.

    Raises:
        click.ClickException: if no auth token can be found, or if either
            client id is still empty after the fallback.
    """
    local_storage = read_firefox_local_storage(profile_dir, COSTCO_STORAGE_ORIGIN)
    webapps_store = read_firefox_webapps_store(profile_dir, COSTCO_STORAGE_ORIGIN)
    auth_token = (
        local_storage.get(COSTCO_AUTH_STORAGE_KEY, "").strip()
        or webapps_store.get(COSTCO_AUTH_STORAGE_KEY, "").strip()
    )

    if not auth_token:
        header_blob = (
            local_storage.get(COSTCO_HEADERS_BLOB_KEY, "").strip()
            or webapps_store.get(COSTCO_HEADERS_BLOB_KEY, "").strip()
        )
        if header_blob:
            try:
                blob_data = json.loads(header_blob)
            except json.JSONDecodeError:
                blob_data = {}
            # Valid JSON is not necessarily an object; a list/string/number
            # has no .get(), so treat it like an unusable blob.
            if not isinstance(blob_data, dict):
                blob_data = {}

            def blob_value(key):
                # `or ""` keeps a JSON null from becoming the truthy string
                # "None", which would otherwise pass the checks below.
                return str(blob_data.get(key) or "").strip()

            auth_token = blob_value(COSTCO_AUTH_STORAGE_KEY)
            # Caller-supplied ids take precedence over the blob's values.
            client_id = client_id or blob_value("costco-x-wcs-clientId")
            client_identifier = client_identifier or blob_value("client-identifier")

    if not auth_token:
        raise click.ClickException(
            "could not find Costco auth token in Firefox session storage"
        )
    if not client_id or not client_identifier:
        raise click.ClickException(
            "missing Costco client ids; set COSTCO_X_WCS_CLIENTID and COSTCO_CLIENT_IDENTIFIER"
        )

    return {
        "costco-x-authorization": auth_token,
        "costco-x-wcs-clientId": client_id,
        "client-identifier": client_identifier,
    }
|
||||
|
||||
|
||||
def build_session(profile_dir, auth_headers):
    """Create a ``requests.Session`` primed with Costco cookies and headers.

    Args:
        profile_dir: Firefox profile directory passed to
            ``load_firefox_cookies`` together with the ``".costco.com"``
            domain.
        auth_headers: Mapping of auth headers, as produced by
            ``load_costco_browser_headers``.

    Returns:
        The configured session.
    """
    session = requests.Session()
    session.cookies.update(load_firefox_cookies(".costco.com", profile_dir))
    # build_headers(auth_headers) requires the auth mapping and merges it into
    # the base headers itself, so a single update covers both.
    # NOTE(review): the stale `retailer_session` lines from the previous
    # signature were dropped; that name is not defined in this function.
    session.headers.update(build_headers(auth_headers))
    return session
|
||||
|
||||
|
||||
@@ -594,27 +652,24 @@ def main(
|
||||
):
|
||||
outdir = Path(outdir)
|
||||
raw_dir = outdir / "raw"
|
||||
if firefox_profile_dir is None:
|
||||
firefox_profile_dir = next(
|
||||
(Path(os.getenv("APPDATA")) / "Mozilla" / "Firefox" / "Profiles").iterdir()
|
||||
)
|
||||
try:
|
||||
retailer_session = load_costco_session(
|
||||
browser="firefox",
|
||||
profile_dir=firefox_profile_dir,
|
||||
)
|
||||
click.echo(
|
||||
"session bootstrap: "
|
||||
f"cookies={bool(retailer_session.cookies)}, "
|
||||
f"authorization={'costco-x-authorization' in retailer_session.headers}, "
|
||||
f"client_id={'costco-x-wcs-clientId' in retailer_session.headers}, "
|
||||
f"client_identifier={'client-identifier' in retailer_session.headers}"
|
||||
)
|
||||
session = build_session(retailer_session)
|
||||
except Exception as exc:
|
||||
raise click.ClickException(
|
||||
f"failed to load Costco browser session: {exc}"
|
||||
) from exc
|
||||
config = load_config()
|
||||
|
||||
profile_dir = Path(firefox_profile_dir) if firefox_profile_dir else None
|
||||
if profile_dir is None:
|
||||
try:
|
||||
profile_dir = find_firefox_profile_dir()
|
||||
except Exception:
|
||||
profile_dir = click.prompt(
|
||||
"Firefox profile dir",
|
||||
type=click.Path(exists=True, file_okay=False, path_type=Path),
|
||||
)
|
||||
|
||||
auth_headers = load_costco_browser_headers(
|
||||
profile_dir,
|
||||
client_id=config["client_id"],
|
||||
client_identifier=config["client_identifier"],
|
||||
)
|
||||
session = build_session(profile_dir, auth_headers)
|
||||
|
||||
start_date, end_date = resolve_date_range(months_back)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user