# Source file: scrape-giant/retailer_sessions.py

import os
from dataclasses import dataclass
from dotenv import load_dotenv
from browser_session import (
find_json_storage_value,
find_storage_value,
list_storage_keys,
load_browser_context,
)
# Storage origins that are searched for Costco session values.
COSTCO_STORAGE_ORIGINS = ["costco.com"]

# (request header name, browser storage key) pairs used by
# extract_costco_headers; currently each header is stored under its own name.
COSTCO_HEADER_FIELDS = [
    ("costco-x-authorization", "costco-x-authorization"),
    ("costco-x-wcs-clientId", "costco-x-wcs-clientId"),
    ("client-identifier", "client-identifier"),
]

# Storage keys tried, in order, by find_costco_header_in_json when a header
# is not found directly under its own storage key.
COSTCO_JSON_HEADER_KEYS = ["headers", "costco.headers"]
@dataclass
class RetailerSession:
    """Cookies and request headers that together make up a retailer session."""

    # Cookie container taken from the loaded browser context; its concrete
    # type is defined by browser_session, hence the loose `object` annotation.
    cookies: object
    # Extra request headers, keyed by header name.
    headers: dict[str, str]
def load_giant_session(browser="firefox", profile_dir=None):
    """Return a RetailerSession for giantfood.com built from browser cookies.

    Args:
        browser: Browser name forwarded to ``load_browser_context``.
        profile_dir: Optional profile directory forwarded to
            ``load_browser_context``.

    Returns:
        RetailerSession carrying the loaded context's cookies and an empty
        headers dict (no extra headers are set for Giant).
    """
    giant_domain = "giantfood.com"
    browser_context = load_browser_context(
        browser=browser,
        domain_name=giant_domain,
        storage_origins=[giant_domain],
        profile_dir=profile_dir,
    )
    return RetailerSession(cookies=browser_context.cookies, headers={})
def load_costco_session(browser="firefox", profile_dir=None):
    """Return a RetailerSession for Costco from env defaults and browser storage.

    Header values are seeded from the environment after ``load_dotenv()``
    (``COSTCO_X_AUTHORIZATION``, ``COSTCO_WCS_CLIENT_ID``,
    ``COSTCO_CLIENT_IDENTIFIER``). Non-empty ``idToken`` and ``clientID``
    entries found in the browser storage then override the authorization and
    client-id headers. Headers that are still empty are dropped.

    Args:
        browser: Browser name forwarded to ``load_browser_context``.
        profile_dir: Optional profile directory forwarded to
            ``load_browser_context``.

    Returns:
        RetailerSession carrying the loaded context's cookies and every
        header that resolved to a non-empty value.
    """
    load_dotenv()
    headers = {
        "costco-x-authorization": os.getenv("COSTCO_X_AUTHORIZATION", "").strip(),
        "costco-x-wcs-clientId": os.getenv("COSTCO_WCS_CLIENT_ID", "").strip(),
        "client-identifier": os.getenv("COSTCO_CLIENT_IDENTIFIER", "").strip(),
    }

    context = load_browser_context(
        browser=browser,
        domain_name=".costco.com",
        # Use the shared constant so this loader and the storage helpers
        # always look at the same origins.
        storage_origins=COSTCO_STORAGE_ORIGINS,
        profile_dir=profile_dir,
    )
    storage = {entry.key: entry.value for entry in context.storage_entries}

    # `or ""` also covers keys that are present with a None value; a plain
    # .get(key, "") default only applies when the key is absent.
    id_token = (storage.get("idToken") or "").strip()
    client_id = (storage.get("clientID") or "").strip()

    if id_token:
        # Add the "Bearer " scheme unless the stored token already has it.
        headers["costco-x-authorization"] = (
            id_token if id_token.startswith("Bearer ") else f"Bearer {id_token}"
        )
    if client_id:
        headers["costco-x-wcs-clientId"] = client_id

    # Drop headers that neither the environment nor browser storage supplied.
    headers = {name: value for name, value in headers.items() if value}
    return RetailerSession(cookies=context.cookies, headers=headers)
# NOTE: disabled alternative implementation of load_costco_session, kept for
# reference. It reads every header from browser storage via
# extract_costco_headers and raises ValueError when any header is missing.
#
# def load_costco_session(browser="firefox", profile_dir=None):
#     context = load_browser_context(
#         browser=browser,
#         domain_name=".costco.com",
#         storage_origins=COSTCO_STORAGE_ORIGINS,
#         profile_dir=profile_dir,
#     )
#     headers = extract_costco_headers(context.storage_entries)
#     missing = [
#         header_name for header_name, value in headers.items() if not value
#     ]
#     if missing:
#         available_keys = ", ".join(
#             list_storage_keys(context.storage_entries, COSTCO_STORAGE_ORIGINS)
#         )
#         raise ValueError(
#             "missing Costco browser session headers: "
#             f"{', '.join(missing)}. "
#             f"Available Costco storage keys: {available_keys or '(none)'}"
#         )
#     return RetailerSession(cookies=context.cookies, headers=headers)
def extract_costco_headers(storage_entries):
    """Collect the Costco request headers from browser storage entries.

    For each (header name, storage key) pair in ``COSTCO_HEADER_FIELDS`` the
    value is looked up with ``find_storage_value``; when that lookup returns
    a falsy result, ``find_costco_header_in_json`` is tried instead.

    Args:
        storage_entries: Storage entries passed through to the lookup helpers.

    Returns:
        Dict with one entry per configured header name; a header that was
        found by neither lookup keeps the falsy fallback result.
    """
    return {
        header_name: (
            find_storage_value(
                storage_entries,
                COSTCO_STORAGE_ORIGINS,
                storage_key,
            )
            # Only consult the JSON blobs when the direct lookup found nothing.
            or find_costco_header_in_json(storage_entries, header_name)
        )
        for header_name, storage_key in COSTCO_HEADER_FIELDS
    }
def find_costco_header_in_json(storage_entries, header_name):
    """Look up a header via the storage keys in ``COSTCO_JSON_HEADER_KEYS``.

    Each key is tried in order with ``find_json_storage_value``; the first
    truthy result wins and later keys are not queried.

    Args:
        storage_entries: Storage entries passed through to
            ``find_json_storage_value``.
        header_name: Name of the header to look up.

    Returns:
        The first truthy value found, or ``""`` when no key yields one.
    """
    # Generator keeps the lookups lazy so the search stops at the first hit.
    candidates = (
        find_json_storage_value(
            storage_entries,
            COSTCO_STORAGE_ORIGINS,
            json_key,
            header_name,
        )
        for json_key in COSTCO_JSON_HEADER_KEYS
    )
    return next((found for found in candidates if found), "")