Simplify browser session bootstrap
This commit is contained in:
@@ -1,47 +1,13 @@
|
|||||||
import configparser
|
import configparser
|
||||||
import json
|
|
||||||
import os
|
import os
|
||||||
import shutil
|
import shutil
|
||||||
import sqlite3
|
import sqlite3
|
||||||
import tempfile
|
import tempfile
|
||||||
from dataclasses import dataclass
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import browser_cookie3
|
import browser_cookie3
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class StorageEntry:
    """A single key/value pair read from a browser storage database."""

    # Origin string the pair was stored under.
    origin: str
    # Storage key and its value, both already converted to text.
    key: str
    value: str
    # POSIX-style path of the SQLite file the pair was read from.
    source: str
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class BrowserContext:
    """Cookies and storage entries loaded together from one browser profile."""

    # Cookie container as returned by the cookie loader; typed loosely.
    cookies: object
    # Storage key/value pairs collected from the profile's SQLite stores.
    storage_entries: list[StorageEntry]
|
|
||||||
|
|
||||||
|
|
||||||
def load_browser_context(
    browser,
    domain_name,
    storage_origins=None,
    profile_dir=None,
):
    """Load cookies and storage entries for *domain_name* from a browser profile.

    Args:
        browser: Browser name; only ``"firefox"`` is accepted.
        domain_name: Domain passed to the cookie loader.
        storage_origins: Optional origin filters for storage entries. A
            missing or empty value is forwarded as an empty list.
        profile_dir: Optional profile directory. When falsy, the profile is
            auto-discovered with ``find_firefox_profile_dir()``.

    Returns:
        A ``BrowserContext`` holding the cookies and the storage entries.

    Raises:
        ValueError: If *browser* is anything other than ``"firefox"``.
    """
    if browser != "firefox":
        raise ValueError(f"unsupported browser: {browser}")

    if profile_dir:
        profile = Path(profile_dir)
    else:
        profile = find_firefox_profile_dir()

    origin_filters = storage_origins or []
    # Keyword arguments are evaluated left to right, so cookies are still
    # loaded before the storage databases are read.
    return BrowserContext(
        cookies=load_firefox_cookies(domain_name, profile),
        storage_entries=read_firefox_storage_entries(
            profile,
            origin_filters=origin_filters,
        ),
    )
|
|
||||||
|
|
||||||
|
|
||||||
def find_firefox_profile_dir():
|
def find_firefox_profile_dir():
|
||||||
profiles_ini = firefox_profiles_root() / "profiles.ini"
|
profiles_ini = firefox_profiles_root() / "profiles.ini"
|
||||||
parser = configparser.RawConfigParser()
|
parser = configparser.RawConfigParser()
|
||||||
@@ -88,106 +54,37 @@ def load_firefox_cookies(domain_name, profile_dir):
|
|||||||
return browser_cookie3.firefox(cookie_file=str(cookie_file), domain_name=domain_name)
|
return browser_cookie3.firefox(cookie_file=str(cookie_file), domain_name=domain_name)
|
||||||
|
|
||||||
|
|
||||||
def read_firefox_storage_entries(profile_dir, origin_filters):
|
def read_firefox_local_storage(profile_dir, origin_filter):
|
||||||
profile_dir = Path(profile_dir)
|
|
||||||
entries = []
|
|
||||||
entries.extend(read_firefox_ls_entries(profile_dir, origin_filters))
|
|
||||||
entries.extend(read_firefox_webapps_entries(profile_dir, origin_filters))
|
|
||||||
|
|
||||||
deduped = []
|
|
||||||
seen = set()
|
|
||||||
for entry in entries:
|
|
||||||
key = (entry.origin, entry.key, entry.value, entry.source)
|
|
||||||
if key in seen:
|
|
||||||
continue
|
|
||||||
seen.add(key)
|
|
||||||
deduped.append(entry)
|
|
||||||
return deduped
|
|
||||||
|
|
||||||
|
|
||||||
def storage_entries_for_origin(storage_entries, origin_filters):
    """Return the entries whose origin passes ``origin_matches``.

    The input order is preserved and a new list is always returned.
    """
    matching = []
    for candidate in storage_entries:
        if origin_matches(candidate.origin, origin_filters):
            matching.append(candidate)
    return matching
|
|
||||||
|
|
||||||
|
|
||||||
def find_storage_value(storage_entries, origin_filters, key):
    """Return the value of the first origin-matching entry stored under *key*.

    Returns an empty string when no matching entry has that key.
    """
    candidates = storage_entries_for_origin(storage_entries, origin_filters)
    return next((item.value for item in candidates if item.key == key), "")
|
|
||||||
|
|
||||||
|
|
||||||
def find_json_storage_value(storage_entries, origin_filters, key, field):
    """Return one field of a JSON object stored under *key*, as a string.

    The raw value is looked up with ``find_storage_value`` and parsed as JSON.

    Args:
        storage_entries: Entries to search.
        origin_filters: Origin filters forwarded to ``find_storage_value``.
        key: Storage key whose value holds the JSON document.
        field: Name of the field to read from the parsed object.

    Returns:
        ``str(value)`` for the field, or ``""`` when the key is missing, the
        value is not valid JSON, the document is not a JSON object, or the
        field is absent or ``null``.
    """
    raw_value = find_storage_value(storage_entries, origin_filters, key)
    if not raw_value:
        return ""

    try:
        payload = json.loads(raw_value)
    except json.JSONDecodeError:
        return ""

    # Valid JSON is not necessarily an object: a stored list, string or
    # number has no ``.get`` and used to raise AttributeError here.
    if not isinstance(payload, dict):
        return ""

    value = payload.get(field)
    return "" if value is None else str(value)
|
|
||||||
|
|
||||||
|
|
||||||
def list_storage_keys(storage_entries, origin_filters):
    """Return the sorted, de-duplicated non-empty keys of matching entries."""
    unique_keys = set()
    for item in storage_entries_for_origin(storage_entries, origin_filters):
        if item.key:
            unique_keys.add(item.key)
    return sorted(unique_keys)
|
|
||||||
|
|
||||||
|
|
||||||
def read_firefox_ls_entries(profile_dir, origin_filters):
|
|
||||||
entries = []
|
|
||||||
storage_root = profile_dir / "storage" / "default"
|
storage_root = profile_dir / "storage" / "default"
|
||||||
if not storage_root.exists():
|
if not storage_root.exists():
|
||||||
return entries
|
return {}
|
||||||
|
|
||||||
for ls_path in storage_root.glob("*/ls/data.sqlite"):
|
for ls_path in storage_root.glob("*/ls/data.sqlite"):
|
||||||
origin = decode_firefox_origin(ls_path.parents[1].name)
|
origin = decode_firefox_origin(ls_path.parents[1].name)
|
||||||
if not origin_matches(origin, origin_filters):
|
if origin_filter.lower() not in origin.lower():
|
||||||
continue
|
continue
|
||||||
for row in query_sqlite(ls_path, "SELECT key, value FROM data"):
|
return {
|
||||||
entries.append(
|
stringify_sql_value(row[0]): stringify_sql_value(row[1])
|
||||||
StorageEntry(
|
for row in query_sqlite(ls_path, "SELECT key, value FROM data")
|
||||||
origin=origin,
|
}
|
||||||
key=stringify_sql_value(row[0]),
|
return {}
|
||||||
value=stringify_sql_value(row[1]),
|
|
||||||
source=ls_path.as_posix(),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return entries
|
|
||||||
|
|
||||||
|
|
||||||
def read_firefox_webapps_entries(profile_dir, origin_filters):
|
def read_firefox_webapps_store(profile_dir, origin_filter):
|
||||||
webapps_path = profile_dir / "webappsstore.sqlite"
|
webapps_path = profile_dir / "webappsstore.sqlite"
|
||||||
if not webapps_path.exists():
|
if not webapps_path.exists():
|
||||||
return []
|
return {}
|
||||||
|
|
||||||
entries = []
|
values = {}
|
||||||
for row in query_sqlite(
|
for row in query_sqlite(
|
||||||
webapps_path,
|
webapps_path,
|
||||||
"SELECT originKey, key, value FROM webappsstore2",
|
"SELECT originKey, key, value FROM webappsstore2",
|
||||||
):
|
):
|
||||||
origin = stringify_sql_value(row[0])
|
origin = stringify_sql_value(row[0])
|
||||||
if not origin_matches(origin, origin_filters):
|
if origin_filter.lower() not in origin.lower():
|
||||||
continue
|
continue
|
||||||
entries.append(
|
values[stringify_sql_value(row[1])] = stringify_sql_value(row[2])
|
||||||
StorageEntry(
|
return values
|
||||||
origin=origin,
|
|
||||||
key=stringify_sql_value(row[1]),
|
|
||||||
value=stringify_sql_value(row[2]),
|
|
||||||
source=webapps_path.as_posix(),
|
|
||||||
)
|
|
||||||
)
|
|
||||||
return entries
|
|
||||||
|
|
||||||
def query_sqlite(path, query):
|
def query_sqlite(path, query):
|
||||||
copied_path = copy_sqlite_to_temp(path)
|
copied_path = copy_sqlite_to_temp(path)
|
||||||
@@ -210,7 +107,6 @@ def query_sqlite(path, query):
|
|||||||
|
|
||||||
|
|
||||||
def copy_sqlite_to_temp(path):
|
def copy_sqlite_to_temp(path):
|
||||||
import os, shutil, tempfile
|
|
||||||
fd, tmp = tempfile.mkstemp(suffix=".sqlite")
|
fd, tmp = tempfile.mkstemp(suffix=".sqlite")
|
||||||
os.close(fd)
|
os.close(fd)
|
||||||
shutil.copyfile(path, tmp)
|
shutil.copyfile(path, tmp)
|
||||||
@@ -220,14 +116,6 @@ def decode_firefox_origin(raw_origin):
|
|||||||
origin = raw_origin.split("^", 1)[0]
|
origin = raw_origin.split("^", 1)[0]
|
||||||
return origin.replace("+++", "://")
|
return origin.replace("+++", "://")
|
||||||
|
|
||||||
|
|
||||||
def origin_matches(origin, origin_filters):
    """Report whether *origin* contains any of the given filters.

    Matching is a case-insensitive substring test.

    Args:
        origin: Origin string to test.
        origin_filters: Iterable of filter substrings, or a single filter
            string. A falsy value (``None``, ``[]``, ``""``) means "no
            filtering" and matches every origin.

    Returns:
        ``True`` when there are no filters or at least one filter occurs in
        *origin*; otherwise ``False``.
    """
    if not origin_filters:
        return True

    # A bare string would otherwise be iterated character by character, so
    # nearly every origin would match; treat it as one filter instead.
    if isinstance(origin_filters, str):
        origin_filters = [origin_filters]

    normalized_origin = origin.lower()
    return any(
        filter_value.lower() in normalized_origin
        for filter_value in origin_filters
    )
|
|
||||||
|
|
||||||
|
|
||||||
def stringify_sql_value(value):
|
def stringify_sql_value(value):
|
||||||
if value is None:
|
if value is None:
|
||||||
return ""
|
return ""
|
||||||
|
|||||||
@@ -1,116 +0,0 @@
|
|||||||
import os
|
|
||||||
from dataclasses import dataclass
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
|
|
||||||
from browser_session import (
|
|
||||||
find_json_storage_value,
|
|
||||||
find_storage_value,
|
|
||||||
list_storage_keys,
|
|
||||||
load_browser_context,
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
COSTCO_STORAGE_ORIGINS = ["costco.com"]
|
|
||||||
COSTCO_HEADER_FIELDS = [
|
|
||||||
("costco-x-authorization", "costco-x-authorization"),
|
|
||||||
("costco-x-wcs-clientId", "costco-x-wcs-clientId"),
|
|
||||||
("client-identifier", "client-identifier"),
|
|
||||||
]
|
|
||||||
COSTCO_JSON_HEADER_KEYS = ["headers", "costco.headers"]
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
class RetailerSession:
    """Cookies plus request headers needed to talk to one retailer."""

    # Cookie container taken from the loaded browser context.
    cookies: object
    # Header name -> header value.
    headers: dict[str, str]
|
|
||||||
|
|
||||||
|
|
||||||
def load_giant_session(browser="firefox", profile_dir=None):
    """Build a ``RetailerSession`` for giantfood.com from a browser profile.

    Only the cookies of the loaded browser context are used; the session
    carries no extra headers.
    """
    giant_domain = "giantfood.com"
    browser_context = load_browser_context(
        browser=browser,
        domain_name=giant_domain,
        storage_origins=[giant_domain],
        profile_dir=profile_dir,
    )
    return RetailerSession(cookies=browser_context.cookies, headers={})
|
|
||||||
|
|
||||||
def load_costco_session(browser="firefox", profile_dir=None):
    """Build a ``RetailerSession`` for Costco from env vars and browser storage.

    Header values are first read from the environment (after calling
    ``load_dotenv()``). The browser storage values ``idToken`` and
    ``clientID``, when present, then replace the authorization and client-id
    headers. Headers that end up empty are left out of the session.
    """
    load_dotenv()

    env_names = (
        ("costco-x-authorization", "COSTCO_X_AUTHORIZATION"),
        ("costco-x-wcs-clientId", "COSTCO_WCS_CLIENT_ID"),
        ("client-identifier", "COSTCO_CLIENT_IDENTIFIER"),
    )
    headers = {
        header_name: os.getenv(env_name, "").strip()
        for header_name, env_name in env_names
    }

    context = load_browser_context(
        browser=browser,
        domain_name=".costco.com",
        storage_origins=["costco.com"],
        profile_dir=profile_dir,
    )

    # Keyed by storage key only, so a later entry with the same key wins.
    storage = {}
    for entry in context.storage_entries:
        storage[entry.key] = entry.value

    id_token = storage.get("idToken", "").strip()
    client_id = storage.get("clientID", "").strip()

    if id_token:
        # Add the "Bearer " prefix only when the stored token lacks it.
        if id_token.startswith("Bearer "):
            headers["costco-x-authorization"] = id_token
        else:
            headers["costco-x-authorization"] = f"Bearer {id_token}"
    if client_id:
        headers["costco-x-wcs-clientId"] = client_id

    non_empty_headers = {
        name: value for name, value in headers.items() if value
    }
    return RetailerSession(cookies=context.cookies, headers=non_empty_headers)
|
|
||||||
|
|
||||||
#def load_costco_session(browser="firefox", profile_dir=None):
|
|
||||||
# context = load_browser_context(
|
|
||||||
# browser=browser,
|
|
||||||
# domain_name=".costco.com",
|
|
||||||
# storage_origins=COSTCO_STORAGE_ORIGINS,
|
|
||||||
# profile_dir=profile_dir,
|
|
||||||
# )
|
|
||||||
# headers = extract_costco_headers(context.storage_entries)
|
|
||||||
# missing = [
|
|
||||||
# header_name for header_name, value in headers.items() if not value
|
|
||||||
# ]
|
|
||||||
# if missing:
|
|
||||||
# available_keys = ", ".join(
|
|
||||||
# list_storage_keys(context.storage_entries, COSTCO_STORAGE_ORIGINS)
|
|
||||||
# )
|
|
||||||
# raise ValueError(
|
|
||||||
# "missing Costco browser session headers: "
|
|
||||||
# f"{', '.join(missing)}. "
|
|
||||||
# f"Available Costco storage keys: {available_keys or '(none)'}"
|
|
||||||
# )
|
|
||||||
# return RetailerSession(cookies=context.cookies, headers=headers)
|
|
||||||
|
|
||||||
|
|
||||||
def extract_costco_headers(storage_entries):
    """Map each Costco header name to the value found in browser storage.

    For every ``(header_name, storage_key)`` pair in ``COSTCO_HEADER_FIELDS``
    the direct storage value is preferred. If it is empty, the JSON header
    blobs are searched instead. Headers that cannot be found map to ``""``.
    """

    def lookup(header_name, storage_key):
        direct = find_storage_value(
            storage_entries,
            COSTCO_STORAGE_ORIGINS,
            storage_key,
        )
        # ``or`` only evaluates the JSON fallback when the direct value is empty.
        return direct or find_costco_header_in_json(storage_entries, header_name)

    return {
        header_name: lookup(header_name, storage_key)
        for header_name, storage_key in COSTCO_HEADER_FIELDS
    }
|
|
||||||
|
|
||||||
|
|
||||||
def find_costco_header_in_json(storage_entries, header_name):
    """Return *header_name* from the first JSON header blob that provides it.

    The storage keys in ``COSTCO_JSON_HEADER_KEYS`` are tried in order.
    Returns ``""`` when none of them yields a non-empty value.
    """
    # Lazy generator: later blobs are only parsed if earlier ones come up empty.
    found_values = (
        find_json_storage_value(
            storage_entries,
            COSTCO_STORAGE_ORIGINS,
            json_key,
            header_name,
        )
        for json_key in COSTCO_JSON_HEADER_KEYS
    )
    return next((found for found in found_values if found), "")
|
|
||||||
107
scrape_costco.py
107
scrape_costco.py
@@ -3,14 +3,19 @@ import csv
|
|||||||
import json
|
import json
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
from pathlib import Path
|
||||||
from calendar import monthrange
|
from calendar import monthrange
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from pathlib import Path
|
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
import click
|
import click
|
||||||
from curl_cffi import requests
|
from curl_cffi import requests
|
||||||
|
|
||||||
from retailer_sessions import load_costco_session
|
from browser_session import (
|
||||||
|
find_firefox_profile_dir,
|
||||||
|
load_firefox_cookies,
|
||||||
|
read_firefox_local_storage,
|
||||||
|
read_firefox_webapps_store,
|
||||||
|
)
|
||||||
|
|
||||||
BASE_URL = "https://ecom-api.costco.com/ebusiness/order/v1/orders/graphql"
|
BASE_URL = "https://ecom-api.costco.com/ebusiness/order/v1/orders/graphql"
|
||||||
RETAILER = "costco"
|
RETAILER = "costco"
|
||||||
@@ -210,6 +215,18 @@ ITEM_FIELDS = [
|
|||||||
"is_coupon_line",
|
"is_coupon_line",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
COSTCO_STORAGE_ORIGIN = "costco.com"
|
||||||
|
COSTCO_AUTH_STORAGE_KEY = "costco-x-authorization"
|
||||||
|
COSTCO_HEADERS_BLOB_KEY = "headers"
|
||||||
|
|
||||||
|
def load_config():
    """Call ``load_dotenv()`` and return the Costco client ids from the environment.

    Returns:
        A dict with ``client_id`` and ``client_identifier``. Each value is the
        stripped environment variable, or ``""`` when it is unset.
    """
    load_dotenv()
    env_by_field = (
        ("client_id", "COSTCO_X_WCS_CLIENTID"),
        ("client_identifier", "COSTCO_CLIENT_IDENTIFIER"),
    )
    return {
        field: os.getenv(env_name, "").strip()
        for field, env_name in env_by_field
    }
|
||||||
|
|
||||||
|
|
||||||
def build_headers(auth_headers):
|
def build_headers(auth_headers):
|
||||||
headers = {
|
headers = {
|
||||||
"accept": "*/*",
|
"accept": "*/*",
|
||||||
@@ -226,11 +243,52 @@ def build_headers(auth_headers):
|
|||||||
headers.update(auth_headers)
|
headers.update(auth_headers)
|
||||||
return headers
|
return headers
|
||||||
|
|
||||||
def build_session(retailer_session):
|
|
||||||
|
def load_costco_browser_headers(profile_dir, client_id, client_identifier):
    """Assemble the Costco auth headers from Firefox storage and given ids.

    The auth token is read from the ``COSTCO_AUTH_STORAGE_KEY`` storage key.
    If that is empty, the JSON document stored under
    ``COSTCO_HEADERS_BLOB_KEY`` is consulted. In that case the blob may also
    supply *client_id* / *client_identifier* when the caller passed none.

    Args:
        profile_dir: Firefox profile directory to read storage from.
        client_id: Value for ``costco-x-wcs-clientId``; may be empty.
        client_identifier: Value for ``client-identifier``; may be empty.

    Returns:
        A dict with the three Costco request headers.

    Raises:
        click.ClickException: If no auth token can be found, or if either
            client id is still missing afterwards.
    """
    local_storage = read_firefox_local_storage(profile_dir, COSTCO_STORAGE_ORIGIN)
    webapps_store = read_firefox_webapps_store(profile_dir, COSTCO_STORAGE_ORIGIN)

    def stored(key):
        # The local-storage reader's value wins; the webapps store is the fallback.
        return (
            local_storage.get(key, "").strip()
            or webapps_store.get(key, "").strip()
        )

    auth_token = stored(COSTCO_AUTH_STORAGE_KEY)

    if not auth_token:
        header_blob = stored(COSTCO_HEADERS_BLOB_KEY)
        if header_blob:
            try:
                blob_data = json.loads(header_blob)
            except json.JSONDecodeError:
                blob_data = {}
            # Valid JSON that is not an object (list, string, number) has no
            # ``.get``; treat it like an unusable blob instead of crashing.
            if not isinstance(blob_data, dict):
                blob_data = {}

            def blob_field(name):
                # JSON ``null`` must not turn into the literal string "None".
                value = blob_data.get(name)
                return "" if value is None else str(value).strip()

            auth_token = blob_field(COSTCO_AUTH_STORAGE_KEY)
            client_id = client_id or blob_field("costco-x-wcs-clientId")
            client_identifier = client_identifier or blob_field("client-identifier")

    if not auth_token:
        raise click.ClickException(
            "could not find Costco auth token in Firefox session storage"
        )
    if not client_id or not client_identifier:
        raise click.ClickException(
            "missing Costco client ids; set COSTCO_X_WCS_CLIENTID and COSTCO_CLIENT_IDENTIFIER"
        )

    return {
        "costco-x-authorization": auth_token,
        "costco-x-wcs-clientId": client_id,
        "client-identifier": client_identifier,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def build_session(profile_dir, auth_headers):
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.cookies.update(retailer_session.cookies)
|
session.cookies.update(load_firefox_cookies(".costco.com", profile_dir))
|
||||||
session.headers.update(build_headers())
|
session.headers.update(build_headers())
|
||||||
session.headers.update(retailer_session.headers)
|
session.headers.update(auth_headers)
|
||||||
return session
|
return session
|
||||||
|
|
||||||
|
|
||||||
@@ -594,27 +652,24 @@ def main(
|
|||||||
):
|
):
|
||||||
outdir = Path(outdir)
|
outdir = Path(outdir)
|
||||||
raw_dir = outdir / "raw"
|
raw_dir = outdir / "raw"
|
||||||
if firefox_profile_dir is None:
|
config = load_config()
|
||||||
firefox_profile_dir = next(
|
|
||||||
(Path(os.getenv("APPDATA")) / "Mozilla" / "Firefox" / "Profiles").iterdir()
|
profile_dir = Path(firefox_profile_dir) if firefox_profile_dir else None
|
||||||
)
|
if profile_dir is None:
|
||||||
try:
|
try:
|
||||||
retailer_session = load_costco_session(
|
profile_dir = find_firefox_profile_dir()
|
||||||
browser="firefox",
|
except Exception:
|
||||||
profile_dir=firefox_profile_dir,
|
profile_dir = click.prompt(
|
||||||
)
|
"Firefox profile dir",
|
||||||
click.echo(
|
type=click.Path(exists=True, file_okay=False, path_type=Path),
|
||||||
"session bootstrap: "
|
)
|
||||||
f"cookies={bool(retailer_session.cookies)}, "
|
|
||||||
f"authorization={'costco-x-authorization' in retailer_session.headers}, "
|
auth_headers = load_costco_browser_headers(
|
||||||
f"client_id={'costco-x-wcs-clientId' in retailer_session.headers}, "
|
profile_dir,
|
||||||
f"client_identifier={'client-identifier' in retailer_session.headers}"
|
client_id=config["client_id"],
|
||||||
)
|
client_identifier=config["client_identifier"],
|
||||||
session = build_session(retailer_session)
|
)
|
||||||
except Exception as exc:
|
session = build_session(profile_dir, auth_headers)
|
||||||
raise click.ClickException(
|
|
||||||
f"failed to load Costco browser session: {exc}"
|
|
||||||
) from exc
|
|
||||||
|
|
||||||
start_date, end_date = resolve_date_range(months_back)
|
start_date, end_date = resolve_date_range(months_back)
|
||||||
|
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ import click
|
|||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
from curl_cffi import requests
|
from curl_cffi import requests
|
||||||
|
|
||||||
from retailer_sessions import load_giant_session
|
from browser_session import find_firefox_profile_dir, load_firefox_cookies
|
||||||
|
|
||||||
|
|
||||||
BASE = "https://giantfood.com"
|
BASE = "https://giantfood.com"
|
||||||
@@ -67,9 +67,9 @@ def load_config():
|
|||||||
|
|
||||||
|
|
||||||
def build_session():
|
def build_session():
|
||||||
browser_session = load_giant_session()
|
profile_dir = find_firefox_profile_dir()
|
||||||
session = requests.Session()
|
session = requests.Session()
|
||||||
session.cookies.update(browser_session.cookies)
|
session.cookies.update(load_firefox_cookies("giantfood.com", profile_dir))
|
||||||
session.headers.update(
|
session.headers.update(
|
||||||
{
|
{
|
||||||
"user-agent": (
|
"user-agent": (
|
||||||
|
|||||||
@@ -5,12 +5,11 @@ from pathlib import Path
|
|||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
import browser_session
|
import browser_session
|
||||||
import retailer_sessions
|
|
||||||
import scrape_costco
|
import scrape_costco
|
||||||
|
|
||||||
|
|
||||||
class BrowserSessionTests(unittest.TestCase):
|
class BrowserSessionTests(unittest.TestCase):
|
||||||
def test_read_firefox_ls_entries_reads_storage_from_copied_sqlite(self):
|
def test_read_firefox_local_storage_reads_copied_sqlite(self):
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
profile_dir = Path(tmpdir) / "abcd.default-release"
|
profile_dir = Path(tmpdir) / "abcd.default-release"
|
||||||
ls_dir = profile_dir / "storage" / "default" / "https+++www.costco.com" / "ls"
|
ls_dir = profile_dir / "storage" / "default" / "https+++www.costco.com" / "ls"
|
||||||
@@ -24,38 +23,35 @@ class BrowserSessionTests(unittest.TestCase):
|
|||||||
("costco-x-wcs-clientId", "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf"),
|
("costco-x-wcs-clientId", "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf"),
|
||||||
)
|
)
|
||||||
|
|
||||||
entries = browser_session.read_firefox_storage_entries(
|
values = browser_session.read_firefox_local_storage(
|
||||||
profile_dir,
|
profile_dir,
|
||||||
origin_filters=["costco.com"],
|
origin_filter="costco.com",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(1, len(entries))
|
self.assertEqual(
|
||||||
self.assertEqual("https://www.costco.com", entries[0].origin)
|
"4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
self.assertEqual("costco-x-wcs-clientId", entries[0].key)
|
values["costco-x-wcs-clientId"],
|
||||||
|
)
|
||||||
|
|
||||||
def test_extract_costco_headers_uses_exact_keys(self):
|
def test_load_costco_browser_headers_reads_exact_auth_key(self):
|
||||||
entries = [
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
browser_session.StorageEntry(
|
profile_dir = Path(tmpdir)
|
||||||
origin="https://www.costco.com",
|
storage_dir = profile_dir / "storage" / "default" / "https+++www.costco.com" / "ls"
|
||||||
key="costco-x-authorization",
|
storage_dir.mkdir(parents=True)
|
||||||
value="Bearer header.payload.signature",
|
db_path = storage_dir / "data.sqlite"
|
||||||
source="memory",
|
|
||||||
),
|
|
||||||
browser_session.StorageEntry(
|
|
||||||
origin="https://www.costco.com",
|
|
||||||
key="costco-x-wcs-clientId",
|
|
||||||
value="4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
|
||||||
source="memory",
|
|
||||||
),
|
|
||||||
browser_session.StorageEntry(
|
|
||||||
origin="https://www.costco.com",
|
|
||||||
key="client-identifier",
|
|
||||||
value="481b1aec-aa3b-454b-b81b-48187e28f205",
|
|
||||||
source="memory",
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
headers = retailer_sessions.extract_costco_headers(entries)
|
with sqlite3.connect(db_path) as connection:
|
||||||
|
connection.execute("CREATE TABLE data (key TEXT, value TEXT)")
|
||||||
|
connection.execute(
|
||||||
|
"INSERT INTO data (key, value) VALUES (?, ?)",
|
||||||
|
("costco-x-authorization", "Bearer header.payload.signature"),
|
||||||
|
)
|
||||||
|
|
||||||
|
headers = scrape_costco.load_costco_browser_headers(
|
||||||
|
profile_dir,
|
||||||
|
client_id="4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
|
client_identifier="481b1aec-aa3b-454b-b81b-48187e28f205",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual("Bearer header.payload.signature", headers["costco-x-authorization"])
|
self.assertEqual("Bearer header.payload.signature", headers["costco-x-authorization"])
|
||||||
self.assertEqual(
|
self.assertEqual(
|
||||||
@@ -67,42 +63,60 @@ class BrowserSessionTests(unittest.TestCase):
|
|||||||
headers["client-identifier"],
|
headers["client-identifier"],
|
||||||
)
|
)
|
||||||
|
|
||||||
def test_extract_costco_headers_uses_exact_json_header_blob(self):
|
def test_load_costco_browser_headers_falls_back_to_exact_header_blob(self):
|
||||||
entries = [
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
browser_session.StorageEntry(
|
profile_dir = Path(tmpdir)
|
||||||
origin="https://www.costco.com",
|
storage_dir = profile_dir / "storage" / "default" / "https+++www.costco.com" / "ls"
|
||||||
key="headers",
|
storage_dir.mkdir(parents=True)
|
||||||
value=(
|
db_path = storage_dir / "data.sqlite"
|
||||||
'{"costco-x-authorization":"Bearer header.payload.signature",'
|
|
||||||
'"costco-x-wcs-clientId":"4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",'
|
|
||||||
'"client-identifier":"481b1aec-aa3b-454b-b81b-48187e28f205"}'
|
|
||||||
),
|
|
||||||
source="memory",
|
|
||||||
)
|
|
||||||
]
|
|
||||||
|
|
||||||
headers = retailer_sessions.extract_costco_headers(entries)
|
with sqlite3.connect(db_path) as connection:
|
||||||
|
connection.execute("CREATE TABLE data (key TEXT, value TEXT)")
|
||||||
|
connection.execute(
|
||||||
|
"INSERT INTO data (key, value) VALUES (?, ?)",
|
||||||
|
(
|
||||||
|
"headers",
|
||||||
|
'{"costco-x-authorization":"Bearer header.payload.signature"}',
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
|
headers = scrape_costco.load_costco_browser_headers(
|
||||||
|
profile_dir,
|
||||||
|
client_id="4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
|
client_identifier="481b1aec-aa3b-454b-b81b-48187e28f205",
|
||||||
|
)
|
||||||
|
|
||||||
self.assertEqual("Bearer header.payload.signature", headers["costco-x-authorization"])
|
self.assertEqual("Bearer header.payload.signature", headers["costco-x-authorization"])
|
||||||
self.assertEqual(
|
|
||||||
"4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
|
||||||
headers["costco-x-wcs-clientId"],
|
|
||||||
)
|
|
||||||
self.assertEqual(
|
|
||||||
"481b1aec-aa3b-454b-b81b-48187e28f205",
|
|
||||||
headers["client-identifier"],
|
|
||||||
)
|
|
||||||
|
|
||||||
def test_scrape_costco_prompts_for_profile_dir_when_autodiscovery_fails(self):
|
def test_scrape_costco_prompts_for_profile_dir_when_autodiscovery_fails(self):
|
||||||
with mock.patch.object(
|
with mock.patch.object(
|
||||||
scrape_costco,
|
scrape_costco,
|
||||||
"build_session",
|
"find_firefox_profile_dir",
|
||||||
side_effect=[FileNotFoundError("no default profile"), object()],
|
side_effect=FileNotFoundError("no default profile"),
|
||||||
), mock.patch.object(
|
), mock.patch.object(
|
||||||
scrape_costco.click,
|
scrape_costco.click,
|
||||||
"prompt",
|
"prompt",
|
||||||
return_value=Path("/tmp/profile"),
|
return_value=Path("/tmp/profile"),
|
||||||
) as mocked_prompt, mock.patch.object(
|
) as mocked_prompt, mock.patch.object(
|
||||||
|
scrape_costco,
|
||||||
|
"load_config",
|
||||||
|
return_value={
|
||||||
|
"client_id": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
|
"client_identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
|
||||||
|
},
|
||||||
|
), mock.patch.object(
|
||||||
|
scrape_costco,
|
||||||
|
"load_costco_browser_headers",
|
||||||
|
return_value={
|
||||||
|
"costco-x-authorization": "Bearer header.payload.signature",
|
||||||
|
"costco-x-wcs-clientId": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
|
"client-identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
|
||||||
|
},
|
||||||
|
), mock.patch.object(
|
||||||
|
scrape_costco,
|
||||||
|
"build_session",
|
||||||
|
return_value=object(),
|
||||||
|
), mock.patch.object(
|
||||||
scrape_costco,
|
scrape_costco,
|
||||||
"fetch_summary_windows",
|
"fetch_summary_windows",
|
||||||
return_value=(
|
return_value=(
|
||||||
|
|||||||
@@ -411,6 +411,25 @@ class CostcoPipelineTests(unittest.TestCase):
|
|||||||
]
|
]
|
||||||
|
|
||||||
with mock.patch.object(
|
with mock.patch.object(
|
||||||
|
scrape_costco,
|
||||||
|
"load_config",
|
||||||
|
return_value={
|
||||||
|
"client_id": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
|
"client_identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
|
||||||
|
},
|
||||||
|
), mock.patch.object(
|
||||||
|
scrape_costco,
|
||||||
|
"find_firefox_profile_dir",
|
||||||
|
return_value=Path("/tmp/profile"),
|
||||||
|
), mock.patch.object(
|
||||||
|
scrape_costco,
|
||||||
|
"load_costco_browser_headers",
|
||||||
|
return_value={
|
||||||
|
"costco-x-authorization": "Bearer header.payload.signature",
|
||||||
|
"costco-x-wcs-clientId": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
|
||||||
|
"client-identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
|
||||||
|
},
|
||||||
|
), mock.patch.object(
|
||||||
scrape_costco, "build_session", return_value=object()
|
scrape_costco, "build_session", return_value=object()
|
||||||
), mock.patch.object(
|
), mock.patch.object(
|
||||||
scrape_costco,
|
scrape_costco,
|
||||||
|
|||||||
Reference in New Issue
Block a user