Simplify Costco browser header extraction

This commit is contained in:
ben
2026-03-16 16:23:38 -04:00
parent 5a331c9af4
commit 1b4c7dde25
5 changed files with 199 additions and 141 deletions

View File

@@ -2,9 +2,11 @@ import sqlite3
import tempfile
import unittest
from pathlib import Path
from unittest import mock
import browser_session
import retailer_sessions
import scrape_costco
class BrowserSessionTests(unittest.TestCase):
@@ -15,17 +17,13 @@ class BrowserSessionTests(unittest.TestCase):
ls_dir.mkdir(parents=True)
db_path = ls_dir / "data.sqlite"
connection = sqlite3.connect(db_path)
try:
with sqlite3.connect(db_path) as connection:
connection.execute("CREATE TABLE data (key TEXT, value TEXT)")
connection.execute(
"INSERT INTO data (key, value) VALUES (?, ?)",
("session", '{"costco":{"clientIdentifier":"481b1aec-aa3b-454b-b81b-48187e28f205"}}'),
("costco-x-wcs-clientId", "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf"),
)
connection.commit()
finally:
connection.close()
entries = browser_session.read_firefox_storage_entries(
profile_dir,
origin_filters=["costco.com"],
@@ -33,17 +31,51 @@ class BrowserSessionTests(unittest.TestCase):
self.assertEqual(1, len(entries))
self.assertEqual("https://www.costco.com", entries[0].origin)
self.assertEqual("session", entries[0].key)
self.assertEqual("costco-x-wcs-clientId", entries[0].key)
def test_extract_costco_headers_from_storage_json(self):
def test_extract_costco_headers_uses_exact_keys(self):
entries = [
browser_session.StorageEntry(
origin="https://www.costco.com",
key="authState",
key="costco-x-authorization",
value="Bearer header.payload.signature",
source="memory",
),
browser_session.StorageEntry(
origin="https://www.costco.com",
key="costco-x-wcs-clientId",
value="4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
source="memory",
),
browser_session.StorageEntry(
origin="https://www.costco.com",
key="client-identifier",
value="481b1aec-aa3b-454b-b81b-48187e28f205",
source="memory",
),
]
headers = retailer_sessions.extract_costco_headers(entries)
self.assertEqual("Bearer header.payload.signature", headers["costco-x-authorization"])
self.assertEqual(
"4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
headers["costco-x-wcs-clientId"],
)
self.assertEqual(
"481b1aec-aa3b-454b-b81b-48187e28f205",
headers["client-identifier"],
)
def test_extract_costco_headers_uses_exact_json_header_blob(self):
entries = [
browser_session.StorageEntry(
origin="https://www.costco.com",
key="headers",
value=(
'{"authorization":"Bearer header.payload.signature",'
'"wcsClientId":"4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",'
'"clientIdentifier":"481b1aec-aa3b-454b-b81b-48187e28f205"}'
'{"costco-x-authorization":"Bearer header.payload.signature",'
'"costco-x-wcs-clientId":"4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",'
'"client-identifier":"481b1aec-aa3b-454b-b81b-48187e28f205"}'
),
source="memory",
)
@@ -61,6 +93,40 @@ class BrowserSessionTests(unittest.TestCase):
headers["client-identifier"],
)
def test_scrape_costco_prompts_for_profile_dir_when_autodiscovery_fails(self):
    """Check that the CLI prompts once when the first session build fails.

    ``scrape_costco.build_session`` is patched to raise ``FileNotFoundError``
    on its first call and to return a placeholder object on the second.
    ``click.prompt`` is patched to hand back a fixed profile path, and the
    fetch/write helpers are patched out. The command callback is invoked
    directly with ``firefox_profile_dir=None`` and the test asserts that the
    prompt was called exactly once.
    """
    # NOTE(review): presumably main retries build_session with the prompted
    # directory after the first failure; confirm against scrape_costco.main.
    session_patch = mock.patch.object(
        scrape_costco,
        "build_session",
        side_effect=[FileNotFoundError("no default profile"), object()],
    )
    prompt_patch = mock.patch.object(
        scrape_costco.click,
        "prompt",
        return_value=Path("/tmp/profile"),
    )

    # Summary payload containing no receipts, paired with an empty list.
    empty_summary = {"data": {"receiptsWithCounts": {"receipts": []}}}
    fetch_patch = mock.patch.object(
        scrape_costco,
        "fetch_summary_windows",
        return_value=(empty_summary, []),
    )

    # The writers are replaced by plain mocks for the duration of the call.
    json_patch = mock.patch.object(scrape_costco, "write_json")
    csv_patch = mock.patch.object(scrape_costco, "write_csv")

    callback_kwargs = {
        "outdir": "/tmp/costco_output",
        "document_type": "all",
        "document_sub_type": "all",
        "window_days": 92,
        "months_back": 3,
        "firefox_profile_dir": None,
    }

    # Patches are entered in the same order as they were declared above.
    with session_patch, prompt_patch as mocked_prompt, fetch_patch, json_patch, csv_patch:
        scrape_costco.main.callback(**callback_kwargs)

    mocked_prompt.assert_called_once()
if __name__ == "__main__":
unittest.main()

View File

@@ -427,6 +427,7 @@ class CostcoPipelineTests(unittest.TestCase):
document_sub_type="all",
window_days=92,
months_back=3,
firefox_profile_dir=None,
)
metadata_path = outdir / "raw" / "summary_requests.json"