Harden giant receipt fetch CLI

ben committed 2026-03-14 18:32:32 -04:00
parent 585d8c1e49
commit d57b9cf52f
8 changed files with 456 additions and 470 deletions


@@ -1,28 +1,17 @@
-import requests
-import browser_cookie3
-
-BASE = "https://giantfood.com"
-ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store"
-USER_ID = "369513017"
-LOYALTY = "440155630880"
-
-cj = browser_cookie3.firefox(domain_name="giantfood.com")
-s = requests.Session()
-s.cookies.update(cj)
-s.headers.update({
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0",
-    "accept": "application/json, text/plain, */*",
-    "accept-language": "en-US,en;q=0.9",
-    "referer": ACCOUNT_PAGE,
-})
-
-r = s.get(
-    f"{BASE}/api/v6.0/user/{USER_ID}/order/history",
-    params={"filter": "instore", "loyaltyNumber": LOYALTY},
-    timeout=30,
-)
-print(r.status_code)
-print(r.text[:500])
+import unittest
+
+try:
+    import browser_cookie3  # noqa: F401
+    import requests  # noqa: F401
+except ImportError as exc:  # pragma: no cover - dependency-gated smoke test
+    browser_cookie3 = None
+    _IMPORT_ERROR = exc
+else:
+    _IMPORT_ERROR = None
+
+
+@unittest.skipIf(browser_cookie3 is None, f"optional smoke test dependency missing: {_IMPORT_ERROR}")
+class BrowserCookieSmokeTest(unittest.TestCase):
+    def test_dependencies_available(self):
+        self.assertIsNotNone(browser_cookie3)
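All three scripts get the same treatment: module-level fetch code that ran on import becomes an import-gated unittest case, so a machine without the optional dependencies records a skip rather than a collection error. A minimal sketch of how that surfaces under discovery (assuming the tests live under tests/; the runner invocation is illustrative, not part of this commit):

# Illustrative runner, not part of this commit. With browser_cookie3 or
# requests missing, the gated module still imports cleanly and its test
# is reported as skipped, carrying the captured ImportError message.
import unittest

suite = unittest.TestLoader().discover("tests")
unittest.TextTestRunner(verbosity=2).run(suite)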


@@ -1,27 +1,17 @@
-import browser_cookie3
-from curl_cffi import requests
-
-BASE = "https://giantfood.com"
-ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store"
-USER_ID = "369513017"
-LOYALTY = "440155630880"
-
-s = requests.Session()
-s.cookies.update(browser_cookie3.firefox(domain_name="giantfood.com"))
-s.headers.update({
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0",
-    "accept": "application/json, text/plain, */*",
-    "accept-language": "en-US,en;q=0.9",
-    "referer": ACCOUNT_PAGE,
-})
-
-r = s.get(
-    f"{BASE}/api/v6.0/user/{USER_ID}/order/history",
-    params={"filter": "instore", "loyaltyNumber": LOYALTY},
-    impersonate="firefox",
-    timeout=30,
-)
-print(r.status_code)
-print(r.text[:500])
+import unittest
+
+try:
+    import browser_cookie3  # noqa: F401
+    from curl_cffi import requests  # noqa: F401
+except ImportError as exc:  # pragma: no cover - dependency-gated smoke test
+    browser_cookie3 = None
+    _IMPORT_ERROR = exc
+else:
+    _IMPORT_ERROR = None
+
+
+@unittest.skipIf(browser_cookie3 is None, f"optional smoke test dependency missing: {_IMPORT_ERROR}")
+class CurlCffiSmokeTest(unittest.TestCase):
+    def test_dependencies_available(self):
+        self.assertIsNotNone(browser_cookie3)
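The difference from the first variant is the transport: curl_cffi's impersonate="firefox" presents a Firefox TLS fingerprint in addition to Firefox-looking headers, which is the usual reason to reach for it when plain requests gets blocked below the HTTP layer. A quick way to see the impersonation at work, against a public echo endpoint rather than the Giant API (illustrative, not part of this commit):

# Illustrative check, not part of this commit: the echoed User-Agent
# comes from curl_cffi's Firefox impersonation profile.
from curl_cffi import requests

r = requests.get("https://httpbin.org/headers", impersonate="firefox")
print(r.json()["headers"]["User-Agent"])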


@@ -1,66 +1,17 @@
-import requests
-from playwright.sync_api import sync_playwright
-
-BASE = "https://giantfood.com"
-ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store"
-USER_ID = "369513017"
-LOYALTY = "440155630880"
-
-
-def get_session():
-    with sync_playwright() as p:
-        browser = p.firefox.launch(headless=False)
-        page = browser.new_page()
-        page.goto(ACCOUNT_PAGE)
-        print("log in manually in the browser, then press ENTER here")
-        input()
-        cookies = page.context.cookies()
-        ua = page.evaluate("() => navigator.userAgent")
-        browser.close()
-    s = requests.Session()
-    s.headers.update({
-        "user-agent": ua,
-        "accept": "application/json, text/plain, */*",
-        "referer": ACCOUNT_PAGE,
-    })
-    for c in cookies:
-        domain = c.get("domain", "").lstrip(".") or "giantfood.com"
-        s.cookies.set(c["name"], c["value"], domain=domain)
-    return s
-
-
-def test_history(session):
-    url = f"{BASE}/api/v6.0/user/{USER_ID}/order/history"
-    r = session.get(
-        url,
-        params={
-            "filter": "instore",
-            "loyaltyNumber": LOYALTY,
-        },
-    )
-    print("status:", r.status_code)
-    print()
-    data = r.json()
-    print("orders found:", len(data.get("records", [])))
-    print()
-    for rec in data.get("records", [])[:5]:
-        print(rec["orderId"], rec["orderDate"], rec["orderTotal"])
-
-
-if __name__ == "__main__":
-    session = get_session()
-    test_history(session)
+import unittest
+
+try:
+    from playwright.sync_api import sync_playwright  # noqa: F401
+    import requests  # noqa: F401
+except ImportError as exc:  # pragma: no cover - dependency-gated smoke test
+    sync_playwright = None
+    _IMPORT_ERROR = exc
+else:
+    _IMPORT_ERROR = None
+
+
+@unittest.skipIf(sync_playwright is None, f"optional smoke test dependency missing: {_IMPORT_ERROR}")
+class GiantLoginSmokeTest(unittest.TestCase):
+    def test_dependencies_available(self):
+        self.assertIsNotNone(sync_playwright)
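Worth noting about the retired Playwright flow: it sourced its session from a live manual login rather than from the Firefox profile on disk, and it copied the browser's own navigator.userAgent into the requests session along with the cookies, so the API calls matched the client that had just authenticated. The lstrip(".") on cookie domains normalizes entries like ".giantfood.com" before handing them to requests.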

tests/test_scraper.py

@@ -0,0 +1,117 @@
+import csv
+import tempfile
+import unittest
+from pathlib import Path
+
+import scraper
+
+
+class ScraperTests(unittest.TestCase):
+    def test_flatten_orders_extracts_order_and_item_rows(self):
+        history = {
+            "records": [
+                {
+                    "orderId": "abc123",
+                    "serviceType": "PICKUP",
+                }
+            ]
+        }
+        details = [
+            {
+                "orderId": "abc123",
+                "orderDate": "2026-03-01",
+                "deliveryDate": "2026-03-02",
+                "orderTotal": "12.34",
+                "paymentMethod": "VISA",
+                "totalItemCount": 1,
+                "totalSavings": "1.00",
+                "yourSavingsTotal": "1.00",
+                "couponsDiscountsTotal": "0.50",
+                "refundOrder": False,
+                "ebtOrder": False,
+                "pup": {
+                    "storeName": "Giant",
+                    "aholdStoreNumber": "42",
+                    "storeAddress1": "123 Main",
+                    "storeCity": "Springfield",
+                    "storeState": "VA",
+                    "storeZipcode": "22150",
+                },
+                "items": [
+                    {
+                        "podId": "pod-1",
+                        "itemName": "Bananas",
+                        "primUpcCd": "111",
+                        "categoryId": "produce",
+                        "categoryDesc": "Produce",
+                        "shipQy": "2",
+                        "lbEachCd": "EA",
+                        "unitPrice": "0.59",
+                        "groceryAmount": "1.18",
+                        "totalPickedWeight": "",
+                        "mvpSavings": "0.10",
+                        "rewardSavings": "0.00",
+                        "couponSavings": "0.00",
+                        "couponPrice": "",
+                    }
+                ],
+            }
+        ]
+        orders, items = scraper.flatten_orders(history, details)
+        self.assertEqual(1, len(orders))
+        self.assertEqual("abc123", orders[0]["order_id"])
+        self.assertEqual("PICKUP", orders[0]["service_type"])
+        self.assertEqual(1, len(items))
+        self.assertEqual("1", items[0]["line_no"])
+        self.assertEqual("Bananas", items[0]["item_name"])
+
+    def test_append_dedup_replaces_duplicate_rows_and_preserves_new_values(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = Path(tmpdir) / "orders.csv"
+            scraper.append_dedup(
+                path,
+                [
+                    {"order_id": "1", "order_total": "10.00"},
+                    {"order_id": "2", "order_total": "20.00"},
+                ],
+                subset=["order_id"],
+                fieldnames=["order_id", "order_total"],
+            )
+            merged = scraper.append_dedup(
+                path,
+                [
+                    {"order_id": "2", "order_total": "21.50"},
+                    {"order_id": "3", "order_total": "30.00"},
+                ],
+                subset=["order_id"],
+                fieldnames=["order_id", "order_total"],
+            )
+            self.assertEqual(
+                [
+                    {"order_id": "1", "order_total": "10.00"},
+                    {"order_id": "2", "order_total": "21.50"},
+                    {"order_id": "3", "order_total": "30.00"},
+                ],
+                merged,
+            )
+            with path.open(newline="", encoding="utf-8") as handle:
+                rows = list(csv.DictReader(handle))
+            self.assertEqual(merged, rows)
+
+    def test_read_existing_order_ids_returns_known_ids(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = Path(tmpdir) / "orders.csv"
+            path.write_text("order_id,order_total\n1,10.00\n2,20.00\n", encoding="utf-8")
+            self.assertEqual({"1", "2"}, scraper.read_existing_order_ids(path))
+
+
+if __name__ == "__main__":
+    unittest.main()
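The new test module pins down the contract of three scraper helpers whose bodies are not shown in this commit view. For orientation, a minimal sketch consistent with what the assertions require; this is a guess at the shape only, and the real scraper.py may differ in column mapping and error handling:

# Hypothetical sketches inferred from the tests above, not the actual
# scraper.py. Columns beyond those the tests assert are omitted.
import csv
from pathlib import Path


def flatten_orders(history, details):
    # Join history records to detail payloads by orderId and flatten
    # each order's items into row dicts with a 1-based string line_no.
    by_id = {d["orderId"]: d for d in details}
    orders, items = [], []
    for rec in history.get("records", []):
        detail = by_id.get(rec["orderId"], {})
        orders.append({
            "order_id": rec["orderId"],
            "service_type": rec.get("serviceType", ""),
            "order_total": detail.get("orderTotal", ""),
        })
        for line_no, item in enumerate(detail.get("items", []), start=1):
            items.append({
                "order_id": rec["orderId"],
                "line_no": str(line_no),
                "item_name": item.get("itemName", ""),
            })
    return orders, items


def append_dedup(path, rows, subset, fieldnames):
    # Merge new rows into the CSV: a row whose `subset` key matches an
    # existing row replaces it in place, new keys append at the end, and
    # the merged rows are rewritten to disk and returned.
    merged = {}
    path = Path(path)
    if path.exists():
        with path.open(newline="", encoding="utf-8") as handle:
            for row in csv.DictReader(handle):
                merged[tuple(row[k] for k in subset)] = row
    for row in rows:
        merged[tuple(row[k] for k in subset)] = row  # later rows win
    result = list(merged.values())
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(result)
    return result


def read_existing_order_ids(path):
    # Set of order_id values already on disk; empty set if no file yet.
    path = Path(path)
    if not path.exists():
        return set()
    with path.open(newline="", encoding="utf-8") as handle:
        return {row["order_id"] for row in csv.DictReader(handle)}

The dict-based merge relies on Python's insertion-ordered dicts, which is what lets the updated "2" row keep its position in the test's expected output.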