Harden giant receipt fetch CLI

ben committed 2026-03-14 18:32:32 -04:00
parent 585d8c1e49
commit d57b9cf52f
8 changed files with 456 additions and 470 deletions


@@ -1,28 +1,17 @@
-import requests
-import browser_cookie3
-
-BASE = "https://giantfood.com"
-ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store"
-USER_ID = "369513017"
-LOYALTY = "440155630880"
-
-cj = browser_cookie3.firefox(domain_name="giantfood.com")
-s = requests.Session()
-s.cookies.update(cj)
-s.headers.update({
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0",
-    "accept": "application/json, text/plain, */*",
-    "accept-language": "en-US,en;q=0.9",
-    "referer": ACCOUNT_PAGE,
-})
-
-r = s.get(
-    f"{BASE}/api/v6.0/user/{USER_ID}/order/history",
-    params={"filter": "instore", "loyaltyNumber": LOYALTY},
-    timeout=30,
-)
-print(r.status_code)
-print(r.text[:500])
+import unittest
+
+try:
+    import browser_cookie3  # noqa: F401
+    import requests  # noqa: F401
+except ImportError as exc:  # pragma: no cover - dependency-gated smoke test
+    browser_cookie3 = None
+    _IMPORT_ERROR = exc
+else:
+    _IMPORT_ERROR = None
+
+
+@unittest.skipIf(browser_cookie3 is None, f"optional smoke test dependency missing: {_IMPORT_ERROR}")
+class BrowserCookieSmokeTest(unittest.TestCase):
+    def test_dependencies_available(self):
+        self.assertIsNotNone(browser_cookie3)
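All three scripts get the same treatment: module-level fetch code that ran on import becomes an import-gated unittest case, so a machine without the optional dependencies records a skip rather than a collection error. A minimal sketch of how that surfaces under discovery (assuming the tests live under tests/; the runner invocation is illustrative, not part of this commit):

# Illustrative runner, not part of this commit. With browser_cookie3 or
# requests missing, the gated module still imports cleanly and its test
# is reported as skipped, carrying the captured ImportError message.
import unittest

suite = unittest.TestLoader().discover("tests")
unittest.TextTestRunner(verbosity=2).run(suite)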


@@ -1,27 +1,17 @@
-import browser_cookie3
-from curl_cffi import requests
-
-BASE = "https://giantfood.com"
-ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store"
-USER_ID = "369513017"
-LOYALTY = "440155630880"
-
-s = requests.Session()
-s.cookies.update(browser_cookie3.firefox(domain_name="giantfood.com"))
-s.headers.update({
-    "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0",
-    "accept": "application/json, text/plain, */*",
-    "accept-language": "en-US,en;q=0.9",
-    "referer": ACCOUNT_PAGE,
-})
-
-r = s.get(
-    f"{BASE}/api/v6.0/user/{USER_ID}/order/history",
-    params={"filter": "instore", "loyaltyNumber": LOYALTY},
-    impersonate="firefox",
-    timeout=30,
-)
-print(r.status_code)
-print(r.text[:500])
+import unittest
+
+try:
+    import browser_cookie3  # noqa: F401
+    from curl_cffi import requests  # noqa: F401
+except ImportError as exc:  # pragma: no cover - dependency-gated smoke test
+    browser_cookie3 = None
+    _IMPORT_ERROR = exc
+else:
+    _IMPORT_ERROR = None
+
+
+@unittest.skipIf(browser_cookie3 is None, f"optional smoke test dependency missing: {_IMPORT_ERROR}")
+class CurlCffiSmokeTest(unittest.TestCase):
+    def test_dependencies_available(self):
+        self.assertIsNotNone(browser_cookie3)
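The difference from the first variant is the transport: curl_cffi's impersonate="firefox" presents a Firefox TLS fingerprint in addition to Firefox-looking headers, which is the usual reason to reach for it when plain requests gets blocked below the HTTP layer. A quick way to see the impersonation at work, against a public echo endpoint rather than the Giant API (illustrative, not part of this commit):

# Illustrative check, not part of this commit: the echoed User-Agent
# comes from curl_cffi's Firefox impersonation profile.
from curl_cffi import requests

r = requests.get("https://httpbin.org/headers", impersonate="firefox")
print(r.json()["headers"]["User-Agent"])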


@@ -1,66 +1,17 @@
-import requests
-from playwright.sync_api import sync_playwright
-
-BASE = "https://giantfood.com"
-ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store"
-USER_ID = "369513017"
-LOYALTY = "440155630880"
-
-
-def get_session():
-    with sync_playwright() as p:
-        browser = p.firefox.launch(headless=False)
-        page = browser.new_page()
-        page.goto(ACCOUNT_PAGE)
-        print("log in manually in the browser, then press ENTER here")
-        input()
-        cookies = page.context.cookies()
-        ua = page.evaluate("() => navigator.userAgent")
-        browser.close()
-    s = requests.Session()
-    s.headers.update({
-        "user-agent": ua,
-        "accept": "application/json, text/plain, */*",
-        "referer": ACCOUNT_PAGE,
-    })
-    for c in cookies:
-        domain = c.get("domain", "").lstrip(".") or "giantfood.com"
-        s.cookies.set(c["name"], c["value"], domain=domain)
-    return s
-
-
-def test_history(session):
-    url = f"{BASE}/api/v6.0/user/{USER_ID}/order/history"
-    r = session.get(
-        url,
-        params={
-            "filter": "instore",
-            "loyaltyNumber": LOYALTY,
-        },
-    )
-    print("status:", r.status_code)
-    print()
-    data = r.json()
-    print("orders found:", len(data.get("records", [])))
-    print()
-    for rec in data.get("records", [])[:5]:
-        print(rec["orderId"], rec["orderDate"], rec["orderTotal"])
-
-
-if __name__ == "__main__":
-    session = get_session()
-    test_history(session)
+import unittest
+
+try:
+    from playwright.sync_api import sync_playwright  # noqa: F401
+    import requests  # noqa: F401
+except ImportError as exc:  # pragma: no cover - dependency-gated smoke test
+    sync_playwright = None
+    _IMPORT_ERROR = exc
+else:
+    _IMPORT_ERROR = None
+
+
+@unittest.skipIf(sync_playwright is None, f"optional smoke test dependency missing: {_IMPORT_ERROR}")
+class GiantLoginSmokeTest(unittest.TestCase):
+    def test_dependencies_available(self):
+        self.assertIsNotNone(sync_playwright)
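Worth noting about the retired Playwright flow: it sourced its session from a live manual login rather than from the Firefox profile on disk, and it copied the browser's own navigator.userAgent into the requests session along with the cookies, so the API calls matched the client that had just authenticated. The lstrip(".") on cookie domains normalizes entries like ".giantfood.com" before handing them to requests.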

tests/test_scraper.py

@@ -0,0 +1,117 @@
+import csv
+import tempfile
+import unittest
+from pathlib import Path
+
+import scraper
+
+
+class ScraperTests(unittest.TestCase):
+    def test_flatten_orders_extracts_order_and_item_rows(self):
+        history = {
+            "records": [
+                {
+                    "orderId": "abc123",
+                    "serviceType": "PICKUP",
+                }
+            ]
+        }
+        details = [
+            {
+                "orderId": "abc123",
+                "orderDate": "2026-03-01",
+                "deliveryDate": "2026-03-02",
+                "orderTotal": "12.34",
+                "paymentMethod": "VISA",
+                "totalItemCount": 1,
+                "totalSavings": "1.00",
+                "yourSavingsTotal": "1.00",
+                "couponsDiscountsTotal": "0.50",
+                "refundOrder": False,
+                "ebtOrder": False,
+                "pup": {
+                    "storeName": "Giant",
+                    "aholdStoreNumber": "42",
+                    "storeAddress1": "123 Main",
+                    "storeCity": "Springfield",
+                    "storeState": "VA",
+                    "storeZipcode": "22150",
+                },
+                "items": [
+                    {
+                        "podId": "pod-1",
+                        "itemName": "Bananas",
+                        "primUpcCd": "111",
+                        "categoryId": "produce",
+                        "categoryDesc": "Produce",
+                        "shipQy": "2",
+                        "lbEachCd": "EA",
+                        "unitPrice": "0.59",
+                        "groceryAmount": "1.18",
+                        "totalPickedWeight": "",
+                        "mvpSavings": "0.10",
+                        "rewardSavings": "0.00",
+                        "couponSavings": "0.00",
+                        "couponPrice": "",
+                    }
+                ],
+            }
+        ]
+        orders, items = scraper.flatten_orders(history, details)
+        self.assertEqual(1, len(orders))
+        self.assertEqual("abc123", orders[0]["order_id"])
+        self.assertEqual("PICKUP", orders[0]["service_type"])
+        self.assertEqual(1, len(items))
+        self.assertEqual("1", items[0]["line_no"])
+        self.assertEqual("Bananas", items[0]["item_name"])
+
+    def test_append_dedup_replaces_duplicate_rows_and_preserves_new_values(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = Path(tmpdir) / "orders.csv"
+            scraper.append_dedup(
+                path,
+                [
+                    {"order_id": "1", "order_total": "10.00"},
+                    {"order_id": "2", "order_total": "20.00"},
+                ],
+                subset=["order_id"],
+                fieldnames=["order_id", "order_total"],
+            )
+            merged = scraper.append_dedup(
+                path,
+                [
+                    {"order_id": "2", "order_total": "21.50"},
+                    {"order_id": "3", "order_total": "30.00"},
+                ],
+                subset=["order_id"],
+                fieldnames=["order_id", "order_total"],
+            )
+            self.assertEqual(
+                [
+                    {"order_id": "1", "order_total": "10.00"},
+                    {"order_id": "2", "order_total": "21.50"},
+                    {"order_id": "3", "order_total": "30.00"},
+                ],
+                merged,
+            )
+            with path.open(newline="", encoding="utf-8") as handle:
+                rows = list(csv.DictReader(handle))
+            self.assertEqual(merged, rows)
+
+    def test_read_existing_order_ids_returns_known_ids(self):
+        with tempfile.TemporaryDirectory() as tmpdir:
+            path = Path(tmpdir) / "orders.csv"
+            path.write_text("order_id,order_total\n1,10.00\n2,20.00\n", encoding="utf-8")
+            self.assertEqual({"1", "2"}, scraper.read_existing_order_ids(path))
+
+
+if __name__ == "__main__":
+    unittest.main()
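The new test module pins down the contract of three scraper helpers whose bodies are not shown in this commit view. For orientation, a minimal sketch consistent with what the assertions require; this is a guess at the shape only, and the real scraper.py may differ in column mapping and error handling:

# Hypothetical sketches inferred from the tests above, not the actual
# scraper.py. Columns beyond those the tests assert are omitted.
import csv
from pathlib import Path


def flatten_orders(history, details):
    # Join history records to detail payloads by orderId and flatten
    # each order's items into row dicts with a 1-based string line_no.
    by_id = {d["orderId"]: d for d in details}
    orders, items = [], []
    for rec in history.get("records", []):
        detail = by_id.get(rec["orderId"], {})
        orders.append({
            "order_id": rec["orderId"],
            "service_type": rec.get("serviceType", ""),
            "order_total": detail.get("orderTotal", ""),
        })
        for line_no, item in enumerate(detail.get("items", []), start=1):
            items.append({
                "order_id": rec["orderId"],
                "line_no": str(line_no),
                "item_name": item.get("itemName", ""),
            })
    return orders, items


def append_dedup(path, rows, subset, fieldnames):
    # Merge new rows into the CSV: a row whose `subset` key matches an
    # existing row replaces it in place, new keys append at the end, and
    # the merged rows are rewritten to disk and returned.
    merged = {}
    path = Path(path)
    if path.exists():
        with path.open(newline="", encoding="utf-8") as handle:
            for row in csv.DictReader(handle):
                merged[tuple(row[k] for k in subset)] = row
    for row in rows:
        merged[tuple(row[k] for k in subset)] = row  # later rows win
    result = list(merged.values())
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(result)
    return result


def read_existing_order_ids(path):
    # Set of order_id values already on disk; empty set if no file yet.
    path = Path(path)
    if not path.exists():
        return set()
    with path.open(newline="", encoding="utf-8") as handle:
        return {row["order_id"] for row in csv.DictReader(handle)}

The dict-based merge relies on Python's insertion-ordered dicts, which is what lets the updated "2" row keep its position in the test's expected output.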