Align Costco scraper with browser session flow

This commit is contained in:
ben
2026-03-16 12:28:19 -04:00
parent 58d6efb7bb
commit c0054dc51e
2 changed files with 159 additions and 56 deletions

View File

@@ -11,6 +11,14 @@ import validate_cross_retailer_flow
class CostcoPipelineTests(unittest.TestCase):
def test_resolve_date_range_uses_months_back(self):
    """Check the range returned for a three-month look-back from a fixed day.

    The reference day is pinned through the ``today`` keyword so the
    expected start and end strings do not depend on the wall clock.
    """
    reference_day = scrape_costco.parse_cli_date("3/16/2026")

    resolved_range = scrape_costco.resolve_date_range(3, today=reference_day)
    range_start, range_end = resolved_range

    self.assertEqual("12/16/2025", range_start)
    self.assertEqual("3/16/2026", range_end)
def test_build_date_windows_splits_long_ranges(self):
windows = scrape_costco.build_date_windows("1/01/2026", "6/30/2026", 92)
@@ -160,6 +168,74 @@ class CostcoPipelineTests(unittest.TestCase):
self.assertEqual("true", items[1]["is_discount_line"])
self.assertEqual("true", items[1]["is_coupon_line"])
def test_flatten_costco_data_uses_composite_summary_lookup_key(self):
    """Flatten a detail receipt whose barcode is shared by two summary rows.

    Both summary receipts carry the barcode "dup" but differ in
    ``transactionDateTime``. The single detail receipt uses the March
    timestamp, and the assertions expect the VISA tender, a coupon-flagged
    item line, and a raw path containing barcode plus timestamp.
    """
    shared_barcode = "dup"
    march_stamp = "2026-03-12T16:16:00"
    february_stamp = "2026-02-14T16:25:00"

    # Summary side: same barcode twice, told apart only by the timestamp.
    march_summary = {
        "transactionBarcode": shared_barcode,
        "transactionDateTime": march_stamp,
        "tenderArray": [{"tenderDescription": "VISA"}],
        "couponArray": [{"upcnumberCoupon": "111"}],
    }
    february_summary = {
        "transactionBarcode": shared_barcode,
        "transactionDateTime": february_stamp,
        "tenderArray": [{"tenderDescription": "MASTERCARD"}],
        "couponArray": [],
    }
    summary_payload = {
        "data": {
            "receiptsWithCounts": {
                "receipts": [march_summary, february_summary],
            }
        }
    }

    # Detail side: one negative-amount line whose item number matches the
    # coupon UPC listed on the March summary row.
    coupon_line = {
        "itemNumber": "111",
        "itemDescription01": "/ 111",
        "itemDescription02": None,
        "itemDepartmentNumber": 14,
        "transDepartmentNumber": 14,
        "unit": -1,
        "itemIdentifier": None,
        "amount": -5,
        "itemUnitPriceAmount": 0,
    }
    march_detail = {
        "transactionBarcode": shared_barcode,
        "transactionDateTime": march_stamp,
        "transactionDate": "2026-03-12",
        "receiptType": "In-Warehouse",
        "total": 10.0,
        "totalItemCount": 1,
        "instantSavings": 5.0,
        "warehouseName": "MT VERNON",
        "warehouseNumber": 1115,
        "warehouseAddress1": "7940 RICHMOND HWY",
        "warehouseCity": "ALEXANDRIA",
        "warehouseState": "VA",
        "warehousePostalCode": "22306",
        "itemArray": [coupon_line],
    }
    detail_payloads = [
        {"data": {"receiptsWithCounts": {"receipts": [march_detail]}}},
    ]

    raw_dir = Path("costco_output/raw")
    orders, items = scrape_costco.flatten_costco_data(
        summary_payload,
        detail_payloads,
        raw_dir,
    )

    self.assertEqual("VISA", orders[0]["payment_method"])
    self.assertEqual("true", items[0]["is_coupon_line"])
    self.assertIn("dup::2026-03-12T16:16:00.json", items[0]["raw_order_path"])
def test_costco_enricher_parses_size_pack_and_discount(self):
row = enrich_costco.parse_costco_item(
order_id="abc",
@@ -335,13 +411,6 @@ class CostcoPipelineTests(unittest.TestCase):
]
with mock.patch.object(
scrape_costco, "load_config",
return_value={
"authorization": "token",
"client_id": "client",
"client_identifier": "identifier",
},
), mock.patch.object(
scrape_costco, "build_session", return_value=object()
), mock.patch.object(
scrape_costco,
@@ -353,12 +422,11 @@ class CostcoPipelineTests(unittest.TestCase):
return_value=detail_payload,
):
scrape_costco.main.callback(
start_date="1/01/2026",
end_date="3/31/2026",
outdir=str(outdir),
document_type="all",
document_sub_type="all",
window_days=92,
months_back=3,
)
metadata_path = outdir / "raw" / "summary_requests.json"