"""Unit tests for the Costco scrape, enrich, and cross-retailer validation flow."""

import csv
import json
import tempfile
import unittest
from pathlib import Path
from unittest import mock

import enrich_costco
import scrape_costco
import validate_cross_retailer_flow


# Directory handed to ``flatten_costco_data`` as the raw-output location.
_RAW_DIR = Path("costco_output/raw")


def _receipt_counts(in_warehouse):
    """Return the per-channel count fields with only ``inWarehouse`` non-zero."""
    return {
        "inWarehouse": in_warehouse,
        "gasStation": 0,
        "carWash": 0,
        "gasAndCarWash": 0,
    }


def _envelope(receipts, counts=None):
    """Wrap *receipts* in the ``data.receiptsWithCounts`` payload shape.

    Count fields, when given, are placed ahead of the ``receipts`` key so
    the resulting dict keeps the same key order as a hand-written literal.
    """
    inner = {} if counts is None else dict(counts)
    inner["receipts"] = receipts
    return {"data": {"receiptsWithCounts": inner}}


def _line_item(number, description, detail, department, *, unit, identifier,
               amount, unit_price):
    """Build one ``itemArray`` entry; both department fields share a value."""
    return {
        "itemNumber": number,
        "itemDescription01": description,
        "itemDescription02": detail,
        "itemDepartmentNumber": department,
        "transDepartmentNumber": department,
        "unit": unit,
        "itemIdentifier": identifier,
        "amount": amount,
        "itemUnitPriceAmount": unit_price,
    }


def _sale_item(number, description, detail, department, price):
    """Build a single-unit purchase line whose amount equals its unit price."""
    return _line_item(
        number,
        description,
        detail,
        department,
        unit=1,
        identifier="E",
        amount=price,
        unit_price=price,
    )


def _coupon_item(number, description, department=14):
    """Build a negative-unit ``-5`` line with no identifier or second description."""
    return _line_item(
        number,
        description,
        None,
        department,
        unit=-1,
        identifier=None,
        amount=-5,
        unit_price=0,
    )


def _warehouse_receipt(item_array, *, total_item_count, instant_savings,
                       barcode="abc", transaction_date_time=None):
    """Build a detail receipt for the fixture warehouse dated 2026-03-12.

    ``transactionDateTime`` is only included when a value is supplied.
    """
    receipt = {"transactionBarcode": barcode}
    if transaction_date_time is not None:
        receipt["transactionDateTime"] = transaction_date_time
    receipt.update(
        {
            "transactionDate": "2026-03-12",
            "receiptType": "In-Warehouse",
            "total": 10.0,
            "totalItemCount": total_item_count,
            "instantSavings": instant_savings,
            "warehouseName": "MT VERNON",
            "warehouseNumber": 1115,
            "warehouseAddress1": "7940 RICHMOND HWY",
            "warehouseCity": "ALEXANDRIA",
            "warehouseState": "VA",
            "warehousePostalCode": "22306",
            "itemArray": item_array,
        }
    )
    return receipt


def _parse_line(line_no, item):
    """Run ``parse_costco_item`` for order ``abc`` on the given line."""
    return enrich_costco.parse_costco_item(
        order_id="abc",
        order_date="2026-03-12",
        raw_path=Path("costco_output/raw/abc.json"),
        line_no=line_no,
        item=item,
    )


def _write_single_row_csv(path, fieldnames, row):
    """Write a header plus exactly one row to *path* as UTF-8 CSV."""
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerow(row)


class CostcoPipelineTests(unittest.TestCase):
    """Tests for date windowing, receipt flattening, enrichment, and CLI output."""

    def test_resolve_date_range_uses_months_back(self):
        """Three months back from 3/16/2026 starts on 12/16/2025."""
        anchor = scrape_costco.parse_cli_date("3/16/2026")

        range_start, range_end = scrape_costco.resolve_date_range(3, today=anchor)

        self.assertEqual("12/16/2025", range_start)
        self.assertEqual("3/16/2026", range_end)

    def test_build_date_windows_splits_long_ranges(self):
        """A Jan-Jun range with a 92-day window yields two adjacent windows."""
        expected = [
            {"startDate": opening, "endDate": closing}
            for opening, closing in (
                ("1/01/2026", "4/02/2026"),
                ("4/03/2026", "6/30/2026"),
            )
        ]

        actual = scrape_costco.build_date_windows("1/01/2026", "6/30/2026", 92)

        self.assertEqual(expected, actual)

    def test_fetch_summary_windows_records_metadata_and_warns_on_mismatch(self):
        """Each window is recorded and a count mismatch emits one warning."""
        # The first response claims two in-warehouse receipts but carries one;
        # the second claims one and carries one.
        responses = [
            _envelope(
                [{"transactionBarcode": barcode, "receiptType": "In-Warehouse"}],
                _receipt_counts(claimed),
            )
            for barcode, claimed in (("abc", 2), ("def", 1))
        ]

        with mock.patch.object(
            scrape_costco, "graphql_post", side_effect=responses
        ) as post_mock:
            with mock.patch.object(scrape_costco.click, "echo") as echo_mock:
                combined, window_meta = scrape_costco.fetch_summary_windows(
                    session=object(),
                    start_date="1/01/2026",
                    end_date="6/30/2026",
                    document_type="all",
                    document_sub_type="all",
                    window_days=92,
                )

        self.assertEqual(2, post_mock.call_count)
        self.assertEqual(2, len(window_meta))
        first_window, second_window = window_meta[0], window_meta[1]
        self.assertTrue(first_window["countMismatch"])
        self.assertFalse(second_window["countMismatch"])
        self.assertEqual("1/01/2026", first_window["startDate"])
        self.assertEqual("4/03/2026", second_window["startDate"])

        barcodes = [
            receipt["transactionBarcode"]
            for receipt in scrape_costco.summary_receipts(combined)
        ]
        self.assertEqual(["abc", "def"], barcodes)

        echo_mock.assert_called_once()
        message = echo_mock.call_args[0][0]
        self.assertIn("warning: summary count mismatch", message)

    def test_flatten_costco_data_preserves_discount_rows(self):
        """The coupon line is kept as its own row and flagged as discount/coupon."""
        summary = _envelope(
            [
                {
                    "transactionBarcode": "abc",
                    "tenderArray": [{"tenderDescription": "VISA"}],
                    "couponArray": [{"upcnumberCoupon": "2100003746641"}],
                }
            ]
        )
        purchased = _sale_item(
            "4873222", "ALL F&C", "200OZ 160LOADS P104", 14, 19.99
        )
        coupon = _coupon_item("374664", "/ 4873222")
        details = [
            _envelope(
                [
                    _warehouse_receipt(
                        [purchased, coupon],
                        total_item_count=2,
                        instant_savings=5.0,
                    )
                ]
            )
        ]

        orders, items = scrape_costco.flatten_costco_data(summary, details, _RAW_DIR)

        self.assertEqual(1, len(orders))
        self.assertEqual(2, len(items))
        self.assertEqual("false", items[0]["is_discount_line"])
        self.assertEqual("true", items[1]["is_discount_line"])
        self.assertEqual("true", items[1]["is_coupon_line"])

    def test_flatten_costco_data_uses_composite_summary_lookup_key(self):
        """Two summaries sharing a barcode are told apart by their timestamp."""
        summary = _envelope(
            [
                {
                    "transactionBarcode": "dup",
                    "transactionDateTime": "2026-03-12T16:16:00",
                    "tenderArray": [{"tenderDescription": "VISA"}],
                    "couponArray": [{"upcnumberCoupon": "111"}],
                },
                {
                    "transactionBarcode": "dup",
                    "transactionDateTime": "2026-02-14T16:25:00",
                    "tenderArray": [{"tenderDescription": "MASTERCARD"}],
                    "couponArray": [],
                },
            ]
        )
        details = [
            _envelope(
                [
                    _warehouse_receipt(
                        [_coupon_item("111", "/ 111")],
                        total_item_count=1,
                        instant_savings=5.0,
                        barcode="dup",
                        transaction_date_time="2026-03-12T16:16:00",
                    )
                ]
            )
        ]

        orders, items = scrape_costco.flatten_costco_data(summary, details, _RAW_DIR)

        self.assertEqual("VISA", orders[0]["payment_method"])
        only_item = items[0]
        self.assertEqual("true", only_item["is_coupon_line"])
        self.assertIn("dup-2026-03-12T16-16-00.json", only_item["raw_order_path"])

    def test_costco_enricher_parses_size_pack_and_discount(self):
        """A ``6-PACK`` line gets count fields; a coupon line gets discount flags."""
        row = _parse_line(
            1, _sale_item("60357", "MIXED PEPPER", "6-PACK", 65, 7.49)
        )

        # Checked in a fixed order so the first failing field is reported.
        for column, wanted in (
            ("retailer_item_id", "60357"),
            ("item_name_norm", "MIXED PEPPER"),
            ("pack_qty", "6"),
            ("measure_type", "count"),
            ("normalized_row_id", "costco:abc:1"),
            ("normalization_basis", "exact_retailer_item_id"),
        ):
            self.assertEqual(wanted, row[column])
        self.assertTrue(row["normalized_item_id"])
        for column, wanted in (
            ("normalized_quantity", "6"),
            ("normalized_quantity_unit", "count"),
        ):
            self.assertEqual(wanted, row[column])

        discount = _parse_line(2, _coupon_item("374664", "/ 4873222"))

        for column, wanted in (
            ("is_discount_line", "true"),
            ("is_coupon_line", "true"),
            ("is_item", "false"),
        ):
            self.assertEqual(wanted, discount[column])

    def test_costco_name_cleanup_removes_dual_weight_and_logistics_artifacts(self):
        """Dual-unit weights and trailing model/pallet codes leave the name."""
        mixed_units = _parse_line(
            1, _sale_item("18600", "MANDARINS 2.27 KG / 5 LBS", None, 65, 7.49)
        )

        self.assertEqual("MANDARIN", mixed_units["item_name_norm"])
        self.assertEqual("5", mixed_units["size_value"])
        self.assertEqual("lb", mixed_units["size_unit"])

        logistics = _parse_line(
            2,
            _sale_item(
                "1375005",
                "LIFE 6'TABLE MDL #80873U - T12/H3/P36",
                None,
                18,
                119.98,
            ),
        )

        self.assertEqual("LIFE 6'TABLE MDL", logistics["item_name_norm"])

    def test_build_items_enriched_matches_discount_to_item(self):
        """A ``/ <item number>`` coupon is netted against the matching purchase."""
        receipt = {
            "transactionBarcode": "abc",
            "transactionDate": "2026-03-12",
            "itemArray": [
                _sale_item(
                    "4873222", "ALL F&C", "200OZ 160LOADS P104", 14, 19.99
                ),
                _coupon_item("374664", "/ 4873222"),
            ],
        }

        with tempfile.TemporaryDirectory() as workdir:
            raw_dir = Path(workdir) / "raw"
            raw_dir.mkdir()
            serialized = json.dumps(_envelope([receipt]))
            (raw_dir / "abc.json").write_text(serialized, encoding="utf-8")

            rows = enrich_costco.build_items_enriched(raw_dir)

            purchase_row = next(
                filter(lambda entry: entry["is_discount_line"] == "false", rows)
            )
            discount_row = next(
                filter(lambda entry: entry["is_discount_line"] == "true", rows)
            )

            self.assertEqual("-5", purchase_row["matched_discount_amount"])
            self.assertEqual("14.99", purchase_row["net_line_total"])
            self.assertIn("matched_discount=4873222", purchase_row["parse_notes"])
            self.assertIn("matched_to_item=4873222", discount_row["parse_notes"])

    def test_cross_retailer_validation_writes_proof_example(self):
        """One Giant row and one Costco row for BANANA yield one proof example."""
        fieldnames = enrich_costco.OUTPUT_FIELDS
        # Flag values shared by both input rows.
        plain_line_flags = {
            "is_store_brand": "false",
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
        }
        giant_row = {
            **dict.fromkeys(fieldnames, ""),
            "retailer": "giant",
            "order_id": "g1",
            "line_no": "1",
            "order_date": "2026-03-01",
            "retailer_item_id": "100",
            "item_name": "FRESH BANANA",
            "item_name_norm": "BANANA",
            "upc": "4011",
            "measure_type": "weight",
            **plain_line_flags,
            "line_total": "1.29",
        }
        costco_row = {
            **dict.fromkeys(fieldnames, ""),
            "retailer": "costco",
            "order_id": "c1",
            "line_no": "1",
            "order_date": "2026-03-12",
            "retailer_item_id": "30669",
            "item_name": "BANANAS 3 LB / 1.36 KG",
            "item_name_norm": "BANANA",
            "upc": "",
            "size_value": "3",
            "size_unit": "lb",
            "measure_type": "weight",
            **plain_line_flags,
            "line_total": "2.98",
        }

        with tempfile.TemporaryDirectory() as workdir:
            base = Path(workdir)
            giant_csv = base / "giant_items_enriched.csv"
            costco_csv = base / "costco_items_enriched.csv"
            outdir = base / "combined"

            _write_single_row_csv(giant_csv, fieldnames, giant_row)
            _write_single_row_csv(costco_csv, fieldnames, costco_row)

            validate_cross_retailer_flow.main.callback(
                giant_items_enriched_csv=str(giant_csv),
                costco_items_enriched_csv=str(costco_csv),
                outdir=str(outdir),
            )

            proof_path = outdir / "proof_examples.csv"
            self.assertTrue(proof_path.exists())
            with proof_path.open(newline="", encoding="utf-8") as handle:
                proof_rows = list(csv.DictReader(handle))
            self.assertEqual(1, len(proof_rows))
            self.assertEqual("banana", proof_rows[0]["proof_name"])

    def test_main_writes_summary_request_metadata(self):
        """``main`` saves the window metadata to ``raw/summary_requests.json``."""
        summary_payload = _envelope(
            [
                {
                    "transactionBarcode": "abc",
                    "receiptType": "In-Warehouse",
                    "tenderArray": [],
                    "couponArray": [],
                }
            ],
            _receipt_counts(1),
        )
        detail_payload = _envelope(
            [_warehouse_receipt([], total_item_count=1, instant_savings=0)]
        )
        metadata = [
            {
                "startDate": "1/01/2026",
                "endDate": "3/31/2026",
                "text": "custom",
                "documentType": "all",
                "documentSubType": "all",
                "returnedReceipts": 1,
                "returnedInWarehouseReceipts": 1,
                "inWarehouse": 1,
                "gasStation": 0,
                "carWash": 0,
                "gasAndCarWash": 0,
                "countMismatch": False,
            }
        ]

        # Every collaborator of ``main`` is replaced by a mock returning a
        # canned value; the patches are undone by the registered cleanups.
        canned_returns = {
            "load_config": {
                "authorization": "",
                "client_id": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
                "client_identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
            },
            "find_firefox_profile_dir": Path("/tmp/profile"),
            "load_costco_browser_headers": {
                "costco-x-authorization": "Bearer header.payload.signature",
                "costco-x-wcs-clientId": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
                "client-identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
            },
            "build_session": object(),
            "fetch_summary_windows": (summary_payload, metadata),
            "graphql_post": detail_payload,
        }
        for attribute, canned in canned_returns.items():
            patcher = mock.patch.object(
                scrape_costco, attribute, return_value=canned
            )
            patcher.start()
            self.addCleanup(patcher.stop)

        with tempfile.TemporaryDirectory() as workdir:
            outdir = Path(workdir) / "costco_output"

            scrape_costco.main.callback(
                outdir=str(outdir),
                document_type="all",
                document_sub_type="all",
                window_days=92,
                months_back=3,
                firefox_profile_dir=None,
            )

            metadata_path = outdir / "raw" / "summary_requests.json"
            self.assertTrue(metadata_path.exists())
            saved_metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
            self.assertEqual(metadata, saved_metadata)


if __name__ == "__main__":
    unittest.main()