518 lines
21 KiB
Python
518 lines
21 KiB
Python
import csv
|
|
import json
|
|
import tempfile
|
|
import unittest
|
|
from pathlib import Path
|
|
from unittest import mock
|
|
|
|
import enrich_costco
|
|
import scrape_costco
|
|
import validate_cross_retailer_flow
|
|
|
|
|
|
class CostcoPipelineTests(unittest.TestCase):
    """Unit tests for the Costco scrape -> enrich -> cross-retailer validation flow.

    All network-facing helpers of ``scrape_costco`` are replaced with mocks, so
    the tests run purely on the in-memory fixtures built below.
    """

    # ------------------------------------------------------------------
    # Fixture builders
    #
    # Every builder returns a fresh object on each call so that one test can
    # never observe mutations made by the code under test in another test.
    # ------------------------------------------------------------------

    @staticmethod
    def _receipts_payload(receipts, counts=None):
        """Wrap ``receipts`` in the ``data.receiptsWithCounts`` envelope.

        Args:
            receipts: list of receipt dicts stored under the ``receipts`` key.
            counts: optional dict of counter fields; when given they are
                inserted *before* ``receipts`` so the key order is stable.

        Returns:
            A new nested dict ``{"data": {"receiptsWithCounts": {...}}}``.
        """
        body = dict(counts or {})
        body["receipts"] = receipts
        return {"data": {"receiptsWithCounts": body}}

    @staticmethod
    def _receipt_counts(in_warehouse):
        """Return the four counter fields with only ``inWarehouse`` non-zero."""
        return {
            "inWarehouse": in_warehouse,
            "gasStation": 0,
            "carWash": 0,
            "gasAndCarWash": 0,
        }

    @staticmethod
    def _purchase_item(number, description, detail, *, department, price):
        """Return a single-unit purchase line priced at ``price``."""
        return {
            "itemNumber": number,
            "itemDescription01": description,
            "itemDescription02": detail,
            "itemDepartmentNumber": department,
            "transDepartmentNumber": department,
            "unit": 1,
            "itemIdentifier": "E",
            "amount": price,
            "itemUnitPriceAmount": price,
        }

    @staticmethod
    def _coupon_item(number, target_item_number):
        """Return a -5 coupon line whose description is ``/ <target_item_number>``."""
        return {
            "itemNumber": number,
            "itemDescription01": f"/ {target_item_number}",
            "itemDescription02": None,
            "itemDepartmentNumber": 14,
            "transDepartmentNumber": 14,
            "unit": -1,
            "itemIdentifier": None,
            "amount": -5,
            "itemUnitPriceAmount": 0,
        }

    @classmethod
    def _item_with_coupon(cls):
        """Return purchase line 4873222 followed by coupon line 374664 for it."""
        return [
            cls._purchase_item(
                "4873222",
                "ALL F&C",
                "200OZ 160LOADS P104",
                department=14,
                price=19.99,
            ),
            cls._coupon_item("374664", "4873222"),
        ]

    @staticmethod
    def _detail_receipt(
        barcode,
        *,
        items,
        total_item_count,
        instant_savings,
        transaction_date_time=None,
    ):
        """Return a detail receipt for the fixed warehouse used by these tests.

        Args:
            barcode: value for ``transactionBarcode``.
            items: list stored under ``itemArray``.
            total_item_count: value for ``totalItemCount`` (independent of
                ``len(items)``; one test uses 1 with an empty item list).
            instant_savings: value for ``instantSavings``.
            transaction_date_time: when not ``None``, added as
                ``transactionDateTime`` right after the barcode.
        """
        receipt = {"transactionBarcode": barcode}
        if transaction_date_time is not None:
            receipt["transactionDateTime"] = transaction_date_time
        receipt.update(
            {
                "transactionDate": "2026-03-12",
                "receiptType": "In-Warehouse",
                "total": 10.0,
                "totalItemCount": total_item_count,
                "instantSavings": instant_savings,
                "warehouseName": "MT VERNON",
                "warehouseNumber": 1115,
                "warehouseAddress1": "7940 RICHMOND HWY",
                "warehouseCity": "ALEXANDRIA",
                "warehouseState": "VA",
                "warehousePostalCode": "22306",
                "itemArray": items,
            }
        )
        return receipt

    @staticmethod
    def _write_csv(path, fieldnames, row):
        """Write a header plus the single ``row`` to ``path`` as UTF-8 CSV."""
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerow(row)

    # ------------------------------------------------------------------
    # scrape_costco: date handling
    # ------------------------------------------------------------------

    def test_resolve_date_range_uses_months_back(self):
        """Three months back from 3/16/2026 yields 12/16/2025 .. 3/16/2026."""
        start_date, end_date = scrape_costco.resolve_date_range(
            3, today=scrape_costco.parse_cli_date("3/16/2026")
        )

        self.assertEqual("12/16/2025", start_date)
        self.assertEqual("3/16/2026", end_date)

    def test_build_date_windows_splits_long_ranges(self):
        """A half-year range with a 92-day window is split into two windows."""
        windows = scrape_costco.build_date_windows("1/01/2026", "6/30/2026", 92)

        self.assertEqual(
            [
                {"startDate": "1/01/2026", "endDate": "4/02/2026"},
                {"startDate": "4/03/2026", "endDate": "6/30/2026"},
            ],
            windows,
        )

    # ------------------------------------------------------------------
    # scrape_costco: summary fetching
    # ------------------------------------------------------------------

    def test_fetch_summary_windows_records_metadata_and_warns_on_mismatch(self):
        """Each window gets a metadata entry; a count mismatch emits one warning."""
        payloads = [
            # Declares 2 in-warehouse receipts but carries only 1; the
            # assertions below expect countMismatch for this window.
            self._receipts_payload(
                [{"transactionBarcode": "abc", "receiptType": "In-Warehouse"}],
                counts=self._receipt_counts(2),
            ),
            self._receipts_payload(
                [{"transactionBarcode": "def", "receiptType": "In-Warehouse"}],
                counts=self._receipt_counts(1),
            ),
        ]

        with mock.patch.object(
            scrape_costco, "graphql_post", side_effect=payloads
        ) as mocked_post, mock.patch.object(scrape_costco.click, "echo") as mocked_echo:
            summary_payload, metadata = scrape_costco.fetch_summary_windows(
                session=object(),
                start_date="1/01/2026",
                end_date="6/30/2026",
                document_type="all",
                document_sub_type="all",
                window_days=92,
            )

        self.assertEqual(2, mocked_post.call_count)
        self.assertEqual(2, len(metadata))
        self.assertTrue(metadata[0]["countMismatch"])
        self.assertFalse(metadata[1]["countMismatch"])
        self.assertEqual("1/01/2026", metadata[0]["startDate"])
        self.assertEqual("4/03/2026", metadata[1]["startDate"])
        self.assertEqual(
            ["abc", "def"],
            [
                row["transactionBarcode"]
                for row in scrape_costco.summary_receipts(summary_payload)
            ],
        )
        mocked_echo.assert_called_once()
        warning_text = mocked_echo.call_args.args[0]
        self.assertIn("warning: summary count mismatch", warning_text)

    # ------------------------------------------------------------------
    # scrape_costco: flattening
    # ------------------------------------------------------------------

    def test_flatten_costco_data_preserves_discount_rows(self):
        """Coupon lines stay in the item output and are flagged as discounts."""
        summary_payload = self._receipts_payload(
            [
                {
                    "transactionBarcode": "abc",
                    "tenderArray": [{"tenderDescription": "VISA"}],
                    "couponArray": [{"upcnumberCoupon": "2100003746641"}],
                }
            ]
        )
        detail_payloads = [
            self._receipts_payload(
                [
                    self._detail_receipt(
                        "abc",
                        items=self._item_with_coupon(),
                        total_item_count=2,
                        instant_savings=5.0,
                    )
                ]
            )
        ]

        orders, items = scrape_costco.flatten_costco_data(
            summary_payload, detail_payloads, Path("costco_output/raw")
        )

        self.assertEqual(1, len(orders))
        self.assertEqual(2, len(items))
        self.assertEqual("false", items[0]["is_discount_line"])
        self.assertEqual("true", items[1]["is_discount_line"])
        self.assertEqual("true", items[1]["is_coupon_line"])

    def test_flatten_costco_data_uses_composite_summary_lookup_key(self):
        """Two summaries sharing a barcode are told apart by their date-time."""
        summary_payload = self._receipts_payload(
            [
                {
                    "transactionBarcode": "dup",
                    "transactionDateTime": "2026-03-12T16:16:00",
                    "tenderArray": [{"tenderDescription": "VISA"}],
                    "couponArray": [{"upcnumberCoupon": "111"}],
                },
                {
                    "transactionBarcode": "dup",
                    "transactionDateTime": "2026-02-14T16:25:00",
                    "tenderArray": [{"tenderDescription": "MASTERCARD"}],
                    "couponArray": [],
                },
            ]
        )
        detail_payloads = [
            self._receipts_payload(
                [
                    self._detail_receipt(
                        "dup",
                        items=[self._coupon_item("111", "111")],
                        total_item_count=1,
                        instant_savings=5.0,
                        transaction_date_time="2026-03-12T16:16:00",
                    )
                ]
            )
        ]

        orders, items = scrape_costco.flatten_costco_data(
            summary_payload, detail_payloads, Path("costco_output/raw")
        )

        # The detail receipt must pick up the 3/12 summary (VISA + coupon),
        # not the 2/14 one that shares the same barcode.
        self.assertEqual("VISA", orders[0]["payment_method"])
        self.assertEqual("true", items[0]["is_coupon_line"])
        self.assertIn("dup-2026-03-12T16-16-00.json", items[0]["raw_order_path"])

    # ------------------------------------------------------------------
    # enrich_costco
    # ------------------------------------------------------------------

    def test_costco_enricher_parses_size_pack_and_discount(self):
        """A pack item is normalized to a count; a coupon line is not an item."""
        row = enrich_costco.parse_costco_item(
            order_id="abc",
            order_date="2026-03-12",
            raw_path=Path("costco_output/raw/abc.json"),
            line_no=1,
            item=self._purchase_item(
                "60357", "MIXED PEPPER", "6-PACK", department=65, price=7.49
            ),
        )
        self.assertEqual("60357", row["retailer_item_id"])
        self.assertEqual("MIXED PEPPER", row["item_name_norm"])
        self.assertEqual("6", row["pack_qty"])
        self.assertEqual("count", row["measure_type"])
        self.assertEqual("costco:abc:1", row["normalized_row_id"])
        self.assertEqual("exact_retailer_item_id", row["normalization_basis"])
        self.assertTrue(row["normalized_item_id"])
        self.assertEqual("6", row["normalized_quantity"])
        self.assertEqual("count", row["normalized_quantity_unit"])

        discount = enrich_costco.parse_costco_item(
            order_id="abc",
            order_date="2026-03-12",
            raw_path=Path("costco_output/raw/abc.json"),
            line_no=2,
            item=self._coupon_item("374664", "4873222"),
        )
        self.assertEqual("true", discount["is_discount_line"])
        self.assertEqual("true", discount["is_coupon_line"])
        self.assertEqual("false", discount["is_item"])

    def test_build_items_enriched_matches_discount_to_item(self):
        """The coupon amount is attached to the purchase line it references."""
        with tempfile.TemporaryDirectory() as tmpdir:
            raw_dir = Path(tmpdir) / "raw"
            raw_dir.mkdir()
            payload = self._receipts_payload(
                [
                    {
                        "transactionBarcode": "abc",
                        "transactionDate": "2026-03-12",
                        "itemArray": self._item_with_coupon(),
                    }
                ]
            )
            (raw_dir / "abc.json").write_text(json.dumps(payload), encoding="utf-8")

            rows = enrich_costco.build_items_enriched(raw_dir)

            purchase_row = next(row for row in rows if row["is_discount_line"] == "false")
            discount_row = next(row for row in rows if row["is_discount_line"] == "true")
            self.assertEqual("-5", purchase_row["matched_discount_amount"])
            self.assertEqual("14.99", purchase_row["net_line_total"])
            self.assertIn("matched_discount=4873222", purchase_row["parse_notes"])
            self.assertIn("matched_to_item=4873222", discount_row["parse_notes"])

    # ------------------------------------------------------------------
    # validate_cross_retailer_flow
    # ------------------------------------------------------------------

    def test_cross_retailer_validation_writes_proof_example(self):
        """One Giant row and one Costco row for BANANA produce one proof row."""
        with tempfile.TemporaryDirectory() as tmpdir:
            giant_csv = Path(tmpdir) / "giant_items_enriched.csv"
            costco_csv = Path(tmpdir) / "costco_items_enriched.csv"
            outdir = Path(tmpdir) / "combined"

            fieldnames = enrich_costco.OUTPUT_FIELDS
            # Start from an all-blank row so every output column is present.
            giant_row = {field: "" for field in fieldnames}
            giant_row.update(
                {
                    "retailer": "giant",
                    "order_id": "g1",
                    "line_no": "1",
                    "order_date": "2026-03-01",
                    "retailer_item_id": "100",
                    "item_name": "FRESH BANANA",
                    "item_name_norm": "BANANA",
                    "upc": "4011",
                    "measure_type": "weight",
                    "is_store_brand": "false",
                    "is_fee": "false",
                    "is_discount_line": "false",
                    "is_coupon_line": "false",
                    "line_total": "1.29",
                }
            )
            costco_row = {field: "" for field in fieldnames}
            costco_row.update(
                {
                    "retailer": "costco",
                    "order_id": "c1",
                    "line_no": "1",
                    "order_date": "2026-03-12",
                    "retailer_item_id": "30669",
                    "item_name": "BANANAS 3 LB / 1.36 KG",
                    "item_name_norm": "BANANA",
                    "upc": "",
                    "size_value": "3",
                    "size_unit": "lb",
                    "measure_type": "weight",
                    "is_store_brand": "false",
                    "is_fee": "false",
                    "is_discount_line": "false",
                    "is_coupon_line": "false",
                    "line_total": "2.98",
                }
            )

            self._write_csv(giant_csv, fieldnames, giant_row)
            self._write_csv(costco_csv, fieldnames, costco_row)

            validate_cross_retailer_flow.main.callback(
                giant_items_enriched_csv=str(giant_csv),
                costco_items_enriched_csv=str(costco_csv),
                outdir=str(outdir),
            )

            proof_path = outdir / "proof_examples.csv"
            self.assertTrue(proof_path.exists())
            with proof_path.open(newline="", encoding="utf-8") as handle:
                rows = list(csv.DictReader(handle))
            self.assertEqual(1, len(rows))
            self.assertEqual("banana", rows[0]["proof_name"])

    # ------------------------------------------------------------------
    # scrape_costco: CLI entry point
    # ------------------------------------------------------------------

    def test_main_writes_summary_request_metadata(self):
        """``main`` persists the per-window metadata to raw/summary_requests.json."""
        with tempfile.TemporaryDirectory() as tmpdir:
            outdir = Path(tmpdir) / "costco_output"
            summary_payload = self._receipts_payload(
                [
                    {
                        "transactionBarcode": "abc",
                        "receiptType": "In-Warehouse",
                        "tenderArray": [],
                        "couponArray": [],
                    }
                ],
                counts=self._receipt_counts(1),
            )
            detail_payload = self._receipts_payload(
                [
                    self._detail_receipt(
                        "abc",
                        items=[],
                        total_item_count=1,
                        instant_savings=0,
                    )
                ]
            )
            metadata = [
                {
                    "startDate": "1/01/2026",
                    "endDate": "3/31/2026",
                    "text": "custom",
                    "documentType": "all",
                    "documentSubType": "all",
                    "returnedReceipts": 1,
                    "returnedInWarehouseReceipts": 1,
                    "inWarehouse": 1,
                    "gasStation": 0,
                    "carWash": 0,
                    "gasAndCarWash": 0,
                    "countMismatch": False,
                }
            ]

            # Patch out config loading, browser-header discovery, session
            # creation and both request helpers so ``main`` runs offline.
            with mock.patch.object(
                scrape_costco,
                "load_config",
                return_value={
                    "authorization": "",
                    "client_id": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
                    "client_identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
                },
            ), mock.patch.object(
                scrape_costco,
                "find_firefox_profile_dir",
                return_value=Path("/tmp/profile"),
            ), mock.patch.object(
                scrape_costco,
                "load_costco_browser_headers",
                return_value={
                    "costco-x-authorization": "Bearer header.payload.signature",
                    "costco-x-wcs-clientId": "4900eb1f-0c10-4bd9-99c3-c59e6c1ecebf",
                    "client-identifier": "481b1aec-aa3b-454b-b81b-48187e28f205",
                },
            ), mock.patch.object(
                scrape_costco, "build_session", return_value=object()
            ), mock.patch.object(
                scrape_costco,
                "fetch_summary_windows",
                return_value=(summary_payload, metadata),
            ), mock.patch.object(
                scrape_costco,
                "graphql_post",
                return_value=detail_payload,
            ):
                scrape_costco.main.callback(
                    outdir=str(outdir),
                    document_type="all",
                    document_sub_type="all",
                    window_days=92,
                    months_back=3,
                    firefox_profile_dir=None,
                )

            metadata_path = outdir / "raw" / "summary_requests.json"
            self.assertTrue(metadata_path.exists())
            saved_metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
            self.assertEqual(metadata, saved_metadata)
# Run the test suite when this file is executed directly as a script.
if __name__ == "__main__":
    unittest.main()