Fix Costco receipt enumeration windows

This commit is contained in:
ben
2026-03-16 11:39:45 -04:00
parent 0d1591a602
commit ac82fa64fb
2 changed files with 300 additions and 10 deletions

View File

@@ -1,6 +1,7 @@
import csv
import json
import os
from datetime import datetime, timedelta
from pathlib import Path
import click
@@ -260,6 +261,120 @@ def detail_receipts(payload):
return payload.get("data", {}).get("receiptsWithCounts", {}).get("receipts", [])
def summary_counts(payload):
    """Extract the per-category receipt counts from a summary response.

    Args:
        payload: Decoded GraphQL response; the counts are read from
            ``payload["data"]["receiptsWithCounts"]``.

    Returns:
        A dict with the keys ``inWarehouse``, ``gasStation``, ``carWash`` and
        ``gasAndCarWash``. A count that is missing or null is reported as 0.
    """
    # A response may carry an explicit null for "data" or
    # "receiptsWithCounts"; a plain .get(key, {}) default only covers a
    # *missing* key, so fall back with `or` to treat null the same way.
    counts = (payload.get("data") or {}).get("receiptsWithCounts") or {}
    return {
        key: counts.get(key) or 0
        for key in ("inWarehouse", "gasStation", "carWash", "gasAndCarWash")
    }
def parse_cli_date(value):
    """Parse a month/day/year string (``%m/%d/%Y``) into a ``date``.

    ``datetime.strptime`` raises ``ValueError`` when the text does not match.
    """
    parsed = datetime.strptime(value, "%m/%d/%Y")
    return parsed.date()
def format_cli_date(value):
    """Render a date as ``M/DD/YYYY``.

    The month is written without padding while the day is always two digits,
    e.g. ``1/01/2026``.
    """
    return "{}/{:02d}/{}".format(value.month, value.day, value.year)
def build_date_windows(start_date, end_date, window_days):
    """Split an inclusive date range into consecutive request windows.

    Args:
        start_date: First day of the range, as accepted by ``parse_cli_date``.
        end_date: Last day of the range (inclusive), same format.
        window_days: Maximum number of days a single window may cover.

    Returns:
        A list of ``{"startDate": ..., "endDate": ...}`` dicts whose values
        are produced by ``format_cli_date``. Windows are adjacent and
        non-overlapping; the final one is clipped to ``end_date``.

    Raises:
        click.ClickException: If ``end_date`` is before ``start_date`` or
            ``window_days`` is less than 1.
    """
    first_day = parse_cli_date(start_date)
    last_day = parse_cli_date(end_date)
    if last_day < first_day:
        raise click.ClickException("end-date must be on or after start-date")
    if window_days < 1:
        raise click.ClickException("window-days must be at least 1")

    # A window of N days spans N - 1 day-steps past its first day.
    span = timedelta(days=window_days - 1)
    one_day = timedelta(days=1)

    windows = []
    cursor = first_day
    while cursor <= last_day:
        stop = min(cursor + span, last_day)
        windows.append(
            {"startDate": format_cli_date(cursor), "endDate": format_cli_date(stop)}
        )
        # The next window starts the day after this one ends.
        cursor = stop + one_day
    return windows
def unique_receipts(receipts):
    """Collapse receipts that share a ``transactionBarcode``.

    Receipts with a missing or empty barcode are left out. When a barcode
    repeats, the last receipt seen wins but keeps the position of the first
    occurrence.
    """
    keyed = {}
    for entry in receipts:
        code = entry.get("transactionBarcode")
        if not code:
            continue
        keyed[code] = entry
    return [*keyed.values()]
def fetch_summary_windows(
    session,
    start_date,
    end_date,
    document_type,
    document_sub_type,
    window_days,
):
    """Request the receipt summary one date window at a time and merge it.

    The range is split with ``build_date_windows``; each window is posted as
    its own ``SUMMARY_QUERY`` request. For every window the reported
    ``inWarehouse`` count is compared with the number of returned receipts
    whose ``receiptType`` is ``"In-Warehouse"``, and a warning is written to
    stderr when the two differ.

    Returns:
        A ``(aggregate_payload, requests_metadata)`` tuple.
        ``aggregate_payload`` mirrors the summary response shape, with the
        counts summed over all windows and the receipts de-duplicated via
        ``unique_receipts``. ``requests_metadata`` holds one dict per window:
        the request variables, the returned/reported counts, and a
        ``countMismatch`` flag.
    """
    count_keys = ("inWarehouse", "gasStation", "carWash", "gasAndCarWash")
    requests_metadata = []
    combined_receipts = []

    for window in build_date_windows(start_date, end_date, window_days):
        window_start = window["startDate"]
        window_end = window["endDate"]
        variables = {
            "startDate": window_start,
            "endDate": window_end,
            "text": "custom",
            "documentType": document_type,
            "documentSubType": document_sub_type,
        }
        payload = graphql_post(session, SUMMARY_QUERY, variables)
        receipts = summary_receipts(payload)
        counts = summary_counts(payload)

        warehouse_count = len(
            [row for row in receipts if row.get("receiptType") == "In-Warehouse"]
        )
        mismatch = counts["inWarehouse"] != warehouse_count

        # Record what was asked for and what came back for this window.
        entry = dict(variables)
        entry["returnedReceipts"] = len(receipts)
        entry["returnedInWarehouseReceipts"] = warehouse_count
        for key in count_keys:
            entry[key] = counts[key]
        entry["countMismatch"] = mismatch
        requests_metadata.append(entry)

        if mismatch:
            message = (
                "warning: summary count mismatch for "
                f"{window_start} to {window_end}: "
                f"inWarehouse={counts['inWarehouse']} "
                f"returnedInWarehouseReceipts={warehouse_count}"
            )
            click.echo(message, err=True)

        combined_receipts.extend(receipts)

    totals = {
        key: sum(row[key] for row in requests_metadata) for key in count_keys
    }
    aggregate_payload = {
        "data": {
            "receiptsWithCounts": {
                **totals,
                "receipts": unique_receipts(combined_receipts),
            }
        }
    }
    return aggregate_payload, requests_metadata
def flatten_costco_data(summary_payload, detail_payloads, raw_dir):
summary_lookup = {
receipt["transactionBarcode"]: receipt
@@ -415,7 +530,14 @@ def write_csv(path, rows, fieldnames):
show_default=True,
help="Summary document sub type.",
)
def main(start_date, end_date, outdir, document_type, document_sub_type):
@click.option(
"--window-days",
default=92,
show_default=True,
type=int,
help="Maximum number of days to request per summary window.",
)
def main(start_date, end_date, outdir, document_type, document_sub_type, window_days):
config = load_config()
required = ["authorization", "client_id", "client_identifier"]
missing = [key for key in required if not config[key]]
@@ -428,18 +550,16 @@ def main(start_date, end_date, outdir, document_type, document_sub_type):
raw_dir = outdir / "raw"
session = build_session(config)
summary_payload = graphql_post(
summary_payload, request_metadata = fetch_summary_windows(
session,
SUMMARY_QUERY,
{
"startDate": start_date,
"endDate": end_date,
"text": "custom",
"documentType": document_type,
"documentSubType": document_sub_type,
},
start_date,
end_date,
document_type,
document_sub_type,
window_days,
)
write_json(raw_dir / "summary.json", summary_payload)
write_json(raw_dir / "summary_requests.json", request_metadata)
receipts = summary_receipts(summary_payload)
detail_payloads = []

View File

@@ -3,6 +3,7 @@ import json
import tempfile
import unittest
from pathlib import Path
from unittest import mock
import enrich_costco
import scrape_costco
@@ -10,6 +11,82 @@ import validate_cross_retailer_flow
class CostcoPipelineTests(unittest.TestCase):
def test_build_date_windows_splits_long_ranges(self):
    """A January-June range with a 92-day cap yields two adjacent windows."""
    expected = [
        {"startDate": "1/01/2026", "endDate": "4/02/2026"},
        {"startDate": "4/03/2026", "endDate": "6/30/2026"},
    ]

    actual = scrape_costco.build_date_windows("1/01/2026", "6/30/2026", 92)

    self.assertEqual(expected, actual)
def test_fetch_summary_windows_records_metadata_and_warns_on_mismatch(self):
    """Two windows are fetched; only the first reports a count mismatch."""

    def summary_page(in_warehouse, barcode):
        # One summary response holding a single In-Warehouse receipt.
        return {
            "data": {
                "receiptsWithCounts": {
                    "inWarehouse": in_warehouse,
                    "gasStation": 0,
                    "carWash": 0,
                    "gasAndCarWash": 0,
                    "receipts": [
                        {
                            "transactionBarcode": barcode,
                            "receiptType": "In-Warehouse",
                        }
                    ],
                }
            }
        }

    # First window claims 2 warehouse receipts but returns 1 (mismatch);
    # the second claims 1 and returns 1.
    payloads = [summary_page(2, "abc"), summary_page(1, "def")]

    post_patch = mock.patch.object(
        scrape_costco, "graphql_post", side_effect=payloads
    )
    echo_patch = mock.patch.object(scrape_costco.click, "echo")
    with post_patch as mocked_post:
        with echo_patch as mocked_echo:
            summary_payload, metadata = scrape_costco.fetch_summary_windows(
                session=object(),
                start_date="1/01/2026",
                end_date="6/30/2026",
                document_type="all",
                document_sub_type="all",
                window_days=92,
            )

    self.assertEqual(2, mocked_post.call_count)
    self.assertEqual(2, len(metadata))

    first_window, second_window = metadata
    self.assertTrue(first_window["countMismatch"])
    self.assertFalse(second_window["countMismatch"])
    self.assertEqual("1/01/2026", first_window["startDate"])
    self.assertEqual("4/03/2026", second_window["startDate"])

    merged_barcodes = [
        row["transactionBarcode"]
        for row in scrape_costco.summary_receipts(summary_payload)
    ]
    self.assertEqual(["abc", "def"], merged_barcodes)

    mocked_echo.assert_called_once()
    warning_text = mocked_echo.call_args.args[0]
    self.assertIn("warning: summary count mismatch", warning_text)
def test_flatten_costco_data_preserves_discount_rows(self):
summary_payload = {
"data": {
@@ -196,6 +273,99 @@ class CostcoPipelineTests(unittest.TestCase):
self.assertEqual(1, len(rows))
self.assertEqual("banana", rows[0]["proof_name"])
def test_main_writes_summary_request_metadata(self):
    """``main`` saves the per-window metadata to raw/summary_requests.json."""
    summary_receipt = {
        "transactionBarcode": "abc",
        "receiptType": "In-Warehouse",
        "tenderArray": [],
        "couponArray": [],
    }
    summary_payload = {
        "data": {
            "receiptsWithCounts": {
                "inWarehouse": 1,
                "gasStation": 0,
                "carWash": 0,
                "gasAndCarWash": 0,
                "receipts": [summary_receipt],
            }
        }
    }
    detail_receipt = {
        "transactionBarcode": "abc",
        "transactionDate": "2026-03-12",
        "receiptType": "In-Warehouse",
        "total": 10.0,
        "totalItemCount": 1,
        "instantSavings": 0,
        "warehouseName": "MT VERNON",
        "warehouseNumber": 1115,
        "warehouseAddress1": "7940 RICHMOND HWY",
        "warehouseCity": "ALEXANDRIA",
        "warehouseState": "VA",
        "warehousePostalCode": "22306",
        "itemArray": [],
    }
    detail_payload = {"data": {"receiptsWithCounts": {"receipts": [detail_receipt]}}}
    metadata = [
        {
            "startDate": "1/01/2026",
            "endDate": "3/31/2026",
            "text": "custom",
            "documentType": "all",
            "documentSubType": "all",
            "returnedReceipts": 1,
            "returnedInWarehouseReceipts": 1,
            "inWarehouse": 1,
            "gasStation": 0,
            "carWash": 0,
            "gasAndCarWash": 0,
            "countMismatch": False,
        }
    ]

    # Replace configuration, session creation and both network entry points
    # so that main runs entirely against the canned payloads above.
    config_patch = mock.patch.object(
        scrape_costco,
        "load_config",
        return_value={
            "authorization": "token",
            "client_id": "client",
            "client_identifier": "identifier",
        },
    )
    session_patch = mock.patch.object(
        scrape_costco, "build_session", return_value=object()
    )
    summary_patch = mock.patch.object(
        scrape_costco,
        "fetch_summary_windows",
        return_value=(summary_payload, metadata),
    )
    detail_patch = mock.patch.object(
        scrape_costco, "graphql_post", return_value=detail_payload
    )

    with tempfile.TemporaryDirectory() as tmpdir:
        outdir = Path(tmpdir) / "costco_output"
        with config_patch, session_patch, summary_patch, detail_patch:
            scrape_costco.main.callback(
                start_date="1/01/2026",
                end_date="3/31/2026",
                outdir=str(outdir),
                document_type="all",
                document_sub_type="all",
                window_days=92,
            )

        # Read the file back while the temporary directory still exists.
        metadata_path = outdir / "raw" / "summary_requests.json"
        self.assertTrue(metadata_path.exists())
        saved_metadata = json.loads(metadata_path.read_text(encoding="utf-8"))
        self.assertEqual(metadata, saved_metadata)
# Run the unittest test runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()