Fix Costco receipt enumeration windows

This commit is contained in:
ben
2026-03-16 11:39:45 -04:00
parent 0d1591a602
commit ac82fa64fb
2 changed files with 300 additions and 10 deletions

View File

@@ -1,6 +1,7 @@
import csv
import json
import os
from datetime import datetime, timedelta
from pathlib import Path
import click
@@ -260,6 +261,120 @@ def detail_receipts(payload):
return payload.get("data", {}).get("receiptsWithCounts", {}).get("receipts", [])
def summary_counts(payload):
    """Return the per-category receipt counts reported by a summary payload.

    Reads ``payload["data"]["receiptsWithCounts"]`` and returns a dict with
    the keys ``inWarehouse``, ``gasStation``, ``carWash`` and
    ``gasAndCarWash``. A count that is missing or null is reported as 0.

    A ``data`` or ``receiptsWithCounts`` entry that is present but null is
    treated the same as a missing one, so a response with an empty result
    yields all-zero counts instead of raising ``AttributeError``.
    """
    # ``dict.get(key, {})`` only falls back when the key is absent; ``or {}``
    # also covers an explicit null value.
    data = payload.get("data") or {}
    counts = data.get("receiptsWithCounts") or {}
    return {
        key: counts.get(key) or 0
        for key in ("inWarehouse", "gasStation", "carWash", "gasAndCarWash")
    }
def parse_cli_date(value):
    """Convert a ``month/day/year`` string into a ``datetime.date``.

    The string is parsed with the ``%m/%d/%Y`` format; ``datetime.strptime``
    raises ``ValueError`` when it does not match.
    """
    parsed = datetime.strptime(value, "%m/%d/%Y")
    return parsed.date()
def format_cli_date(value):
    """Render a date as ``M/DD/YYYY``.

    The month is written without zero padding while the day is always two
    digits, e.g. March 5th 2026 becomes ``3/05/2026``.
    """
    # NOTE(review): the unpadded month next to a padded day looks deliberate
    # (presumably the shape the summary query expects) -- confirm before
    # normalising it.
    return "{}/{:02d}/{}".format(value.month, value.day, value.year)
def build_date_windows(start_date, end_date, window_days):
    """Split an inclusive date range into consecutive request windows.

    ``start_date`` and ``end_date`` are strings understood by
    ``parse_cli_date``. Each window covers at most ``window_days`` days, the
    windows do not overlap, and the final window is clamped to ``end_date``.

    Returns a list of ``{"startDate": ..., "endDate": ...}`` dicts whose
    values are produced by ``format_cli_date``.

    Raises ``click.ClickException`` when ``end_date`` precedes ``start_date``
    or when ``window_days`` is smaller than 1.
    """
    first_day = parse_cli_date(start_date)
    last_day = parse_cli_date(end_date)

    if first_day > last_day:
        raise click.ClickException("end-date must be on or after start-date")
    if not window_days >= 1:
        raise click.ClickException("window-days must be at least 1")

    # Both ends of a window are inclusive, so a window of N days ends N - 1
    # days after it starts.
    span = timedelta(days=window_days - 1)
    one_day = timedelta(days=1)

    result = []
    cursor = first_day
    while not cursor > last_day:
        closing_day = cursor + span
        if closing_day > last_day:
            closing_day = last_day
        result.append(
            {
                "startDate": format_cli_date(cursor),
                "endDate": format_cli_date(closing_day),
            }
        )
        # The next window begins the day after this one ends.
        cursor = closing_day + one_day
    return result
def unique_receipts(receipts):
    """Deduplicate receipts by their ``transactionBarcode``.

    When several receipts share a barcode, the last one seen is kept, at the
    position where that barcode first appeared. Receipts whose barcode is
    missing or falsy are left out of the result.
    """
    keyed = ((receipt.get("transactionBarcode"), receipt) for receipt in receipts)
    # Later duplicates overwrite the stored value but keep the original slot.
    deduped = {barcode: receipt for barcode, receipt in keyed if barcode}
    return list(deduped.values())
def fetch_summary_windows(
    session,
    start_date,
    end_date,
    document_type,
    document_sub_type,
    window_days,
):
    """Fetch the receipt summary one date window at a time and merge it.

    The range ``start_date``..``end_date`` is split with
    ``build_date_windows`` and ``SUMMARY_QUERY`` is posted once per window.
    For every window the reported ``inWarehouse`` count is compared with the
    number of returned receipts whose ``receiptType`` is ``"In-Warehouse"``;
    a disagreement is recorded and a warning is written to stderr.

    Returns a ``(aggregate_payload, requests_metadata)`` tuple:

    * ``aggregate_payload`` has the ``data.receiptsWithCounts`` layout, with
      each count summed over all windows and ``receipts`` holding the
      combined receipts after ``unique_receipts`` deduplication.
    * ``requests_metadata`` is a list with one dict per window containing
      the query variables, the returned receipt tallies, the reported
      counts and the ``countMismatch`` flag.
    """
    count_keys = ("inWarehouse", "gasStation", "carWash", "gasAndCarWash")
    per_window = []
    all_receipts = []

    for span in build_date_windows(start_date, end_date, window_days):
        query_vars = {
            "startDate": span["startDate"],
            "endDate": span["endDate"],
            "text": "custom",
            "documentType": document_type,
            "documentSubType": document_sub_type,
        }
        response = graphql_post(session, SUMMARY_QUERY, query_vars)
        window_receipts = summary_receipts(response)
        reported = summary_counts(response)

        in_warehouse_seen = len(
            [
                entry
                for entry in window_receipts
                if entry.get("receiptType") == "In-Warehouse"
            ]
        )
        disagrees = reported["inWarehouse"] != in_warehouse_seen

        # Record what was asked for and what came back for this window.
        record = dict(query_vars)
        record["returnedReceipts"] = len(window_receipts)
        record["returnedInWarehouseReceipts"] = in_warehouse_seen
        for key in count_keys:
            record[key] = reported[key]
        record["countMismatch"] = disagrees
        per_window.append(record)

        if disagrees:
            warning = (
                "warning: summary count mismatch for "
                f"{span['startDate']} to {span['endDate']}: "
                f"inWarehouse={reported['inWarehouse']} "
                f"returnedInWarehouseReceipts={in_warehouse_seen}"
            )
            click.echo(warning, err=True)

        all_receipts.extend(window_receipts)

    # The totals are sums of the reported per-window counts; they are not
    # recomputed from the deduplicated receipt list.
    merged = {key: sum(row[key] for row in per_window) for key in count_keys}
    merged["receipts"] = unique_receipts(all_receipts)
    return {"data": {"receiptsWithCounts": merged}}, per_window
def flatten_costco_data(summary_payload, detail_payloads, raw_dir):
summary_lookup = {
receipt["transactionBarcode"]: receipt
@@ -415,7 +530,14 @@ def write_csv(path, rows, fieldnames):
show_default=True,
help="Summary document sub type.",
)
def main(start_date, end_date, outdir, document_type, document_sub_type):
@click.option(
"--window-days",
default=92,
show_default=True,
type=int,
help="Maximum number of days to request per summary window.",
)
def main(start_date, end_date, outdir, document_type, document_sub_type, window_days):
config = load_config()
required = ["authorization", "client_id", "client_identifier"]
missing = [key for key in required if not config[key]]
@@ -428,18 +550,16 @@ def main(start_date, end_date, outdir, document_type, document_sub_type):
raw_dir = outdir / "raw"
session = build_session(config)
summary_payload = graphql_post(
summary_payload, request_metadata = fetch_summary_windows(
session,
SUMMARY_QUERY,
{
"startDate": start_date,
"endDate": end_date,
"text": "custom",
"documentType": document_type,
"documentSubType": document_sub_type,
},
start_date,
end_date,
document_type,
document_sub_type,
window_days,
)
write_json(raw_dir / "summary.json", summary_payload)
write_json(raw_dir / "summary_requests.json", request_metadata)
receipts = summary_receipts(summary_payload)
detail_payloads = []