Attach Costco discounts to purchase rows

This commit is contained in:
ben
2026-03-17 15:07:45 -04:00
parent 967e19e561
commit 56a03bcb1d
4 changed files with 117 additions and 1 deletions

View File

@@ -33,6 +33,8 @@ PURCHASE_FIELDS = [
"measure_type",
"line_total",
"unit_price",
"matched_discount_amount",
"net_line_total",
"store_name",
"store_number",
"store_city",
@@ -94,7 +96,7 @@ def decimal_or_zero(value):
def derive_metrics(row):
line_total = to_decimal(row.get("line_total"))
line_total = to_decimal(row.get("net_line_total") or row.get("line_total"))
qty = to_decimal(row.get("qty"))
pack_qty = to_decimal(row.get("pack_qty"))
size_value = to_decimal(row.get("size_value"))
@@ -292,6 +294,8 @@ def build_purchase_rows(
"measure_type": row["measure_type"],
"line_total": row["line_total"],
"unit_price": row["unit_price"],
"matched_discount_amount": row.get("matched_discount_amount", ""),
"net_line_total": row.get("net_line_total", ""),
"store_name": order_row.get("store_name", ""),
"store_number": order_row.get("store_number", ""),
"store_city": order_row.get("store_city", ""),

View File

@@ -1,6 +1,7 @@
import csv
import json
import re
from collections import defaultdict
from pathlib import Path
import click
@@ -29,6 +30,7 @@ HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#\b")
PACK_DASH_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*-\s*PACK\b")
PACK_WORD_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*PACK\b")
SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT|KG|G)\b")
DISCOUNT_TARGET_RE = re.compile(r"^/\s*(\d+)\b")
def clean_costco_name(name):
@@ -156,6 +158,13 @@ def is_discount_item(item):
return amount < 0 or unit < 0 or description.startswith("/")
def discount_target_id(raw_name):
match = DISCOUNT_TARGET_RE.match(normalize_whitespace(raw_name))
if not match:
return ""
return match.group(1)
def parse_costco_item(order_id, order_date, raw_path, line_no, item):
raw_name = combine_description(item)
cleaned_name = clean_costco_name(raw_name)
@@ -190,6 +199,8 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
"reward_savings": "",
"coupon_savings": str(item.get("amount", "")) if is_discount_line else "",
"coupon_price": "",
"matched_discount_amount": "",
"net_line_total": str(item.get("amount", "")) if not is_discount_line else "",
"image_url": "",
"raw_order_path": raw_path.as_posix(),
"item_name_norm": item_name_norm,
@@ -211,6 +222,51 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
}
def match_costco_discounts(rows):
rows_by_order = defaultdict(list)
for row in rows:
rows_by_order[row["order_id"]].append(row)
for order_rows in rows_by_order.values():
purchase_rows_by_item_id = defaultdict(list)
for row in order_rows:
if row.get("is_discount_line") == "true":
continue
retailer_item_id = row.get("retailer_item_id", "")
if retailer_item_id:
purchase_rows_by_item_id[retailer_item_id].append(row)
for row in order_rows:
if row.get("is_discount_line") != "true":
continue
target_id = discount_target_id(row.get("item_name", ""))
if not target_id:
continue
matches = purchase_rows_by_item_id.get(target_id, [])
if len(matches) != 1:
row["parse_notes"] = normalize_whitespace(
f"{row.get('parse_notes', '')};discount_target_unmatched={target_id}"
).strip(";")
continue
purchase_row = matches[0]
matched_discount = to_decimal(row.get("line_total"))
gross_total = to_decimal(purchase_row.get("line_total"))
existing_discount = to_decimal(purchase_row.get("matched_discount_amount")) or 0
if matched_discount is None or gross_total is None:
continue
total_discount = existing_discount + matched_discount
purchase_row["matched_discount_amount"] = format_decimal(total_discount)
purchase_row["net_line_total"] = format_decimal(gross_total + total_discount)
purchase_row["parse_notes"] = normalize_whitespace(
f"{purchase_row.get('parse_notes', '')};matched_discount={target_id}"
).strip(";")
row["parse_notes"] = normalize_whitespace(
f"{row.get('parse_notes', '')};matched_to_item={target_id}"
).strip(";")
def iter_costco_rows(raw_dir):
for path in discover_json_files(raw_dir):
if path.name in {"summary.json", "summary_requests.json"}:
@@ -238,6 +294,7 @@ def discover_json_files(raw_dir):
def build_items_enriched(raw_dir):
rows = list(iter_costco_rows(raw_dir))
match_costco_discounts(rows)
rows.sort(key=lambda row: (row["order_date"], row["order_id"], int(row["line_no"])))
return rows

View File

@@ -33,6 +33,8 @@ OUTPUT_FIELDS = [
"reward_savings",
"coupon_savings",
"coupon_price",
"matched_discount_amount",
"net_line_total",
"image_url",
"raw_order_path",
"item_name_norm",
@@ -371,6 +373,8 @@ def parse_item(order_id, order_date, raw_path, line_no, item):
"reward_savings": stringify(item.get("rewardSavings")),
"coupon_savings": stringify(item.get("couponSavings")),
"coupon_price": stringify(item.get("couponPrice")),
"matched_discount_amount": "",
"net_line_total": stringify(item.get("totalPrice")),
"image_url": extract_image_url(item),
"raw_order_path": raw_path.as_posix(),
"item_name_norm": normalized_name,

View File

@@ -279,6 +279,57 @@ class CostcoPipelineTests(unittest.TestCase):
self.assertEqual("true", discount["is_discount_line"])
self.assertEqual("true", discount["is_coupon_line"])
def test_build_items_enriched_matches_discount_to_item(self):
with tempfile.TemporaryDirectory() as tmpdir:
raw_dir = Path(tmpdir) / "raw"
raw_dir.mkdir()
payload = {
"data": {
"receiptsWithCounts": {
"receipts": [
{
"transactionBarcode": "abc",
"transactionDate": "2026-03-12",
"itemArray": [
{
"itemNumber": "4873222",
"itemDescription01": "ALL F&C",
"itemDescription02": "200OZ 160LOADS P104",
"itemDepartmentNumber": 14,
"transDepartmentNumber": 14,
"unit": 1,
"itemIdentifier": "E",
"amount": 19.99,
"itemUnitPriceAmount": 19.99,
},
{
"itemNumber": "374664",
"itemDescription01": "/ 4873222",
"itemDescription02": None,
"itemDepartmentNumber": 14,
"transDepartmentNumber": 14,
"unit": -1,
"itemIdentifier": None,
"amount": -5,
"itemUnitPriceAmount": 0,
},
],
}
]
}
}
}
(raw_dir / "abc.json").write_text(json.dumps(payload), encoding="utf-8")
rows = enrich_costco.build_items_enriched(raw_dir)
purchase_row = next(row for row in rows if row["is_discount_line"] == "false")
discount_row = next(row for row in rows if row["is_discount_line"] == "true")
self.assertEqual("-5", purchase_row["matched_discount_amount"])
self.assertEqual("14.99", purchase_row["net_line_total"])
self.assertIn("matched_discount=4873222", purchase_row["parse_notes"])
self.assertIn("matched_to_item=4873222", discount_row["parse_notes"])
def test_cross_retailer_validation_writes_proof_example(self):
with tempfile.TemporaryDirectory() as tmpdir:
giant_csv = Path(tmpdir) / "giant_items_enriched.csv"