Attach Costco discounts to purchase rows
This commit is contained in:
@@ -33,6 +33,8 @@ PURCHASE_FIELDS = [
|
||||
"measure_type",
|
||||
"line_total",
|
||||
"unit_price",
|
||||
"matched_discount_amount",
|
||||
"net_line_total",
|
||||
"store_name",
|
||||
"store_number",
|
||||
"store_city",
|
||||
@@ -94,7 +96,7 @@ def decimal_or_zero(value):
|
||||
|
||||
|
||||
def derive_metrics(row):
|
||||
line_total = to_decimal(row.get("line_total"))
|
||||
line_total = to_decimal(row.get("net_line_total") or row.get("line_total"))
|
||||
qty = to_decimal(row.get("qty"))
|
||||
pack_qty = to_decimal(row.get("pack_qty"))
|
||||
size_value = to_decimal(row.get("size_value"))
|
||||
@@ -292,6 +294,8 @@ def build_purchase_rows(
|
||||
"measure_type": row["measure_type"],
|
||||
"line_total": row["line_total"],
|
||||
"unit_price": row["unit_price"],
|
||||
"matched_discount_amount": row.get("matched_discount_amount", ""),
|
||||
"net_line_total": row.get("net_line_total", ""),
|
||||
"store_name": order_row.get("store_name", ""),
|
||||
"store_number": order_row.get("store_number", ""),
|
||||
"store_city": order_row.get("store_city", ""),
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
@@ -29,6 +30,7 @@ HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#\b")
|
||||
# Captures the digits of a dash-separated pack count such as "6-PACK" or
# "6 - PACK". The lookbehind rejects a number directly preceded by an
# uppercase letter or digit.
PACK_DASH_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*-\s*PACK\b")
|
||||
# Captures the digits of a pack count written without a dash, such as
# "12PACK" or "12 PACK". The lookbehind rejects a number directly preceded by
# an uppercase letter or digit.
PACK_WORD_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*PACK\b")
|
||||
# Captures a size as (number, unit): an integer or decimal number, optional
# whitespace, then one of OZ, LB, LBS, CT, KG or G ending on a word boundary.
# The lookbehind rejects a number directly preceded by an uppercase letter or
# digit.
SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT|KG|G)\b")
|
||||
# Anchored at the start of the string: a "/" followed by optional whitespace
# and a run of digits (captured as group 1), e.g. "/ 4873222" -> "4873222".
DISCOUNT_TARGET_RE = re.compile(r"^/\s*(\d+)\b")
|
||||
|
||||
|
||||
def clean_costco_name(name):
|
||||
@@ -156,6 +158,13 @@ def is_discount_item(item):
|
||||
return amount < 0 or unit < 0 or description.startswith("/")
|
||||
|
||||
|
||||
def discount_target_id(raw_name):
    """Return the item number a discount description points at.

    ``raw_name`` is passed through ``normalize_whitespace`` and then matched
    against ``DISCOUNT_TARGET_RE``. The captured digits are returned as a
    string; an empty string is returned when the pattern does not match.
    """
    found = DISCOUNT_TARGET_RE.match(normalize_whitespace(raw_name))
    return found.group(1) if found else ""
|
||||
|
||||
|
||||
def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
raw_name = combine_description(item)
|
||||
cleaned_name = clean_costco_name(raw_name)
|
||||
@@ -190,6 +199,8 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
"reward_savings": "",
|
||||
"coupon_savings": str(item.get("amount", "")) if is_discount_line else "",
|
||||
"coupon_price": "",
|
||||
"matched_discount_amount": "",
|
||||
"net_line_total": str(item.get("amount", "")) if not is_discount_line else "",
|
||||
"image_url": "",
|
||||
"raw_order_path": raw_path.as_posix(),
|
||||
"item_name_norm": item_name_norm,
|
||||
@@ -211,6 +222,51 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
}
|
||||
|
||||
|
||||
def match_costco_discounts(rows):
    """Fold each discount row into the purchase row it targets, in place.

    Rows are grouped by ``order_id``. Within one order, every row whose
    ``is_discount_line`` is ``"true"`` is resolved to a target id with
    ``discount_target_id(item_name)`` and looked up among the non-discount
    rows that carry that ``retailer_item_id``:

    * no target id: the discount row is left untouched;
    * zero or several candidate purchase rows: the discount row gets a
      ``discount_target_unmatched=<id>`` parse note;
    * exactly one candidate: the discount's ``line_total`` is added to the
      purchase row's running ``matched_discount_amount``, ``net_line_total``
      becomes gross ``line_total`` plus that running discount, and both rows
      receive a parse note. If either amount fails to parse (``to_decimal``
      returns ``None``) the pair is skipped without a note.

    Nothing is returned; the dicts in ``rows`` are mutated.
    """

    def add_note(target_row, note):
        # Append ";<note>" to the existing notes, then trim stray semicolons
        # left at either end when the row had no notes yet.
        combined = f"{target_row.get('parse_notes', '')};{note}"
        target_row["parse_notes"] = normalize_whitespace(combined).strip(";")

    grouped = defaultdict(list)
    for entry in rows:
        grouped[entry["order_id"]].append(entry)

    for members in grouped.values():
        # One pass to split the order into discount lines and an index of
        # purchase lines keyed by retailer item id.
        purchases = defaultdict(list)
        discounts = []
        for entry in members:
            if entry.get("is_discount_line") == "true":
                discounts.append(entry)
                continue
            item_id = entry.get("retailer_item_id", "")
            if item_id:
                purchases[item_id].append(entry)

        for discount in discounts:
            target_id = discount_target_id(discount.get("item_name", ""))
            if not target_id:
                continue

            # .get() keeps the defaultdict from growing on a miss.
            candidates = purchases.get(target_id, [])
            if len(candidates) != 1:
                add_note(discount, f"discount_target_unmatched={target_id}")
                continue

            (purchase,) = candidates
            discount_amount = to_decimal(discount.get("line_total"))
            gross = to_decimal(purchase.get("line_total"))
            prior = to_decimal(purchase.get("matched_discount_amount")) or 0
            if discount_amount is None or gross is None:
                continue

            # Discounts are added as-is (the test fixture uses a negative
            # amount), so adding them to the gross total lowers it.
            running = prior + discount_amount
            purchase["matched_discount_amount"] = format_decimal(running)
            purchase["net_line_total"] = format_decimal(gross + running)
            add_note(purchase, f"matched_discount={target_id}")
            add_note(discount, f"matched_to_item={target_id}")
|
||||
|
||||
|
||||
def iter_costco_rows(raw_dir):
|
||||
for path in discover_json_files(raw_dir):
|
||||
if path.name in {"summary.json", "summary_requests.json"}:
|
||||
@@ -238,6 +294,7 @@ def discover_json_files(raw_dir):
|
||||
|
||||
def build_items_enriched(raw_dir):
    """Return the enriched rows for ``raw_dir`` with discounts matched.

    All rows produced by ``iter_costco_rows`` are collected, passed through
    ``match_costco_discounts`` (which mutates them in place), and returned
    sorted by order date, order id, and numeric line number.
    """

    def chronological(entry):
        # line_no is converted to int so that line 10 sorts after line 9.
        return entry["order_date"], entry["order_id"], int(entry["line_no"])

    enriched = list(iter_costco_rows(raw_dir))
    match_costco_discounts(enriched)
    enriched.sort(key=chronological)
    return enriched
|
||||
|
||||
|
||||
@@ -33,6 +33,8 @@ OUTPUT_FIELDS = [
|
||||
"reward_savings",
|
||||
"coupon_savings",
|
||||
"coupon_price",
|
||||
"matched_discount_amount",
|
||||
"net_line_total",
|
||||
"image_url",
|
||||
"raw_order_path",
|
||||
"item_name_norm",
|
||||
@@ -371,6 +373,8 @@ def parse_item(order_id, order_date, raw_path, line_no, item):
|
||||
"reward_savings": stringify(item.get("rewardSavings")),
|
||||
"coupon_savings": stringify(item.get("couponSavings")),
|
||||
"coupon_price": stringify(item.get("couponPrice")),
|
||||
"matched_discount_amount": "",
|
||||
"net_line_total": stringify(item.get("totalPrice")),
|
||||
"image_url": extract_image_url(item),
|
||||
"raw_order_path": raw_path.as_posix(),
|
||||
"item_name_norm": normalized_name,
|
||||
|
||||
@@ -279,6 +279,57 @@ class CostcoPipelineTests(unittest.TestCase):
|
||||
self.assertEqual("true", discount["is_discount_line"])
|
||||
self.assertEqual("true", discount["is_coupon_line"])
|
||||
|
||||
def test_build_items_enriched_matches_discount_to_item(self):
    """A "/ <item number>" discount line is netted against its purchase row."""
    # The purchased product: one unit at 19.99.
    purchase_item = {
        "itemNumber": "4873222",
        "itemDescription01": "ALL F&C",
        "itemDescription02": "200OZ 160LOADS P104",
        "itemDepartmentNumber": 14,
        "transDepartmentNumber": 14,
        "unit": 1,
        "itemIdentifier": "E",
        "amount": 19.99,
        "itemUnitPriceAmount": 19.99,
    }
    # The discount line: its description names the purchased item number.
    discount_item = {
        "itemNumber": "374664",
        "itemDescription01": "/ 4873222",
        "itemDescription02": None,
        "itemDepartmentNumber": 14,
        "transDepartmentNumber": 14,
        "unit": -1,
        "itemIdentifier": None,
        "amount": -5,
        "itemUnitPriceAmount": 0,
    }
    receipt = {
        "transactionBarcode": "abc",
        "transactionDate": "2026-03-12",
        "itemArray": [purchase_item, discount_item],
    }
    payload = {"data": {"receiptsWithCounts": {"receipts": [receipt]}}}

    with tempfile.TemporaryDirectory() as tmpdir:
        raw_dir = Path(tmpdir) / "raw"
        raw_dir.mkdir()
        receipt_path = raw_dir / "abc.json"
        receipt_path.write_text(json.dumps(payload), encoding="utf-8")

        rows = enrich_costco.build_items_enriched(raw_dir)

        purchased = next(r for r in rows if r["is_discount_line"] == "false")
        discounted = next(r for r in rows if r["is_discount_line"] == "true")

        # 19.99 gross plus the -5 discount leaves 14.99 net.
        self.assertEqual("-5", purchased["matched_discount_amount"])
        self.assertEqual("14.99", purchased["net_line_total"])
        self.assertIn("matched_discount=4873222", purchased["parse_notes"])
        self.assertIn("matched_to_item=4873222", discounted["parse_notes"])
|
||||
|
||||
def test_cross_retailer_validation_writes_proof_example(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
giant_csv = Path(tmpdir) / "giant_items_enriched.csv"
|
||||
|
||||
Reference in New Issue
Block a user