Attach Costco discounts to purchase rows

This commit is contained in:
ben
2026-03-17 15:07:45 -04:00
parent 967e19e561
commit 56a03bcb1d
4 changed files with 117 additions and 1 deletions

View File

@@ -1,6 +1,7 @@
import csv
import json
import re
from collections import defaultdict
from pathlib import Path
import click
@@ -29,6 +30,7 @@ HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#\b")
PACK_DASH_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*-\s*PACK\b")
PACK_WORD_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*PACK\b")
SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT|KG|G)\b")
DISCOUNT_TARGET_RE = re.compile(r"^/\s*(\d+)\b")
def clean_costco_name(name):
@@ -156,6 +158,13 @@ def is_discount_item(item):
return amount < 0 or unit < 0 or description.startswith("/")
def discount_target_id(raw_name):
match = DISCOUNT_TARGET_RE.match(normalize_whitespace(raw_name))
if not match:
return ""
return match.group(1)
def parse_costco_item(order_id, order_date, raw_path, line_no, item):
raw_name = combine_description(item)
cleaned_name = clean_costco_name(raw_name)
@@ -190,6 +199,8 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
"reward_savings": "",
"coupon_savings": str(item.get("amount", "")) if is_discount_line else "",
"coupon_price": "",
"matched_discount_amount": "",
"net_line_total": str(item.get("amount", "")) if not is_discount_line else "",
"image_url": "",
"raw_order_path": raw_path.as_posix(),
"item_name_norm": item_name_norm,
@@ -211,6 +222,51 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
}
def match_costco_discounts(rows):
rows_by_order = defaultdict(list)
for row in rows:
rows_by_order[row["order_id"]].append(row)
for order_rows in rows_by_order.values():
purchase_rows_by_item_id = defaultdict(list)
for row in order_rows:
if row.get("is_discount_line") == "true":
continue
retailer_item_id = row.get("retailer_item_id", "")
if retailer_item_id:
purchase_rows_by_item_id[retailer_item_id].append(row)
for row in order_rows:
if row.get("is_discount_line") != "true":
continue
target_id = discount_target_id(row.get("item_name", ""))
if not target_id:
continue
matches = purchase_rows_by_item_id.get(target_id, [])
if len(matches) != 1:
row["parse_notes"] = normalize_whitespace(
f"{row.get('parse_notes', '')};discount_target_unmatched={target_id}"
).strip(";")
continue
purchase_row = matches[0]
matched_discount = to_decimal(row.get("line_total"))
gross_total = to_decimal(purchase_row.get("line_total"))
existing_discount = to_decimal(purchase_row.get("matched_discount_amount")) or 0
if matched_discount is None or gross_total is None:
continue
total_discount = existing_discount + matched_discount
purchase_row["matched_discount_amount"] = format_decimal(total_discount)
purchase_row["net_line_total"] = format_decimal(gross_total + total_discount)
purchase_row["parse_notes"] = normalize_whitespace(
f"{purchase_row.get('parse_notes', '')};matched_discount={target_id}"
).strip(";")
row["parse_notes"] = normalize_whitespace(
f"{row.get('parse_notes', '')};matched_to_item={target_id}"
).strip(";")
def iter_costco_rows(raw_dir):
for path in discover_json_files(raw_dir):
if path.name in {"summary.json", "summary_requests.json"}:
@@ -238,6 +294,7 @@ def discover_json_files(raw_dir):
def build_items_enriched(raw_dir):
rows = list(iter_costco_rows(raw_dir))
match_costco_discounts(rows)
rows.sort(key=lambda row: (row["order_date"], row["order_id"], int(row["line_no"])))
return rows