Attach Costco discounts to purchase rows
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
import csv
|
||||
import json
|
||||
import re
|
||||
from collections import defaultdict
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
@@ -29,6 +30,7 @@ HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#\b")
|
||||
PACK_DASH_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*-\s*PACK\b")
|
||||
PACK_WORD_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*PACK\b")
|
||||
SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT|KG|G)\b")
|
||||
DISCOUNT_TARGET_RE = re.compile(r"^/\s*(\d+)\b")
|
||||
|
||||
|
||||
def clean_costco_name(name):
|
||||
@@ -156,6 +158,13 @@ def is_discount_item(item):
|
||||
return amount < 0 or unit < 0 or description.startswith("/")
|
||||
|
||||
|
||||
def discount_target_id(raw_name):
    """Extract the item id that a discount description points at.

    The description is whitespace-normalized and matched against
    ``DISCOUNT_TARGET_RE`` (a leading ``/``, optional whitespace, then a run
    of digits).

    Args:
        raw_name: Raw description text of the discount line.

    Returns:
        The captured digit string, or ``""`` when the description does not
        match the pattern.
    """
    found = DISCOUNT_TARGET_RE.match(normalize_whitespace(raw_name))
    return found.group(1) if found else ""
|
||||
|
||||
|
||||
def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
raw_name = combine_description(item)
|
||||
cleaned_name = clean_costco_name(raw_name)
|
||||
@@ -190,6 +199,8 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
"reward_savings": "",
|
||||
"coupon_savings": str(item.get("amount", "")) if is_discount_line else "",
|
||||
"coupon_price": "",
|
||||
"matched_discount_amount": "",
|
||||
"net_line_total": str(item.get("amount", "")) if not is_discount_line else "",
|
||||
"image_url": "",
|
||||
"raw_order_path": raw_path.as_posix(),
|
||||
"item_name_norm": item_name_norm,
|
||||
@@ -211,6 +222,51 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
}
|
||||
|
||||
|
||||
def match_costco_discounts(rows):
    """Attach discount lines to the purchase rows they refer to, in place.

    Rows are grouped by ``order_id``. Within one order, every row whose
    ``is_discount_line`` is not ``"true"`` and that carries a non-empty
    ``retailer_item_id`` is indexed under that id. Each discount row is then
    resolved through ``discount_target_id(item_name)``:

    * no target id: the discount row is left untouched;
    * anything other than exactly one purchase row with that id: the
      discount row gets a ``discount_target_unmatched=<id>`` parse note;
    * exactly one purchase row: the discount's ``line_total`` is added to the
      purchase row's ``matched_discount_amount``, ``net_line_total`` becomes
      the purchase ``line_total`` plus the accumulated discount, and both
      rows receive a parse note. If ``to_decimal`` yields ``None`` for either
      ``line_total``, the pair is skipped without any note.

    Args:
        rows: Iterable of mutable row dicts; each must have ``order_id``.

    Returns:
        None. The row dicts are mutated.
    """
    by_order = {}
    for row in rows:
        by_order.setdefault(row["order_id"], []).append(row)

    for order_rows in by_order.values():
        # One pass: set discount lines aside, index purchases by item id.
        purchases_by_id = {}
        discount_rows = []
        for row in order_rows:
            if row.get("is_discount_line") == "true":
                discount_rows.append(row)
                continue
            item_id = row.get("retailer_item_id", "")
            if item_id:
                purchases_by_id.setdefault(item_id, []).append(row)

        for row in discount_rows:
            target_id = discount_target_id(row.get("item_name", ""))
            if not target_id:
                continue

            candidates = purchases_by_id.get(target_id, [])
            if len(candidates) != 1:
                # Zero or several purchases share this id: record and move on.
                unmatched_note = (
                    f"{row.get('parse_notes', '')};discount_target_unmatched={target_id}"
                )
                row["parse_notes"] = normalize_whitespace(unmatched_note).strip(";")
                continue

            (purchase_row,) = candidates
            discount_amount = to_decimal(row.get("line_total"))
            gross = to_decimal(purchase_row.get("line_total"))
            already_applied = (
                to_decimal(purchase_row.get("matched_discount_amount")) or 0
            )
            if discount_amount is None or gross is None:
                continue

            # Several discount lines may target one purchase, so accumulate.
            # NOTE(review): presumably discount totals are negative, so adding
            # them lowers the net total - confirm against the raw data.
            accumulated = already_applied + discount_amount
            purchase_row["matched_discount_amount"] = format_decimal(accumulated)
            purchase_row["net_line_total"] = format_decimal(gross + accumulated)

            purchase_note = (
                f"{purchase_row.get('parse_notes', '')};matched_discount={target_id}"
            )
            purchase_row["parse_notes"] = normalize_whitespace(purchase_note).strip(";")

            discount_note = f"{row.get('parse_notes', '')};matched_to_item={target_id}"
            row["parse_notes"] = normalize_whitespace(discount_note).strip(";")
|
||||
|
||||
|
||||
def iter_costco_rows(raw_dir):
|
||||
for path in discover_json_files(raw_dir):
|
||||
if path.name in {"summary.json", "summary_requests.json"}:
|
||||
@@ -238,6 +294,7 @@ def discover_json_files(raw_dir):
|
||||
|
||||
def build_items_enriched(raw_dir):
    """Build the enriched Costco item rows for everything under ``raw_dir``.

    Rows produced by ``iter_costco_rows`` are collected, passed through
    ``match_costco_discounts`` (which mutates them in place), and returned
    ordered by order date, order id, and numeric line number.

    Args:
        raw_dir: Directory handed to ``iter_costco_rows``.

    Returns:
        A list of row dicts sorted by ``(order_date, order_id, int(line_no))``.
    """

    def ordering(row):
        # line_no is cast to int so the ordering is numeric.
        return (row["order_date"], row["order_id"], int(row["line_no"]))

    collected = list(iter_costco_rows(raw_dir))
    match_costco_discounts(collected)
    return sorted(collected, key=ordering)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user