Attach Costco discounts to purchase rows
This commit is contained in:
@@ -33,6 +33,8 @@ PURCHASE_FIELDS = [
|
|||||||
"measure_type",
|
"measure_type",
|
||||||
"line_total",
|
"line_total",
|
||||||
"unit_price",
|
"unit_price",
|
||||||
|
"matched_discount_amount",
|
||||||
|
"net_line_total",
|
||||||
"store_name",
|
"store_name",
|
||||||
"store_number",
|
"store_number",
|
||||||
"store_city",
|
"store_city",
|
||||||
@@ -94,7 +96,7 @@ def decimal_or_zero(value):
|
|||||||
|
|
||||||
|
|
||||||
def derive_metrics(row):
|
def derive_metrics(row):
|
||||||
line_total = to_decimal(row.get("line_total"))
|
line_total = to_decimal(row.get("net_line_total") or row.get("line_total"))
|
||||||
qty = to_decimal(row.get("qty"))
|
qty = to_decimal(row.get("qty"))
|
||||||
pack_qty = to_decimal(row.get("pack_qty"))
|
pack_qty = to_decimal(row.get("pack_qty"))
|
||||||
size_value = to_decimal(row.get("size_value"))
|
size_value = to_decimal(row.get("size_value"))
|
||||||
@@ -292,6 +294,8 @@ def build_purchase_rows(
|
|||||||
"measure_type": row["measure_type"],
|
"measure_type": row["measure_type"],
|
||||||
"line_total": row["line_total"],
|
"line_total": row["line_total"],
|
||||||
"unit_price": row["unit_price"],
|
"unit_price": row["unit_price"],
|
||||||
|
"matched_discount_amount": row.get("matched_discount_amount", ""),
|
||||||
|
"net_line_total": row.get("net_line_total", ""),
|
||||||
"store_name": order_row.get("store_name", ""),
|
"store_name": order_row.get("store_name", ""),
|
||||||
"store_number": order_row.get("store_number", ""),
|
"store_number": order_row.get("store_number", ""),
|
||||||
"store_city": order_row.get("store_city", ""),
|
"store_city": order_row.get("store_city", ""),
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
import csv
|
import csv
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
from collections import defaultdict
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import click
|
import click
|
||||||
@@ -29,6 +30,7 @@ HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#\b")
|
|||||||
PACK_DASH_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*-\s*PACK\b")
|
PACK_DASH_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*-\s*PACK\b")
|
||||||
PACK_WORD_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*PACK\b")
|
PACK_WORD_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*PACK\b")
|
||||||
SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT|KG|G)\b")
|
SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT|KG|G)\b")
|
||||||
|
DISCOUNT_TARGET_RE = re.compile(r"^/\s*(\d+)\b")
|
||||||
|
|
||||||
|
|
||||||
def clean_costco_name(name):
|
def clean_costco_name(name):
|
||||||
@@ -156,6 +158,13 @@ def is_discount_item(item):
|
|||||||
return amount < 0 or unit < 0 or description.startswith("/")
|
return amount < 0 or unit < 0 or description.startswith("/")
|
||||||
|
|
||||||
|
|
||||||
|
def discount_target_id(raw_name):
    """Return the item number a discount description refers to.

    The whitespace-normalized name is matched against DISCOUNT_TARGET_RE,
    which expects a leading "/" followed by optional whitespace and a run
    of digits. The digits are returned as a string; an empty string is
    returned when the name does not have that shape.
    """
    found = DISCOUNT_TARGET_RE.match(normalize_whitespace(raw_name))
    return found.group(1) if found else ""
|
||||||
|
|
||||||
|
|
||||||
def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||||
raw_name = combine_description(item)
|
raw_name = combine_description(item)
|
||||||
cleaned_name = clean_costco_name(raw_name)
|
cleaned_name = clean_costco_name(raw_name)
|
||||||
@@ -190,6 +199,8 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
|||||||
"reward_savings": "",
|
"reward_savings": "",
|
||||||
"coupon_savings": str(item.get("amount", "")) if is_discount_line else "",
|
"coupon_savings": str(item.get("amount", "")) if is_discount_line else "",
|
||||||
"coupon_price": "",
|
"coupon_price": "",
|
||||||
|
"matched_discount_amount": "",
|
||||||
|
"net_line_total": str(item.get("amount", "")) if not is_discount_line else "",
|
||||||
"image_url": "",
|
"image_url": "",
|
||||||
"raw_order_path": raw_path.as_posix(),
|
"raw_order_path": raw_path.as_posix(),
|
||||||
"item_name_norm": item_name_norm,
|
"item_name_norm": item_name_norm,
|
||||||
@@ -211,6 +222,51 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
def match_costco_discounts(rows):
    """Attach discount lines to the purchase rows they reference, in place.

    Rows are grouped by ``order_id``. Within each order, every row whose
    ``is_discount_line`` is "true" and whose ``item_name`` yields a target
    id (see ``discount_target_id``) is matched against the non-discount
    rows carrying that ``retailer_item_id``.

    * Exactly one candidate: the discount's ``line_total`` is added to the
      purchase row's running ``matched_discount_amount``, and
      ``net_line_total`` becomes the gross ``line_total`` plus that running
      total. Both rows get a ``parse_notes`` entry.
    * Zero or several candidates: only the discount row is annotated with
      ``discount_target_unmatched=<id>``.

    Rows are mutated directly; nothing is returned.
    """

    def add_note(target_row, note):
        # Append to any existing notes; strip(";") removes the separator
        # left over when there were no previous notes.
        combined = f"{target_row.get('parse_notes', '')};{note}"
        target_row["parse_notes"] = normalize_whitespace(combined).strip(";")

    grouped = defaultdict(list)
    for entry in rows:
        grouped[entry["order_id"]].append(entry)

    for order_entries in grouped.values():
        # One pass classifies each row: discounts are queued for matching,
        # purchases are indexed by their retailer item id.
        purchases_by_item = defaultdict(list)
        discounts = []
        for entry in order_entries:
            if entry.get("is_discount_line") == "true":
                discounts.append(entry)
                continue
            item_id = entry.get("retailer_item_id", "")
            if item_id:
                purchases_by_item[item_id].append(entry)

        for discount in discounts:
            target_id = discount_target_id(discount.get("item_name", ""))
            if not target_id:
                continue

            candidates = purchases_by_item.get(target_id, [])
            if len(candidates) != 1:
                add_note(discount, f"discount_target_unmatched={target_id}")
                continue

            purchase = candidates[0]
            discount_amount = to_decimal(discount.get("line_total"))
            gross = to_decimal(purchase.get("line_total"))
            # A purchase may already carry earlier discounts; treat a
            # missing/unparseable value as zero.
            prior = to_decimal(purchase.get("matched_discount_amount")) or 0
            if discount_amount is None or gross is None:
                continue

            running = prior + discount_amount
            purchase["matched_discount_amount"] = format_decimal(running)
            # NOTE(review): the sum assumes discount amounts are negative
            # (as in the accompanying test fixture) — confirm upstream.
            purchase["net_line_total"] = format_decimal(gross + running)
            add_note(purchase, f"matched_discount={target_id}")
            add_note(discount, f"matched_to_item={target_id}")
|
||||||
|
|
||||||
|
|
||||||
def iter_costco_rows(raw_dir):
|
def iter_costco_rows(raw_dir):
|
||||||
for path in discover_json_files(raw_dir):
|
for path in discover_json_files(raw_dir):
|
||||||
if path.name in {"summary.json", "summary_requests.json"}:
|
if path.name in {"summary.json", "summary_requests.json"}:
|
||||||
@@ -238,6 +294,7 @@ def discover_json_files(raw_dir):
|
|||||||
|
|
||||||
def build_items_enriched(raw_dir):
    """Load all Costco rows under *raw_dir*, match discounts, and sort.

    Rows come from ``iter_costco_rows``; ``match_costco_discounts`` then
    annotates them in place. The returned list is ordered by order date,
    order id, and numeric line number.
    """

    def ordering(entry):
        # line_no is converted to int before comparison so that lines
        # order numerically.
        return entry["order_date"], entry["order_id"], int(entry["line_no"])

    enriched = list(iter_costco_rows(raw_dir))
    match_costco_discounts(enriched)
    enriched.sort(key=ordering)
    return enriched
|
||||||
|
|
||||||
|
|||||||
@@ -33,6 +33,8 @@ OUTPUT_FIELDS = [
|
|||||||
"reward_savings",
|
"reward_savings",
|
||||||
"coupon_savings",
|
"coupon_savings",
|
||||||
"coupon_price",
|
"coupon_price",
|
||||||
|
"matched_discount_amount",
|
||||||
|
"net_line_total",
|
||||||
"image_url",
|
"image_url",
|
||||||
"raw_order_path",
|
"raw_order_path",
|
||||||
"item_name_norm",
|
"item_name_norm",
|
||||||
@@ -371,6 +373,8 @@ def parse_item(order_id, order_date, raw_path, line_no, item):
|
|||||||
"reward_savings": stringify(item.get("rewardSavings")),
|
"reward_savings": stringify(item.get("rewardSavings")),
|
||||||
"coupon_savings": stringify(item.get("couponSavings")),
|
"coupon_savings": stringify(item.get("couponSavings")),
|
||||||
"coupon_price": stringify(item.get("couponPrice")),
|
"coupon_price": stringify(item.get("couponPrice")),
|
||||||
|
"matched_discount_amount": "",
|
||||||
|
"net_line_total": stringify(item.get("totalPrice")),
|
||||||
"image_url": extract_image_url(item),
|
"image_url": extract_image_url(item),
|
||||||
"raw_order_path": raw_path.as_posix(),
|
"raw_order_path": raw_path.as_posix(),
|
||||||
"item_name_norm": normalized_name,
|
"item_name_norm": normalized_name,
|
||||||
|
|||||||
@@ -279,6 +279,57 @@ class CostcoPipelineTests(unittest.TestCase):
|
|||||||
self.assertEqual("true", discount["is_discount_line"])
|
self.assertEqual("true", discount["is_discount_line"])
|
||||||
self.assertEqual("true", discount["is_coupon_line"])
|
self.assertEqual("true", discount["is_coupon_line"])
|
||||||
|
|
||||||
|
def test_build_items_enriched_matches_discount_to_item(self):
    """A "/ <item number>" discount line is netted against its purchase row."""
    purchased_item = {
        "itemNumber": "4873222",
        "itemDescription01": "ALL F&C",
        "itemDescription02": "200OZ 160LOADS P104",
        "itemDepartmentNumber": 14,
        "transDepartmentNumber": 14,
        "unit": 1,
        "itemIdentifier": "E",
        "amount": 19.99,
        "itemUnitPriceAmount": 19.99,
    }
    # The discount's description points back at the purchased item number.
    discount_item = {
        "itemNumber": "374664",
        "itemDescription01": "/ 4873222",
        "itemDescription02": None,
        "itemDepartmentNumber": 14,
        "transDepartmentNumber": 14,
        "unit": -1,
        "itemIdentifier": None,
        "amount": -5,
        "itemUnitPriceAmount": 0,
    }
    receipt = {
        "transactionBarcode": "abc",
        "transactionDate": "2026-03-12",
        "itemArray": [purchased_item, discount_item],
    }
    payload = {"data": {"receiptsWithCounts": {"receipts": [receipt]}}}

    with tempfile.TemporaryDirectory() as tmpdir:
        raw_dir = Path(tmpdir) / "raw"
        raw_dir.mkdir()
        receipt_path = raw_dir / "abc.json"
        receipt_path.write_text(json.dumps(payload), encoding="utf-8")

        rows = enrich_costco.build_items_enriched(raw_dir)

        purchase_row = next(row for row in rows if row["is_discount_line"] == "false")
        discount_row = next(row for row in rows if row["is_discount_line"] == "true")

        # 19.99 gross with a -5 discount leaves 14.99 net.
        self.assertEqual("-5", purchase_row["matched_discount_amount"])
        self.assertEqual("14.99", purchase_row["net_line_total"])
        self.assertIn("matched_discount=4873222", purchase_row["parse_notes"])
        self.assertIn("matched_to_item=4873222", discount_row["parse_notes"])
|
||||||
|
|
||||||
def test_cross_retailer_validation_writes_proof_example(self):
|
def test_cross_retailer_validation_writes_proof_example(self):
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
giant_csv = Path(tmpdir) / "giant_items_enriched.csv"
|
giant_csv = Path(tmpdir) / "giant_items_enriched.csv"
|
||||||
|
|||||||
Reference in New Issue
Block a user