From a45522c1106b9d46d04bb0f387d691084602e58a Mon Sep 17 00:00:00 2001 From: ben Date: Mon, 23 Mar 2026 15:27:58 -0400 Subject: [PATCH] Finalize purchase effective price fields --- build_purchases.py | 60 +++++++++++++++++++++++++++-------------- tests/test_purchases.py | 12 +++++++++ 2 files changed, 52 insertions(+), 20 deletions(-) diff --git a/build_purchases.py b/build_purchases.py index 06ae4c5..29e7415 100644 --- a/build_purchases.py +++ b/build_purchases.py @@ -10,6 +10,14 @@ from layer_helpers import read_csv_rows, write_csv_rows PURCHASE_FIELDS = [ "purchase_date", "retailer", + "catalog_name", + "product_type", + "category", + "net_line_total", + "normalized_quantity", + "normalized_quantity_unit", + "effective_price", + "effective_price_unit", "order_id", "line_no", "normalized_row_id", @@ -19,9 +27,6 @@ PURCHASE_FIELDS = [ "resolution_action", "raw_item_name", "normalized_item_name", - "catalog_name", - "category", - "product_type", "brand", "variant", "image_url", @@ -29,8 +34,6 @@ PURCHASE_FIELDS = [ "upc", "qty", "unit", - "normalized_quantity", - "normalized_quantity_unit", "pack_qty", "size_value", "size_unit", @@ -51,7 +54,6 @@ PURCHASE_FIELDS = [ "price_per_lb_basis", "price_per_oz", "price_per_oz_basis", - "effective_price", "is_discount_line", "is_coupon_line", "is_fee", @@ -178,19 +180,36 @@ def derive_effective_price(row): if normalized_quantity in (None, Decimal("0")): return "" - net_line_total = to_decimal(row.get("net_line_total")) - line_total = to_decimal(row.get("line_total")) - numerator = ( - net_line_total - if net_line_total not in (None, Decimal("0")) - else line_total - ) + numerator = to_decimal(derive_net_line_total(row)) if numerator is None: return "" return format_decimal(numerator / normalized_quantity) +def derive_effective_price_unit(row): + normalized_quantity = to_decimal(row.get("normalized_quantity")) + if normalized_quantity in (None, Decimal("0")): + return "" + return row.get("normalized_quantity_unit", "") + + +def derive_net_line_total(row): + existing_net = row.get("net_line_total", "") + if str(existing_net).strip() != "": + return str(existing_net) + + line_total = to_decimal(row.get("line_total")) + if line_total is None: + return "" + + matched_discount_amount = to_decimal(row.get("matched_discount_amount")) + if matched_discount_amount is not None: + return format_decimal(line_total + matched_discount_amount) + + return format_decimal(line_total) + + def order_lookup(rows, retailer): return {(retailer, row["order_id"]): row for row in rows} @@ -339,6 +358,14 @@ def build_purchase_rows( { "purchase_date": row["order_date"], "retailer": row["retailer"], + "catalog_name": catalog_row.get("catalog_name", ""), + "product_type": catalog_row.get("product_type", ""), + "category": catalog_row.get("category", ""), + "net_line_total": derive_net_line_total(row), + "normalized_quantity": row.get("normalized_quantity", ""), + "normalized_quantity_unit": row.get("normalized_quantity_unit", ""), + "effective_price": derive_effective_price({**row, "net_line_total": derive_net_line_total(row)}), + "effective_price_unit": derive_effective_price_unit(row), "order_id": row["order_id"], "line_no": row["line_no"], "normalized_row_id": row.get("normalized_row_id", ""), @@ -348,9 +375,6 @@ def build_purchase_rows( "resolution_action": resolution.get("resolution_action", ""), "raw_item_name": row["item_name"], "normalized_item_name": row["item_name_norm"], - "catalog_name": catalog_row.get("catalog_name", ""), - "category": catalog_row.get("category", ""), - "product_type": catalog_row.get("product_type", ""), "brand": catalog_row.get("brand", ""), "variant": catalog_row.get("variant", ""), "image_url": row.get("image_url", ""), @@ -358,8 +382,6 @@ def build_purchase_rows( "upc": row["upc"], "qty": row["qty"], "unit": row["unit"], - "normalized_quantity": row.get("normalized_quantity", ""), - "normalized_quantity_unit": row.get("normalized_quantity_unit", ""), "pack_qty": row["pack_qty"], "size_value": row["size_value"], "size_unit": row["size_unit"], @@ -367,12 +389,10 @@ def build_purchase_rows( "line_total": row["line_total"], "unit_price": row["unit_price"], "matched_discount_amount": row.get("matched_discount_amount", ""), - "net_line_total": row.get("net_line_total", ""), "store_name": order_row.get("store_name", ""), "store_number": order_row.get("store_number", ""), "store_city": order_row.get("store_city", ""), "store_state": order_row.get("store_state", ""), - "effective_price": derive_effective_price(row), "is_discount_line": row["is_discount_line"], "is_coupon_line": row["is_coupon_line"], "is_fee": row["is_fee"], diff --git a/tests/test_purchases.py b/tests/test_purchases.py index 7cc2f73..951334d 100644 --- a/tests/test_purchases.py +++ b/tests/test_purchases.py @@ -8,6 +8,11 @@ import enrich_costco class PurchaseLogTests(unittest.TestCase): + def test_derive_net_line_total_preserves_existing_then_derives(self): + self.assertEqual("1.49", build_purchases.derive_net_line_total({"net_line_total": "1.49", "line_total": "2.98"})) + self.assertEqual("5.99", build_purchases.derive_net_line_total({"line_total": "6.99", "matched_discount_amount": "-1.00"})) + self.assertEqual("3.5", build_purchases.derive_net_line_total({"line_total": "3.50"})) + def test_derive_metrics_prefers_picked_weight_and_pack_count(self): metrics = build_purchases.derive_metrics( { @@ -161,6 +166,7 @@ class PurchaseLogTests(unittest.TestCase): self.assertEqual("https://example.test/banana.jpg", rows[0]["image_url"]) self.assertEqual("1", rows[0]["normalized_quantity"]) self.assertEqual("lb", rows[0]["normalized_quantity_unit"]) + self.assertEqual("lb", rows[0]["effective_price_unit"]) def test_main_writes_purchase_and_example_csvs(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -576,10 +582,15 @@ class PurchaseLogTests(unittest.TestCase): rows_by_item = {row["normalized_item_id"]: row for row in rows} self.assertEqual("0.5893", rows_by_item["gnorm:banana"]["effective_price"]) + self.assertEqual("lb", rows_by_item["gnorm:banana"]["effective_price_unit"]) self.assertEqual("0.4967", rows_by_item["cnorm:banana"]["effective_price"]) + self.assertEqual("lb", rows_by_item["cnorm:banana"]["effective_price_unit"]) self.assertEqual("0.2495", rows_by_item["gnorm:ice"]["effective_price"]) + self.assertEqual("lb", rows_by_item["gnorm:ice"]["effective_price_unit"]) self.assertEqual("26.99", rows_by_item["cnorm:patty"]["effective_price"]) + self.assertEqual("each", rows_by_item["cnorm:patty"]["effective_price_unit"]) self.assertEqual("", rows_by_item["gnorm:patty"]["effective_price"]) + self.assertEqual("", rows_by_item["gnorm:patty"]["effective_price_unit"]) def test_build_purchase_rows_leaves_effective_price_blank_without_valid_denominator(self): fieldnames = enrich_costco.OUTPUT_FIELDS @@ -611,6 +622,7 @@ class PurchaseLogTests(unittest.TestCase): rows, _links = build_purchases.build_purchase_rows([row], [], [], [], [], [], []) self.assertEqual("", rows[0]["effective_price"]) + self.assertEqual("", rows[0]["effective_price_unit"]) if __name__ == "__main__":