"""Unit tests for ``build_purchases``: metric derivation, row building, CSV output."""

import csv
import tempfile
import unittest
from pathlib import Path

import build_purchases
import enrich_costco

# Boolean-like columns that every fixture row in this module sets to "false".
_LINE_FLAGS = ("is_discount_line", "is_coupon_line", "is_fee")


def _item_row(**values):
    """Return an enriched-item row: blank for every output field, then ``values``.

    Every column in ``enrich_costco.OUTPUT_FIELDS`` starts as an empty string,
    ``values`` is applied on top, and each flag in ``_LINE_FLAGS`` is set to
    ``"false"`` unless ``values`` supplies it explicitly.
    """
    row = {field: "" for field in enrich_costco.OUTPUT_FIELDS}
    row.update(values)
    for flag in _LINE_FLAGS:
        row[flag] = values.get(flag, "false")
    return row


def _giant_banana_row(**extra):
    """Return the Giant banana line (order ``g1``) shared by several tests.

    ``extra`` adds or overrides columns, e.g. ``image_url``.
    """
    return _item_row(
        retailer="giant",
        order_id="g1",
        line_no="1",
        normalized_row_id="giant:g1:1",
        normalized_item_id="gnorm:banana",
        order_date="2026-03-01",
        item_name="FRESH BANANA",
        item_name_norm="BANANA",
        retailer_item_id="100",
        upc="4011",
        qty="1",
        unit="LB",
        normalized_quantity="1",
        normalized_quantity_unit="lb",
        line_total="1.29",
        unit_price="1.29",
        measure_type="weight",
        price_per_lb="1.29",
        raw_order_path="data/giant-web/raw/g1.json",
        **extra,
    )


def _costco_banana_row():
    """Return the Costco banana line (order ``c1``) shared by several tests."""
    return _item_row(
        retailer="costco",
        order_id="c1",
        line_no="1",
        normalized_row_id="costco:c1:1",
        normalized_item_id="cnorm:banana",
        order_date="2026-03-12",
        item_name="BANANAS 3 LB / 1.36 KG",
        item_name_norm="BANANA",
        retailer_item_id="30669",
        qty="1",
        unit="E",
        normalized_quantity="3",
        normalized_quantity_unit="lb",
        line_total="2.98",
        unit_price="2.98",
        size_value="3",
        size_unit="lb",
        measure_type="weight",
        price_per_lb="0.9933",
        raw_order_path="data/costco-web/raw/c1.json",
    )


def _giant_order():
    """Return a fresh order record for Giant order ``g1``."""
    return {
        "order_id": "g1",
        "store_name": "Giant",
        "store_number": "42",
        "store_city": "Springfield",
        "store_state": "VA",
    }


def _costco_order():
    """Return a fresh order record for Costco order ``c1``."""
    return {
        "order_id": "c1",
        "store_name": "MT VERNON",
        "store_number": "1115",
        "store_city": "ALEXANDRIA",
        "store_state": "VA",
    }


def _catalog_row(catalog_id, catalog_name, category, product_type):
    """Return a catalog record with the remaining columns left blank."""
    return {
        "catalog_id": catalog_id,
        "catalog_name": catalog_name,
        "category": category,
        "product_type": product_type,
        "brand": "",
        "variant": "",
        "size_value": "",
        "size_unit": "",
        "pack_qty": "",
        "measure_type": "",
        "notes": "",
        "created_at": "",
        "updated_at": "",
    }


def _approved_manual_link(normalized_item_id, catalog_id):
    """Return an approved, high-confidence manual product link record."""
    return {
        "normalized_item_id": normalized_item_id,
        "catalog_id": catalog_id,
        "link_method": "manual_link",
        "link_confidence": "high",
        "review_status": "approved",
        "reviewed_by": "",
        "reviewed_at": "",
        "link_notes": "",
    }


def _banana_links():
    """Return links mapping both banana items to ``cat_banana``."""
    return [
        _approved_manual_link("gnorm:banana", "cat_banana"),
        _approved_manual_link("cnorm:banana", "cat_banana"),
    ]


def _write_csv(path, fieldnames, rows):
    """Write ``rows`` to ``path`` as UTF-8 CSV with a header line."""
    with path.open("w", newline="", encoding="utf-8") as handle:
        writer = csv.DictWriter(handle, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows(rows)


def _read_csv(path):
    """Return every record of the CSV at ``path`` as a list of dicts."""
    with path.open(newline="", encoding="utf-8") as handle:
        return list(csv.DictReader(handle))


class PurchaseLogTests(unittest.TestCase):
    """Tests for ``build_purchases.derive_metrics``, ``build_purchase_rows`` and ``main``."""

    def test_derive_metrics_prefers_picked_weight_and_pack_count(self):
        """Per-unit prices are derived from ``picked_weight`` and ``pack_qty``."""
        metrics = build_purchases.derive_metrics(
            {
                "line_total": "4.00",
                "qty": "1",
                "pack_qty": "4",
                "size_value": "",
                "size_unit": "",
                "picked_weight": "2",
                "price_per_each": "",
                "price_per_lb": "",
                "price_per_oz": "",
            }
        )

        self.assertEqual("4", metrics["price_per_each"])
        self.assertEqual("1", metrics["price_per_count"])
        self.assertEqual("2", metrics["price_per_lb"])
        self.assertEqual("0.125", metrics["price_per_oz"])
        self.assertEqual("picked_weight_lb", metrics["price_per_lb_basis"])

    def test_build_purchase_rows_maps_catalog_ids(self):
        """Linked items from both retailers resolve to the shared catalog id."""
        giant_row = _giant_banana_row(image_url="https://example.test/banana.jpg")
        costco_row = _costco_banana_row()
        catalog_rows = [_catalog_row("cat_banana", "BANANA", "produce", "banana")]

        rows, _links = build_purchases.build_purchase_rows(
            [giant_row],
            [costco_row],
            [_giant_order()],
            [_costco_order()],
            [],
            _banana_links(),
            catalog_rows,
        )

        self.assertEqual(2, len(rows))
        self.assertTrue(all(row["catalog_id"] == "cat_banana" for row in rows))
        self.assertEqual({"giant", "costco"}, {row["retailer"] for row in rows})
        # The first output row is expected to be the Giant line.
        self.assertEqual("https://example.test/banana.jpg", rows[0]["image_url"])
        self.assertEqual("1", rows[0]["normalized_quantity"])
        self.assertEqual("lb", rows[0]["normalized_quantity_unit"])

    def test_main_writes_purchase_and_example_csvs(self):
        """``main`` writes the purchases CSV and the comparison-examples CSV."""
        with tempfile.TemporaryDirectory() as tmpdir:
            workdir = Path(tmpdir)
            giant_items = workdir / "giant_items.csv"
            costco_items = workdir / "costco_items.csv"
            giant_orders = workdir / "giant_orders.csv"
            costco_orders = workdir / "costco_orders.csv"
            # Never written in this test: main receives a path to a
            # nonexistent resolutions file.
            resolutions_csv = workdir / "review_resolutions.csv"
            catalog_csv = workdir / "catalog.csv"
            links_csv = workdir / "product_links.csv"
            # The "review" directory is not created by this test.
            purchases_csv = workdir / "review" / "purchases.csv"
            examples_csv = workdir / "review" / "comparison_examples.csv"

            item_fields = enrich_costco.OUTPUT_FIELDS
            _write_csv(giant_items, item_fields, [_giant_banana_row()])
            _write_csv(costco_items, item_fields, [_costco_banana_row()])

            order_fields = [
                "order_id",
                "store_name",
                "store_number",
                "store_city",
                "store_state",
            ]
            _write_csv(giant_orders, order_fields, [_giant_order()])
            _write_csv(costco_orders, order_fields, [_costco_order()])

            _write_csv(
                catalog_csv,
                build_purchases.CATALOG_FIELDS,
                [_catalog_row("cat_banana", "BANANA", "produce", "banana")],
            )
            _write_csv(
                links_csv,
                build_purchases.PRODUCT_LINK_FIELDS,
                _banana_links(),
            )

            # NOTE(review): ``main`` looks like a click command; ``.callback``
            # would invoke the undecorated function directly -- confirm.
            build_purchases.main.callback(
                giant_items_enriched_csv=str(giant_items),
                costco_items_enriched_csv=str(costco_items),
                giant_orders_csv=str(giant_orders),
                costco_orders_csv=str(costco_orders),
                resolutions_csv=str(resolutions_csv),
                catalog_csv=str(catalog_csv),
                links_csv=str(links_csv),
                output_csv=str(purchases_csv),
                examples_csv=str(examples_csv),
            )

            self.assertTrue(purchases_csv.exists())
            self.assertTrue(examples_csv.exists())

            purchase_rows = _read_csv(purchases_csv)
            example_rows = _read_csv(examples_csv)

            self.assertEqual(2, len(purchase_rows))
            self.assertEqual(1, len(example_rows))

    def test_build_purchase_rows_applies_manual_resolution(self):
        """An approved resolution assigns the catalog id and yields a link."""
        giant_row = _item_row(
            retailer="giant",
            order_id="g1",
            line_no="1",
            normalized_row_id="giant:g1:1",
            normalized_item_id="gnorm:ice",
            order_date="2026-03-01",
            item_name="SB BAGGED ICE 20LB",
            item_name_norm="BAGGED ICE",
            retailer_item_id="100",
            upc="",
            qty="1",
            unit="EA",
            normalized_quantity="1",
            normalized_quantity_unit="each",
            line_total="3.50",
            unit_price="3.50",
            measure_type="each",
            raw_order_path="data/giant-web/raw/g1.json",
        )
        resolution = {
            "normalized_item_id": "gnorm:ice",
            "catalog_id": "cat_ice",
            "resolution_action": "create",
            "status": "approved",
            "resolution_notes": "manual ice merge",
            "reviewed_at": "2026-03-16",
        }

        rows, links = build_purchases.build_purchase_rows(
            [giant_row],
            [],
            [_giant_order()],
            [],
            [resolution],
            [],
            [_catalog_row("cat_ice", "ICE", "frozen", "ice")],
        )

        self.assertEqual("cat_ice", rows[0]["catalog_id"])
        self.assertEqual("approved", rows[0]["review_status"])
        self.assertEqual("create", rows[0]["resolution_action"])
        self.assertEqual("cat_ice", links[0]["catalog_id"])
        self.assertEqual("1", rows[0]["normalized_quantity"])
        self.assertEqual("each", rows[0]["normalized_quantity_unit"])

    def test_build_purchase_rows_derives_effective_price_for_known_cases(self):
        """``effective_price`` matches the expected value for five sample lines."""
        giant_banana = _item_row(
            retailer="giant",
            order_id="g1",
            line_no="1",
            normalized_row_id="giant:g1:1",
            normalized_item_id="gnorm:banana",
            order_date="2026-03-01",
            item_name="FRESH BANANA",
            item_name_norm="BANANA",
            retailer_item_id="100",
            qty="1",
            unit="LB",
            normalized_quantity="1.68",
            normalized_quantity_unit="lb",
            line_total="0.99",
            unit_price="0.99",
            measure_type="weight",
            price_per_lb="0.5893",
            raw_order_path="data/giant-web/raw/g1.json",
        )
        costco_banana = _item_row(
            retailer="costco",
            order_id="c1",
            line_no="1",
            normalized_row_id="costco:c1:1",
            normalized_item_id="cnorm:banana",
            order_date="2026-03-12",
            item_name="BANANAS 3 LB / 1.36 KG",
            item_name_norm="BANANA",
            retailer_item_id="30669",
            qty="1",
            unit="E",
            normalized_quantity="3",
            normalized_quantity_unit="lb",
            line_total="2.98",
            net_line_total="1.49",
            unit_price="2.98",
            size_value="3",
            size_unit="lb",
            measure_type="weight",
            price_per_lb="0.4967",
            raw_order_path="data/costco-web/raw/c1.json",
        )
        giant_ice = _item_row(
            retailer="giant",
            order_id="g2",
            line_no="1",
            normalized_row_id="giant:g2:1",
            normalized_item_id="gnorm:ice",
            order_date="2026-03-02",
            item_name="SB BAGGED ICE 20LB",
            item_name_norm="BAGGED ICE",
            retailer_item_id="101",
            qty="2",
            unit="EA",
            normalized_quantity="40",
            normalized_quantity_unit="lb",
            line_total="9.98",
            unit_price="4.99",
            size_value="20",
            size_unit="lb",
            measure_type="weight",
            price_per_lb="0.2495",
            raw_order_path="data/giant-web/raw/g2.json",
        )
        costco_patty = _item_row(
            retailer="costco",
            order_id="c2",
            line_no="1",
            normalized_row_id="costco:c2:1",
            normalized_item_id="cnorm:patty",
            order_date="2026-03-03",
            item_name="BEEF PATTIES 6# BAG",
            item_name_norm="BEEF PATTIES 6# BAG",
            retailer_item_id="777",
            qty="1",
            unit="E",
            normalized_quantity="1",
            normalized_quantity_unit="each",
            line_total="26.99",
            net_line_total="26.99",
            unit_price="26.99",
            measure_type="each",
            raw_order_path="data/costco-web/raw/c2.json",
        )
        # Weight-measured line with no normalized quantity or unit.
        giant_patty = _item_row(
            retailer="giant",
            order_id="g3",
            line_no="1",
            normalized_row_id="giant:g3:1",
            normalized_item_id="gnorm:patty",
            order_date="2026-03-04",
            item_name="80% PATTIES PK12",
            item_name_norm="80% PATTIES PK12",
            retailer_item_id="102",
            qty="1",
            unit="LB",
            normalized_quantity="",
            normalized_quantity_unit="",
            line_total="10.05",
            unit_price="10.05",
            measure_type="weight",
            price_per_lb="7.7907",
            raw_order_path="data/giant-web/raw/g3.json",
        )

        rows, _links = build_purchases.build_purchase_rows(
            [giant_banana, giant_ice, giant_patty],
            [costco_banana, costco_patty],
            [],
            [],
            [],
            [],
            [],
        )

        rows_by_item = {row["normalized_item_id"]: row for row in rows}
        expected_prices = [
            ("gnorm:banana", "0.5893"),
            ("cnorm:banana", "0.4967"),
            ("gnorm:ice", "0.2495"),
            ("cnorm:patty", "26.99"),
            ("gnorm:patty", ""),
        ]
        for item_id, expected in expected_prices:
            self.assertEqual(expected, rows_by_item[item_id]["effective_price"])

    def test_build_purchase_rows_leaves_effective_price_blank_without_valid_denominator(self):
        """A line whose normalized quantity is ``"0"`` gets a blank ``effective_price``."""
        row = _item_row(
            retailer="giant",
            order_id="g1",
            line_no="1",
            normalized_row_id="giant:g1:1",
            normalized_item_id="gnorm:blank",
            order_date="2026-03-01",
            item_name="MYSTERY ITEM",
            item_name_norm="MYSTERY ITEM",
            retailer_item_id="100",
            qty="1",
            unit="EA",
            normalized_quantity="0",
            normalized_quantity_unit="each",
            line_total="3.50",
            unit_price="3.50",
            measure_type="each",
            raw_order_path="data/giant-web/raw/g1.json",
        )

        rows, _links = build_purchases.build_purchase_rows([row], [], [], [], [], [], [])

        self.assertEqual("", rows[0]["effective_price"])


if __name__ == "__main__":
    unittest.main()