Refactor review pipeline around normalized items
This commit is contained in:
@@ -29,7 +29,7 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
self.assertEqual("0.125", metrics["price_per_oz"])
|
||||
self.assertEqual("picked_weight_lb", metrics["price_per_lb_basis"])
|
||||
|
||||
def test_build_purchase_rows_maps_canonical_ids(self):
|
||||
def test_build_purchase_rows_maps_catalog_ids(self):
|
||||
fieldnames = enrich_costco.OUTPUT_FIELDS
|
||||
giant_row = {field: "" for field in fieldnames}
|
||||
giant_row.update(
|
||||
@@ -37,7 +37,8 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"observed_item_key": "giant:g1:1",
|
||||
"normalized_row_id": "giant:g1:1",
|
||||
"normalized_item_id": "gnorm:banana",
|
||||
"order_date": "2026-03-01",
|
||||
"item_name": "FRESH BANANA",
|
||||
"item_name_norm": "BANANA",
|
||||
@@ -50,7 +51,7 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"unit_price": "1.29",
|
||||
"measure_type": "weight",
|
||||
"price_per_lb": "1.29",
|
||||
"raw_order_path": "giant_output/raw/g1.json",
|
||||
"raw_order_path": "data/giant-web/raw/g1.json",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
"is_fee": "false",
|
||||
@@ -62,7 +63,8 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"retailer": "costco",
|
||||
"order_id": "c1",
|
||||
"line_no": "1",
|
||||
"observed_item_key": "costco:c1:1",
|
||||
"normalized_row_id": "costco:c1:1",
|
||||
"normalized_item_id": "cnorm:banana",
|
||||
"order_date": "2026-03-12",
|
||||
"item_name": "BANANAS 3 LB / 1.36 KG",
|
||||
"item_name_norm": "BANANA",
|
||||
@@ -75,7 +77,7 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"size_unit": "lb",
|
||||
"measure_type": "weight",
|
||||
"price_per_lb": "0.9933",
|
||||
"raw_order_path": "costco_output/raw/c1.json",
|
||||
"raw_order_path": "data/costco-web/raw/c1.json",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
"is_fee": "false",
|
||||
@@ -99,17 +101,58 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"store_state": "VA",
|
||||
}
|
||||
]
|
||||
catalog_rows = [
|
||||
{
|
||||
"catalog_id": "cat_banana",
|
||||
"catalog_name": "BANANA",
|
||||
"category": "produce",
|
||||
"product_type": "banana",
|
||||
"brand": "",
|
||||
"variant": "",
|
||||
"size_value": "",
|
||||
"size_unit": "",
|
||||
"pack_qty": "",
|
||||
"measure_type": "",
|
||||
"notes": "",
|
||||
"created_at": "",
|
||||
"updated_at": "",
|
||||
}
|
||||
]
|
||||
link_rows = [
|
||||
{
|
||||
"normalized_item_id": "gnorm:banana",
|
||||
"catalog_id": "cat_banana",
|
||||
"link_method": "manual_link",
|
||||
"link_confidence": "high",
|
||||
"review_status": "approved",
|
||||
"reviewed_by": "",
|
||||
"reviewed_at": "",
|
||||
"link_notes": "",
|
||||
},
|
||||
{
|
||||
"normalized_item_id": "cnorm:banana",
|
||||
"catalog_id": "cat_banana",
|
||||
"link_method": "manual_link",
|
||||
"link_confidence": "high",
|
||||
"review_status": "approved",
|
||||
"reviewed_by": "",
|
||||
"reviewed_at": "",
|
||||
"link_notes": "",
|
||||
},
|
||||
]
|
||||
|
||||
rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
|
||||
rows, _links = build_purchases.build_purchase_rows(
|
||||
[giant_row],
|
||||
[costco_row],
|
||||
giant_orders,
|
||||
costco_orders,
|
||||
[],
|
||||
link_rows,
|
||||
catalog_rows,
|
||||
)
|
||||
|
||||
self.assertEqual(2, len(rows))
|
||||
self.assertTrue(all(row["canonical_product_id"] for row in rows))
|
||||
self.assertTrue(all(row["catalog_id"] == "cat_banana" for row in rows))
|
||||
self.assertEqual({"giant", "costco"}, {row["retailer"] for row in rows})
|
||||
self.assertEqual("https://example.test/banana.jpg", rows[0]["image_url"])
|
||||
|
||||
@@ -120,10 +163,10 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
giant_orders = Path(tmpdir) / "giant_orders.csv"
|
||||
costco_orders = Path(tmpdir) / "costco_orders.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
catalog_csv = Path(tmpdir) / "catalog.csv"
|
||||
links_csv = Path(tmpdir) / "product_links.csv"
|
||||
purchases_csv = Path(tmpdir) / "combined" / "purchases.csv"
|
||||
examples_csv = Path(tmpdir) / "combined" / "comparison_examples.csv"
|
||||
purchases_csv = Path(tmpdir) / "review" / "purchases.csv"
|
||||
examples_csv = Path(tmpdir) / "review" / "comparison_examples.csv"
|
||||
|
||||
fieldnames = enrich_costco.OUTPUT_FIELDS
|
||||
giant_row = {field: "" for field in fieldnames}
|
||||
@@ -132,7 +175,8 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"observed_item_key": "giant:g1:1",
|
||||
"normalized_row_id": "giant:g1:1",
|
||||
"normalized_item_id": "gnorm:banana",
|
||||
"order_date": "2026-03-01",
|
||||
"item_name": "FRESH BANANA",
|
||||
"item_name_norm": "BANANA",
|
||||
@@ -144,7 +188,7 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"unit_price": "1.29",
|
||||
"measure_type": "weight",
|
||||
"price_per_lb": "1.29",
|
||||
"raw_order_path": "giant_output/raw/g1.json",
|
||||
"raw_order_path": "data/giant-web/raw/g1.json",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
"is_fee": "false",
|
||||
@@ -156,7 +200,8 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"retailer": "costco",
|
||||
"order_id": "c1",
|
||||
"line_no": "1",
|
||||
"observed_item_key": "costco:c1:1",
|
||||
"normalized_row_id": "costco:c1:1",
|
||||
"normalized_item_id": "cnorm:banana",
|
||||
"order_date": "2026-03-12",
|
||||
"item_name": "BANANAS 3 LB / 1.36 KG",
|
||||
"item_name_norm": "BANANA",
|
||||
@@ -169,17 +214,14 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"size_unit": "lb",
|
||||
"measure_type": "weight",
|
||||
"price_per_lb": "0.9933",
|
||||
"raw_order_path": "costco_output/raw/c1.json",
|
||||
"raw_order_path": "data/costco-web/raw/c1.json",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
"is_fee": "false",
|
||||
}
|
||||
)
|
||||
|
||||
for path, source_rows in [
|
||||
(giant_items, [giant_row]),
|
||||
(costco_items, [costco_row]),
|
||||
]:
|
||||
for path, source_rows in [(giant_items, [giant_row]), (costco_items, [costco_row])]:
|
||||
with path.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=fieldnames)
|
||||
writer.writeheader()
|
||||
@@ -217,6 +259,55 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
writer.writeheader()
|
||||
writer.writerows(source_rows)
|
||||
|
||||
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=build_purchases.CATALOG_FIELDS)
|
||||
writer.writeheader()
|
||||
writer.writerow(
|
||||
{
|
||||
"catalog_id": "cat_banana",
|
||||
"catalog_name": "BANANA",
|
||||
"category": "produce",
|
||||
"product_type": "banana",
|
||||
"brand": "",
|
||||
"variant": "",
|
||||
"size_value": "",
|
||||
"size_unit": "",
|
||||
"pack_qty": "",
|
||||
"measure_type": "",
|
||||
"notes": "",
|
||||
"created_at": "",
|
||||
"updated_at": "",
|
||||
}
|
||||
)
|
||||
|
||||
with links_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=build_purchases.PRODUCT_LINK_FIELDS)
|
||||
writer.writeheader()
|
||||
writer.writerows(
|
||||
[
|
||||
{
|
||||
"normalized_item_id": "gnorm:banana",
|
||||
"catalog_id": "cat_banana",
|
||||
"link_method": "manual_link",
|
||||
"link_confidence": "high",
|
||||
"review_status": "approved",
|
||||
"reviewed_by": "",
|
||||
"reviewed_at": "",
|
||||
"link_notes": "",
|
||||
},
|
||||
{
|
||||
"normalized_item_id": "cnorm:banana",
|
||||
"catalog_id": "cat_banana",
|
||||
"link_method": "manual_link",
|
||||
"link_confidence": "high",
|
||||
"review_status": "approved",
|
||||
"reviewed_by": "",
|
||||
"reviewed_at": "",
|
||||
"link_notes": "",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
build_purchases.main.callback(
|
||||
giant_items_enriched_csv=str(giant_items),
|
||||
costco_items_enriched_csv=str(costco_items),
|
||||
@@ -246,7 +337,8 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"observed_item_key": "giant:g1:1",
|
||||
"normalized_row_id": "giant:g1:1",
|
||||
"normalized_item_id": "gnorm:ice",
|
||||
"order_date": "2026-03-01",
|
||||
"item_name": "SB BAGGED ICE 20LB",
|
||||
"item_name_norm": "BAGGED ICE",
|
||||
@@ -257,17 +349,14 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
"line_total": "3.50",
|
||||
"unit_price": "3.50",
|
||||
"measure_type": "each",
|
||||
"raw_order_path": "giant_output/raw/g1.json",
|
||||
"raw_order_path": "data/giant-web/raw/g1.json",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
"is_fee": "false",
|
||||
}
|
||||
)
|
||||
observed_rows, _canonical_rows, _link_rows, _observed_id_by_key, _canonical_by_observed = (
|
||||
build_purchases.build_link_state([giant_row])
|
||||
)
|
||||
observed_product_id = observed_rows[0]["observed_product_id"]
|
||||
rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
|
||||
|
||||
rows, links = build_purchases.build_purchase_rows(
|
||||
[giant_row],
|
||||
[],
|
||||
[
|
||||
@@ -282,19 +371,38 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
[],
|
||||
[
|
||||
{
|
||||
"observed_product_id": observed_product_id,
|
||||
"canonical_product_id": "gcan_manual_ice",
|
||||
"normalized_item_id": "gnorm:ice",
|
||||
"catalog_id": "cat_ice",
|
||||
"resolution_action": "create",
|
||||
"status": "approved",
|
||||
"resolution_notes": "manual ice merge",
|
||||
"reviewed_at": "2026-03-16",
|
||||
}
|
||||
],
|
||||
[],
|
||||
[
|
||||
{
|
||||
"catalog_id": "cat_ice",
|
||||
"catalog_name": "ICE",
|
||||
"category": "frozen",
|
||||
"product_type": "ice",
|
||||
"brand": "",
|
||||
"variant": "",
|
||||
"size_value": "",
|
||||
"size_unit": "",
|
||||
"pack_qty": "",
|
||||
"measure_type": "",
|
||||
"notes": "",
|
||||
"created_at": "",
|
||||
"updated_at": "",
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
self.assertEqual("gcan_manual_ice", rows[0]["canonical_product_id"])
|
||||
self.assertEqual("cat_ice", rows[0]["catalog_id"])
|
||||
self.assertEqual("approved", rows[0]["review_status"])
|
||||
self.assertEqual("create", rows[0]["resolution_action"])
|
||||
self.assertEqual("cat_ice", links[0]["catalog_id"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user