Refactor review pipeline around normalized items
This commit is contained in:
@@ -14,33 +14,39 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
queue_rows = review_products.build_review_queue(
|
||||
[
|
||||
{
|
||||
"observed_product_id": "gobs_1",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "gnorm_1",
|
||||
"catalog_id": "",
|
||||
"retailer": "giant",
|
||||
"raw_item_name": "SB BAGGED ICE 20LB",
|
||||
"normalized_item_name": "BAGGED ICE",
|
||||
"upc": "",
|
||||
"line_total": "3.50",
|
||||
"is_fee": "false",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
},
|
||||
{
|
||||
"observed_product_id": "gobs_1",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "gnorm_1",
|
||||
"catalog_id": "",
|
||||
"retailer": "giant",
|
||||
"raw_item_name": "SB BAG ICE CUBED 10LB",
|
||||
"normalized_item_name": "BAG ICE",
|
||||
"upc": "",
|
||||
"line_total": "2.50",
|
||||
"is_fee": "false",
|
||||
"is_discount_line": "false",
|
||||
"is_coupon_line": "false",
|
||||
},
|
||||
],
|
||||
[],
|
||||
)
|
||||
|
||||
self.assertEqual(1, len(queue_rows))
|
||||
self.assertEqual("gobs_1", queue_rows[0]["observed_product_id"])
|
||||
self.assertEqual("gnorm_1", queue_rows[0]["normalized_item_id"])
|
||||
self.assertIn("SB BAGGED ICE 20LB", queue_rows[0]["raw_item_names"])
|
||||
|
||||
def test_build_canonical_suggestions_prefers_upc_then_name(self):
|
||||
suggestions = review_products.build_canonical_suggestions(
|
||||
def test_build_catalog_suggestions_prefers_upc_then_name(self):
|
||||
suggestions = review_products.build_catalog_suggestions(
|
||||
[
|
||||
{
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
@@ -49,36 +55,41 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
],
|
||||
[
|
||||
{
|
||||
"canonical_product_id": "gcan_1",
|
||||
"canonical_name": "MIXED PEPPER",
|
||||
"upc": "",
|
||||
"normalized_item_id": "prior_1",
|
||||
"normalized_item_name": "MIXED PEPPER 6 PACK",
|
||||
"upc": "12345",
|
||||
"catalog_id": "cat_2",
|
||||
}
|
||||
],
|
||||
[
|
||||
{
|
||||
"catalog_id": "cat_1",
|
||||
"catalog_name": "MIXED PEPPER",
|
||||
},
|
||||
{
|
||||
"canonical_product_id": "gcan_2",
|
||||
"canonical_name": "MIXED PEPPER 6 PACK",
|
||||
"upc": "12345",
|
||||
"catalog_id": "cat_2",
|
||||
"catalog_name": "MIXED PEPPER 6 PACK",
|
||||
},
|
||||
],
|
||||
)
|
||||
|
||||
self.assertEqual("gcan_2", suggestions[0]["canonical_product_id"])
|
||||
self.assertEqual("cat_2", suggestions[0]["catalog_id"])
|
||||
self.assertEqual("exact upc", suggestions[0]["reason"])
|
||||
self.assertEqual("gcan_1", suggestions[1]["canonical_product_id"])
|
||||
|
||||
def test_review_products_displays_position_items_and_suggestions(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
queue_csv = Path(tmpdir) / "review_queue.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
catalog_csv = Path(tmpdir) / "catalog.csv"
|
||||
|
||||
purchase_fields = [
|
||||
"purchase_date",
|
||||
"retailer",
|
||||
"order_id",
|
||||
"line_no",
|
||||
"observed_product_id",
|
||||
"canonical_product_id",
|
||||
"normalized_item_id",
|
||||
"catalog_id",
|
||||
"raw_item_name",
|
||||
"normalized_item_name",
|
||||
"image_url",
|
||||
@@ -95,8 +106,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer": "costco",
|
||||
"order_id": "c2",
|
||||
"line_no": "2",
|
||||
"observed_product_id": "gobs_mix",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "cnorm_mix",
|
||||
"catalog_id": "",
|
||||
"raw_item_name": "MIXED PEPPER 6-PACK",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
@@ -108,14 +119,27 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer": "costco",
|
||||
"order_id": "c1",
|
||||
"line_no": "1",
|
||||
"observed_product_id": "gobs_mix",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "cnorm_mix",
|
||||
"catalog_id": "",
|
||||
"raw_item_name": "MIXED PEPPER 6-PACK",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "https://example.test/mixed-pepper.jpg",
|
||||
"upc": "",
|
||||
"line_total": "6.99",
|
||||
},
|
||||
{
|
||||
"purchase_date": "2026-03-10",
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"normalized_item_id": "gnorm_mix",
|
||||
"catalog_id": "cat_mix",
|
||||
"raw_item_name": "MIXED PEPPER",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
"upc": "",
|
||||
"line_total": "5.99",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -124,8 +148,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
writer.writeheader()
|
||||
writer.writerow(
|
||||
{
|
||||
"canonical_product_id": "gcan_mix",
|
||||
"canonical_name": "MIXED PEPPER",
|
||||
"catalog_id": "cat_mix",
|
||||
"catalog_name": "MIXED PEPPER",
|
||||
"category": "produce",
|
||||
"product_type": "pepper",
|
||||
"brand": "",
|
||||
@@ -158,14 +182,14 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(0, result.exit_code)
|
||||
self.assertIn("Review 1/1: Resolve observed_product MIXED PEPPER to canonical_name [__]?", result.output)
|
||||
self.assertIn("Review 1/1: Resolve normalized_item MIXED PEPPER to catalog_name [__]?", result.output)
|
||||
self.assertIn("2 matched items:", result.output)
|
||||
self.assertIn("[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:", result.output)
|
||||
self.assertIn("[l]ink existing [n]ew catalog e[x]clude [s]kip [q]uit:", result.output)
|
||||
first_item = result.output.index("[1] 2026-03-14 | 7.49")
|
||||
second_item = result.output.index("[2] 2026-03-12 | 6.99")
|
||||
self.assertLess(first_item, second_item)
|
||||
self.assertIn("https://example.test/mixed-pepper.jpg", result.output)
|
||||
self.assertIn("1 canonical suggestions found:", result.output)
|
||||
self.assertIn("1 catalog_name suggestions found:", result.output)
|
||||
self.assertIn("[1] MIXED PEPPER", result.output)
|
||||
self.assertIn("\x1b[", result.output)
|
||||
|
||||
@@ -174,7 +198,7 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
queue_csv = Path(tmpdir) / "review_queue.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
catalog_csv = Path(tmpdir) / "catalog.csv"
|
||||
|
||||
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(
|
||||
@@ -184,8 +208,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer",
|
||||
"order_id",
|
||||
"line_no",
|
||||
"observed_product_id",
|
||||
"canonical_product_id",
|
||||
"normalized_item_id",
|
||||
"catalog_id",
|
||||
"raw_item_name",
|
||||
"normalized_item_name",
|
||||
"image_url",
|
||||
@@ -200,8 +224,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"observed_product_id": "gobs_ice",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "gnorm_ice",
|
||||
"catalog_id": "",
|
||||
"raw_item_name": "SB BAGGED ICE 20LB",
|
||||
"normalized_item_name": "BAGGED ICE",
|
||||
"image_url": "",
|
||||
@@ -231,14 +255,14 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(0, result.exit_code)
|
||||
self.assertIn("no canonical_name suggestions found", result.output)
|
||||
self.assertIn("no catalog_name suggestions found", result.output)
|
||||
|
||||
def test_link_existing_uses_numbered_selection_and_confirmation(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
queue_csv = Path(tmpdir) / "review_queue.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
catalog_csv = Path(tmpdir) / "catalog.csv"
|
||||
|
||||
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(
|
||||
@@ -248,8 +272,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer",
|
||||
"order_id",
|
||||
"line_no",
|
||||
"observed_product_id",
|
||||
"canonical_product_id",
|
||||
"normalized_item_id",
|
||||
"catalog_id",
|
||||
"raw_item_name",
|
||||
"normalized_item_name",
|
||||
"image_url",
|
||||
@@ -265,8 +289,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer": "costco",
|
||||
"order_id": "c2",
|
||||
"line_no": "2",
|
||||
"observed_product_id": "gobs_mix",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "cnorm_mix",
|
||||
"catalog_id": "",
|
||||
"raw_item_name": "MIXED PEPPER 6-PACK",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
@@ -278,14 +302,27 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
"retailer": "costco",
|
||||
"order_id": "c1",
|
||||
"line_no": "1",
|
||||
"observed_product_id": "gobs_mix",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "cnorm_mix",
|
||||
"catalog_id": "",
|
||||
"raw_item_name": "MIXED PEPPER 6-PACK",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
"upc": "",
|
||||
"line_total": "6.99",
|
||||
},
|
||||
{
|
||||
"purchase_date": "2026-03-10",
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"normalized_item_id": "gnorm_mix",
|
||||
"catalog_id": "cat_mix",
|
||||
"raw_item_name": "MIXED PEPPER",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
"upc": "",
|
||||
"line_total": "5.99",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
@@ -294,8 +331,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
writer.writeheader()
|
||||
writer.writerow(
|
||||
{
|
||||
"canonical_product_id": "gcan_mix",
|
||||
"canonical_name": "MIXED PEPPER",
|
||||
"catalog_id": "cat_mix",
|
||||
"catalog_name": "MIXED PEPPER",
|
||||
"category": "",
|
||||
"product_type": "",
|
||||
"brand": "",
|
||||
@@ -329,29 +366,29 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(0, result.exit_code)
|
||||
self.assertIn("Select the canonical_name to associate 2 items with:", result.output)
|
||||
self.assertIn('[1] MIXED PEPPER | gcan_mix', result.output)
|
||||
self.assertIn("Select the catalog_name to associate 2 items with:", result.output)
|
||||
self.assertIn("[1] MIXED PEPPER | cat_mix", result.output)
|
||||
self.assertIn('2 "MIXED PEPPER" items and future matches will be associated with "MIXED PEPPER".', result.output)
|
||||
self.assertIn("actions: [y]es [n]o [b]ack [s]kip [q]uit", result.output)
|
||||
with resolutions_csv.open(newline="", encoding="utf-8") as handle:
|
||||
rows = list(csv.DictReader(handle))
|
||||
self.assertEqual("gcan_mix", rows[0]["canonical_product_id"])
|
||||
self.assertEqual("cat_mix", rows[0]["catalog_id"])
|
||||
self.assertEqual("link", rows[0]["resolution_action"])
|
||||
|
||||
def test_review_products_creates_canonical_and_resolution(self):
|
||||
def test_review_products_creates_catalog_and_resolution(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
queue_csv = Path(tmpdir) / "review_queue.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
catalog_csv = Path(tmpdir) / "catalog.csv"
|
||||
|
||||
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(
|
||||
handle,
|
||||
fieldnames=[
|
||||
"purchase_date",
|
||||
"observed_product_id",
|
||||
"canonical_product_id",
|
||||
"normalized_item_id",
|
||||
"catalog_id",
|
||||
"retailer",
|
||||
"raw_item_name",
|
||||
"normalized_item_name",
|
||||
@@ -366,8 +403,8 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
writer.writerow(
|
||||
{
|
||||
"purchase_date": "2026-03-15",
|
||||
"observed_product_id": "gobs_ice",
|
||||
"canonical_product_id": "",
|
||||
"normalized_item_id": "gnorm_ice",
|
||||
"catalog_id": "",
|
||||
"retailer": "giant",
|
||||
"raw_item_name": "SB BAGGED ICE 20LB",
|
||||
"normalized_item_name": "BAGGED ICE",
|
||||
@@ -402,7 +439,7 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
catalog_rows = list(csv.DictReader(handle))
|
||||
self.assertEqual("create", resolution_rows[0]["resolution_action"])
|
||||
self.assertEqual("approved", resolution_rows[0]["status"])
|
||||
self.assertEqual("ICE", catalog_rows[0]["canonical_name"])
|
||||
self.assertEqual("ICE", catalog_rows[0]["catalog_name"])
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user