From 8ccf3ff43bbe7fc76c7038dfd86aa74a17710f82 Mon Sep 17 00:00:00 2001 From: ben Date: Mon, 23 Mar 2026 15:32:41 -0400 Subject: [PATCH] Reconcile review queue against current catalog state --- report_pipeline_status.py | 11 +- review_products.py | 104 ++++++- tests/test_review_workflow.py | 528 ++++++++++++++++++++-------------- 3 files changed, 417 insertions(+), 226 deletions(-) diff --git a/report_pipeline_status.py b/report_pipeline_status.py index e7efe79..a446b8c 100644 --- a/report_pipeline_status.py +++ b/report_pipeline_status.py @@ -27,9 +27,11 @@ def build_status_summary( costco_enriched, purchases, resolutions, + links, + catalog, ): normalized_rows = giant_enriched + costco_enriched - queue_rows = review_products.build_review_queue(purchases, resolutions) + queue_rows = review_products.build_review_queue(purchases, resolutions, links, catalog, []) queue_ids = {row["normalized_item_id"] for row in queue_rows} unresolved_purchase_rows = [ @@ -37,6 +39,7 @@ def build_status_summary( for row in purchases if row.get("normalized_item_id") and not row.get("catalog_id") + and row.get("resolution_action") != "exclude" and row.get("is_fee") != "true" and row.get("is_discount_line") != "true" and row.get("is_coupon_line") != "true" @@ -84,6 +87,8 @@ def build_status_summary( @click.option("--costco-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True) @click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True) @click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True) +@click.option("--links-csv", default="data/review/product_links.csv", show_default=True) +@click.option("--catalog-csv", default="data/catalog.csv", show_default=True) @click.option("--summary-csv", default="data/review/pipeline_status.csv", show_default=True) @click.option("--summary-json", default="data/review/pipeline_status.json", show_default=True) def main( @@ -95,6 +100,8 @@ def main( costco_enriched_csv, purchases_csv, resolutions_csv, + links_csv, + catalog_csv, summary_csv, summary_json, ): @@ -107,6 +114,8 @@ def main( read_rows_if_exists(costco_enriched_csv), read_rows_if_exists(purchases_csv), [build_purchases.normalize_resolution_row(row) for row in read_rows_if_exists(resolutions_csv)], + [build_purchases.normalize_link_row(row) for row in read_rows_if_exists(links_csv)], + [build_purchases.normalize_catalog_row(row) for row in read_rows_if_exists(catalog_csv)], ) write_csv_rows(summary_csv, summary_rows, SUMMARY_FIELDS) summary_json_path = Path(summary_json) diff --git a/review_products.py b/review_products.py index 9a8a7e8..e353934 100644 --- a/review_products.py +++ b/review_products.py @@ -31,6 +31,7 @@ INFO_COLOR = "cyan" PROMPT_COLOR = "bright_yellow" WARNING_COLOR = "magenta" TOKEN_RE = re.compile(r"[A-Z0-9]+") +REQUIRED_CATALOG_FIELDS = ("catalog_name", "product_type") def print_intro_text(): @@ -40,9 +41,37 @@ def print_intro_text(): click.echo(" category: broad analysis bucket such as dairy, produce, or frozen") -def build_review_queue(purchase_rows, resolution_rows): +def has_complete_catalog_row(catalog_row): + if not catalog_row: + return False + return all(catalog_row.get(field, "").strip() for field in REQUIRED_CATALOG_FIELDS) + + +def load_queue_lookup(queue_rows): + lookup = {} + for row in queue_rows: + normalized_item_id = row.get("normalized_item_id", "") + if normalized_item_id: + lookup[normalized_item_id] = row + return lookup + + +def build_review_queue( + purchase_rows, + resolution_rows, + link_rows=None, + catalog_rows=None, + existing_queue_rows=None, +): by_normalized = defaultdict(list) resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows) + link_lookup = build_purchases.load_link_lookup(link_rows or []) + catalog_lookup = { + row.get("catalog_id", ""): build_purchases.normalize_catalog_row(row) + for row in (catalog_rows or []) + if row.get("catalog_id", "") + } + queue_lookup = load_queue_lookup(existing_queue_rows or []) for row in purchase_rows: normalized_item_id = row.get("normalized_item_id", "") @@ -54,30 +83,40 @@ def build_review_queue(purchase_rows, resolution_rows): queue_rows = [] for normalized_item_id, rows in sorted(by_normalized.items()): current_resolution = resolution_lookup.get(normalized_item_id, {}) - if current_resolution.get("status") == "approved": + if current_resolution.get("status") == "approved" and current_resolution.get("resolution_action") == "exclude": continue + existing_queue_row = queue_lookup.get(normalized_item_id, {}) + linked_catalog_id = current_resolution.get("catalog_id") or link_lookup.get(normalized_item_id, {}).get("catalog_id", "") + linked_catalog_row = catalog_lookup.get(linked_catalog_id, {}) + has_valid_catalog_link = bool(linked_catalog_id and has_complete_catalog_row(linked_catalog_row)) + unresolved_rows = [ row for row in rows - if not row.get("catalog_id") - and row.get("is_item", "true") != "false" + if row.get("is_item", "true") != "false" and row.get("is_fee") != "true" and row.get("is_discount_line") != "true" and row.get("is_coupon_line") != "true" ] - if not unresolved_rows: + if not unresolved_rows or has_valid_catalog_link: continue retailers = sorted({row["retailer"] for row in rows}) review_id = stable_id("rvw", normalized_item_id) + reason_code = "missing_catalog_link" + if linked_catalog_id and linked_catalog_id not in catalog_lookup: + reason_code = "orphaned_catalog_link" + elif linked_catalog_id and not has_complete_catalog_row(linked_catalog_row): + reason_code = "incomplete_catalog_link" + queue_rows.append( { "review_id": review_id, "retailer": " | ".join(retailers), "normalized_item_id": normalized_item_id, - "catalog_id": current_resolution.get("catalog_id", ""), - "reason_code": "missing_catalog_link", + "catalog_id": linked_catalog_id, + "reason_code": reason_code, "priority": "high", "raw_item_names": compact_join( sorted({row["raw_item_name"] for row in rows if row["raw_item_name"]}), @@ -102,10 +141,13 @@ def build_review_queue(purchase_rows, resolution_rows): limit=8, ), "seen_count": str(len(rows)), - "status": current_resolution.get("status", "pending"), - "resolution_action": current_resolution.get("resolution_action", ""), - "resolution_notes": current_resolution.get("resolution_notes", ""), - "created_at": current_resolution.get("reviewed_at", today_text), + "status": existing_queue_row.get("status") or current_resolution.get("status", "pending"), + "resolution_action": existing_queue_row.get("resolution_action") + or current_resolution.get("resolution_action", ""), + "resolution_notes": existing_queue_row.get("resolution_notes") + or current_resolution.get("resolution_notes", ""), + "created_at": existing_queue_row.get("created_at") + or current_resolution.get("reviewed_at", today_text), "updated_at": today_text, } ) @@ -516,6 +558,10 @@ def link_rows_from_state(link_lookup): @click.command() +@click.option("--giant-items-enriched-csv", default="data/giant-web/normalized_items.csv", show_default=True) +@click.option("--costco-items-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True) +@click.option("--giant-orders-csv", default="data/giant-web/collected_orders.csv", show_default=True) +@click.option("--costco-orders-csv", default="data/costco-web/collected_orders.csv", show_default=True) @click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True) @click.option("--queue-csv", default="data/review/review_queue.csv", show_default=True) @click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True) @@ -523,12 +569,40 @@ def link_rows_from_state(link_lookup): @click.option("--links-csv", default="data/review/product_links.csv", show_default=True) @click.option("--limit", default=0, show_default=True, type=int) @click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.") -def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, links_csv, limit, refresh_only): - purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv) +def main( + giant_items_enriched_csv, + costco_items_enriched_csv, + giant_orders_csv, + costco_orders_csv, + purchases_csv, + queue_csv, + resolutions_csv, + catalog_csv, + links_csv, + limit, + refresh_only, +): resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv) catalog_rows = build_purchases.merge_catalog_rows(build_purchases.read_optional_csv_rows(catalog_csv), []) - link_lookup = build_purchases.load_link_lookup(build_purchases.read_optional_csv_rows(links_csv)) - queue_rows = build_review_queue(purchase_rows, resolution_rows) + link_rows = build_purchases.read_optional_csv_rows(links_csv) + purchase_rows, refreshed_link_rows = build_purchases.build_purchase_rows( + build_purchases.read_optional_csv_rows(giant_items_enriched_csv), + build_purchases.read_optional_csv_rows(costco_items_enriched_csv), + build_purchases.read_optional_csv_rows(giant_orders_csv), + build_purchases.read_optional_csv_rows(costco_orders_csv), + resolution_rows, + link_rows, + catalog_rows, + ) + build_purchases.write_csv_rows(purchases_csv, purchase_rows, build_purchases.PURCHASE_FIELDS) + link_lookup = build_purchases.load_link_lookup(refreshed_link_rows) + queue_rows = build_review_queue( + purchase_rows, + resolution_rows, + refreshed_link_rows, + catalog_rows, + build_purchases.read_optional_csv_rows(queue_csv), + ) write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS) click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}") diff --git a/tests/test_review_workflow.py b/tests/test_review_workflow.py index ce8e3d9..525b658 100644 --- a/tests/test_review_workflow.py +++ b/tests/test_review_workflow.py @@ -6,9 +6,94 @@ from unittest import mock from click.testing import CliRunner +import enrich_costco import review_products +def write_review_source_files(tmpdir, rows): + giant_items_csv = Path(tmpdir) / "giant_items.csv" + costco_items_csv = Path(tmpdir) / "costco_items.csv" + giant_orders_csv = Path(tmpdir) / "giant_orders.csv" + costco_orders_csv = Path(tmpdir) / "costco_orders.csv" + + fieldnames = enrich_costco.OUTPUT_FIELDS + grouped_rows = {"giant": [], "costco": []} + grouped_orders = {"giant": {}, "costco": {}} + + for index, row in enumerate(rows, start=1): + retailer = row.get("retailer", "giant") + normalized_row = {field: "" for field in fieldnames} + normalized_row.update( + { + "retailer": retailer, + "order_id": row.get("order_id", f"{retailer[0]}{index}"), + "line_no": row.get("line_no", str(index)), + "normalized_row_id": row.get( + "normalized_row_id", + f"{retailer}:{row.get('order_id', f'{retailer[0]}{index}')}:{row.get('line_no', str(index))}", + ), + "normalized_item_id": row.get("normalized_item_id", ""), + "order_date": row.get("purchase_date", ""), + "item_name": row.get("raw_item_name", ""), + "item_name_norm": row.get("normalized_item_name", ""), + "image_url": row.get("image_url", ""), + "upc": row.get("upc", ""), + "line_total": row.get("line_total", ""), + "net_line_total": row.get("net_line_total", ""), + "matched_discount_amount": row.get("matched_discount_amount", ""), + "qty": row.get("qty", "1"), + "unit": row.get("unit", "EA"), + "normalized_quantity": row.get("normalized_quantity", ""), + "normalized_quantity_unit": row.get("normalized_quantity_unit", ""), + "size_value": row.get("size_value", ""), + "size_unit": row.get("size_unit", ""), + "pack_qty": row.get("pack_qty", ""), + "measure_type": row.get("measure_type", "each"), + "retailer_item_id": row.get("retailer_item_id", ""), + "price_per_each": row.get("price_per_each", ""), + "price_per_lb": row.get("price_per_lb", ""), + "price_per_oz": row.get("price_per_oz", ""), + "is_discount_line": row.get("is_discount_line", "false"), + "is_coupon_line": row.get("is_coupon_line", "false"), + "is_fee": row.get("is_fee", "false"), + "raw_order_path": row.get("raw_order_path", ""), + } + ) + grouped_rows[retailer].append(normalized_row) + order_id = normalized_row["order_id"] + grouped_orders[retailer].setdefault( + order_id, + { + "order_id": order_id, + "store_name": row.get("store_name", ""), + "store_number": row.get("store_number", ""), + "store_city": row.get("store_city", ""), + "store_state": row.get("store_state", ""), + }, + ) + + for path, source_rows in [ + (giant_items_csv, grouped_rows["giant"]), + (costco_items_csv, grouped_rows["costco"]), + ]: + with path.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=fieldnames) + writer.writeheader() + writer.writerows(source_rows) + + order_fields = ["order_id", "store_name", "store_number", "store_city", "store_state"] + for path, source_rows in [ + (giant_orders_csv, grouped_orders["giant"].values()), + (costco_orders_csv, grouped_orders["costco"].values()), + ]: + with path.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=order_fields) + writer.writeheader() + writer.writerows(source_rows) + + return giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv + + class ReviewWorkflowTests(unittest.TestCase): def test_build_review_queue_groups_unresolved_purchases(self): queue_rows = review_products.build_review_queue( @@ -114,66 +199,47 @@ class ReviewWorkflowTests(unittest.TestCase): resolutions_csv = Path(tmpdir) / "review_resolutions.csv" catalog_csv = Path(tmpdir) / "catalog.csv" links_csv = Path(tmpdir) / "product_links.csv" - - purchase_fields = [ - "purchase_date", - "retailer", - "order_id", - "line_no", - "normalized_item_id", - "catalog_id", - "raw_item_name", - "normalized_item_name", - "image_url", - "upc", - "line_total", - ] - with purchases_csv.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter(handle, fieldnames=purchase_fields) - writer.writeheader() - writer.writerows( - [ - { - "purchase_date": "2026-03-14", - "retailer": "costco", - "order_id": "c2", - "line_no": "2", - "normalized_item_id": "cnorm_mix", - "catalog_id": "", - "raw_item_name": "MIXED PEPPER 6-PACK", - "normalized_item_name": "MIXED PEPPER", - "image_url": "", - "upc": "", - "line_total": "7.49", - }, - { - "purchase_date": "2026-03-12", - "retailer": "costco", - "order_id": "c1", - "line_no": "1", - "normalized_item_id": "cnorm_mix", - "catalog_id": "", - "raw_item_name": "MIXED PEPPER 6-PACK", - "normalized_item_name": "MIXED PEPPER", - "image_url": "https://example.test/mixed-pepper.jpg", - "upc": "", - "line_total": "6.99", - }, - { - "purchase_date": "2026-03-10", - "retailer": "giant", - "order_id": "g1", - "line_no": "1", - "normalized_item_id": "gnorm_mix", - "catalog_id": "cat_mix", - "raw_item_name": "MIXED PEPPER", - "normalized_item_name": "MIXED PEPPER", - "image_url": "", - "upc": "", - "line_total": "5.99", - }, - ] - ) + giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files( + tmpdir, + [ + { + "purchase_date": "2026-03-14", + "retailer": "costco", + "order_id": "c2", + "line_no": "2", + "normalized_item_id": "cnorm_mix", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "7.49", + }, + { + "purchase_date": "2026-03-12", + "retailer": "costco", + "order_id": "c1", + "line_no": "1", + "normalized_item_id": "cnorm_mix", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "https://example.test/mixed-pepper.jpg", + "upc": "", + "line_total": "6.99", + }, + { + "purchase_date": "2026-03-10", + "retailer": "giant", + "order_id": "g1", + "line_no": "1", + "normalized_item_id": "gnorm_mix", + "raw_item_name": "MIXED PEPPER", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "5.99", + }, + ], + ) with catalog_csv.open("w", newline="", encoding="utf-8") as handle: writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) @@ -195,11 +261,34 @@ class ReviewWorkflowTests(unittest.TestCase): "updated_at": "", } ) + with links_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.PRODUCT_LINK_FIELDS) + writer.writeheader() + writer.writerow( + { + "normalized_item_id": "gnorm_mix", + "catalog_id": "cat_mix", + "link_method": "manual_link", + "link_confidence": "high", + "review_status": "approved", + "reviewed_by": "", + "reviewed_at": "", + "link_notes": "", + } + ) runner = CliRunner() result = runner.invoke( review_products.main, [ + "--giant-items-enriched-csv", + str(giant_items_csv), + "--costco-items-enriched-csv", + str(costco_items_csv), + "--giant-orders-csv", + str(giant_orders_csv), + "--costco-orders-csv", + str(costco_orders_csv), "--purchases-csv", str(purchases_csv), "--queue-csv", @@ -234,40 +323,23 @@ class ReviewWorkflowTests(unittest.TestCase): resolutions_csv = Path(tmpdir) / "review_resolutions.csv" catalog_csv = Path(tmpdir) / "catalog.csv" links_csv = Path(tmpdir) / "product_links.csv" - - with purchases_csv.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter( - handle, - fieldnames=[ - "purchase_date", - "retailer", - "order_id", - "line_no", - "normalized_item_id", - "catalog_id", - "raw_item_name", - "normalized_item_name", - "image_url", - "upc", - "line_total", - ], - ) - writer.writeheader() - writer.writerow( + giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files( + tmpdir, + [ { "purchase_date": "2026-03-14", "retailer": "giant", "order_id": "g1", "line_no": "1", "normalized_item_id": "gnorm_ice", - "catalog_id": "", "raw_item_name": "SB BAGGED ICE 20LB", "normalized_item_name": "BAGGED ICE", "image_url": "", "upc": "", "line_total": "3.50", } - ) + ], + ) with catalog_csv.open("w", newline="", encoding="utf-8") as handle: writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) @@ -276,6 +348,14 @@ class ReviewWorkflowTests(unittest.TestCase): result = CliRunner().invoke( review_products.main, [ + "--giant-items-enriched-csv", + str(giant_items_csv), + "--costco-items-enriched-csv", + str(costco_items_csv), + "--giant-orders-csv", + str(giant_orders_csv), + "--costco-orders-csv", + str(costco_orders_csv), "--purchases-csv", str(purchases_csv), "--queue-csv", @@ -301,68 +381,47 @@ class ReviewWorkflowTests(unittest.TestCase): resolutions_csv = Path(tmpdir) / "review_resolutions.csv" catalog_csv = Path(tmpdir) / "catalog.csv" links_csv = Path(tmpdir) / "product_links.csv" - - with purchases_csv.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter( - handle, - fieldnames=[ - "purchase_date", - "retailer", - "order_id", - "line_no", - "normalized_item_id", - "catalog_id", - "raw_item_name", - "normalized_item_name", - "image_url", - "upc", - "line_total", - ], - ) - writer.writeheader() - writer.writerows( - [ - { - "purchase_date": "2026-03-14", - "retailer": "costco", - "order_id": "c2", - "line_no": "2", - "normalized_item_id": "cnorm_mix", - "catalog_id": "", - "raw_item_name": "MIXED PEPPER 6-PACK", - "normalized_item_name": "MIXED PEPPER", - "image_url": "", - "upc": "", - "line_total": "7.49", - }, - { - "purchase_date": "2026-03-12", - "retailer": "costco", - "order_id": "c1", - "line_no": "1", - "normalized_item_id": "cnorm_mix", - "catalog_id": "", - "raw_item_name": "MIXED PEPPER 6-PACK", - "normalized_item_name": "MIXED PEPPER", - "image_url": "", - "upc": "", - "line_total": "6.99", - }, - { - "purchase_date": "2026-03-10", - "retailer": "giant", - "order_id": "g1", - "line_no": "1", - "normalized_item_id": "gnorm_mix", - "catalog_id": "cat_mix", - "raw_item_name": "MIXED PEPPER", - "normalized_item_name": "MIXED PEPPER", - "image_url": "", - "upc": "", - "line_total": "5.99", - }, - ] - ) + giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files( + tmpdir, + [ + { + "purchase_date": "2026-03-14", + "retailer": "costco", + "order_id": "c2", + "line_no": "2", + "normalized_item_id": "cnorm_mix", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "7.49", + }, + { + "purchase_date": "2026-03-12", + "retailer": "costco", + "order_id": "c1", + "line_no": "1", + "normalized_item_id": "cnorm_mix", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "6.99", + }, + { + "purchase_date": "2026-03-10", + "retailer": "giant", + "order_id": "g1", + "line_no": "1", + "normalized_item_id": "gnorm_mix", + "raw_item_name": "MIXED PEPPER", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "5.99", + }, + ], + ) with catalog_csv.open("w", newline="", encoding="utf-8") as handle: writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) @@ -384,10 +443,33 @@ class ReviewWorkflowTests(unittest.TestCase): "updated_at": "", } ) + with links_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.PRODUCT_LINK_FIELDS) + writer.writeheader() + writer.writerow( + { + "normalized_item_id": "gnorm_mix", + "catalog_id": "cat_mix", + "link_method": "manual_link", + "link_confidence": "high", + "review_status": "approved", + "reviewed_by": "", + "reviewed_at": "", + "link_notes": "", + } + ) result = CliRunner().invoke( review_products.main, [ + "--giant-items-enriched-csv", + str(giant_items_csv), + "--costco-items-enriched-csv", + str(costco_items_csv), + "--giant-orders-csv", + str(giant_orders_csv), + "--costco-orders-csv", + str(costco_orders_csv), "--purchases-csv", str(purchases_csv), "--queue-csv", @@ -422,40 +504,23 @@ class ReviewWorkflowTests(unittest.TestCase): resolutions_csv = Path(tmpdir) / "review_resolutions.csv" catalog_csv = Path(tmpdir) / "catalog.csv" links_csv = Path(tmpdir) / "product_links.csv" - - with purchases_csv.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter( - handle, - fieldnames=[ - "purchase_date", - "retailer", - "order_id", - "line_no", - "normalized_item_id", - "catalog_id", - "raw_item_name", - "normalized_item_name", - "image_url", - "upc", - "line_total", - ], - ) - writer.writeheader() - writer.writerow( + giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files( + tmpdir, + [ { "purchase_date": "2026-03-14", "retailer": "giant", "order_id": "g1", "line_no": "1", "normalized_item_id": "gnorm_ice", - "catalog_id": "", "raw_item_name": "SB BAGGED ICE 20LB", "normalized_item_name": "BAGGED ICE", "image_url": "", "upc": "", "line_total": "3.50", } - ) + ], + ) with catalog_csv.open("w", newline="", encoding="utf-8") as handle: writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) @@ -481,6 +546,14 @@ class ReviewWorkflowTests(unittest.TestCase): result = CliRunner().invoke( review_products.main, [ + "--giant-items-enriched-csv", + str(giant_items_csv), + "--costco-items-enriched-csv", + str(costco_items_csv), + "--giant-orders-csv", + str(giant_orders_csv), + "--costco-orders-csv", + str(costco_orders_csv), "--purchases-csv", str(purchases_csv), "--queue-csv", @@ -506,40 +579,23 @@ class ReviewWorkflowTests(unittest.TestCase): resolutions_csv = Path(tmpdir) / "review_resolutions.csv" catalog_csv = Path(tmpdir) / "catalog.csv" links_csv = Path(tmpdir) / "product_links.csv" - - with purchases_csv.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter( - handle, - fieldnames=[ - "purchase_date", - "retailer", - "order_id", - "line_no", - "normalized_item_id", - "catalog_id", - "raw_item_name", - "normalized_item_name", - "image_url", - "upc", - "line_total", - ], - ) - writer.writeheader() - writer.writerow( + giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files( + tmpdir, + [ { "purchase_date": "2026-03-14", "retailer": "giant", "order_id": "g1", "line_no": "1", "normalized_item_id": "gnorm_skip", - "catalog_id": "", "raw_item_name": "TEST ITEM", "normalized_item_name": "TEST ITEM", "image_url": "", "upc": "", "line_total": "1.00", } - ) + ], + ) with catalog_csv.open("w", newline="", encoding="utf-8") as handle: writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) @@ -548,6 +604,14 @@ class ReviewWorkflowTests(unittest.TestCase): result = CliRunner().invoke( review_products.main, [ + "--giant-items-enriched-csv", + str(giant_items_csv), + "--costco-items-enriched-csv", + str(costco_items_csv), + "--giant-orders-csv", + str(giant_orders_csv), + "--costco-orders-csv", + str(costco_orders_csv), "--purchases-csv", str(purchases_csv), "--queue-csv", @@ -578,30 +642,12 @@ class ReviewWorkflowTests(unittest.TestCase): resolutions_csv = Path(tmpdir) / "review_resolutions.csv" catalog_csv = Path(tmpdir) / "catalog.csv" links_csv = Path(tmpdir) / "product_links.csv" - - with purchases_csv.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter( - handle, - fieldnames=[ - "purchase_date", - "normalized_item_id", - "catalog_id", - "retailer", - "raw_item_name", - "normalized_item_name", - "image_url", - "upc", - "line_total", - "order_id", - "line_no", - ], - ) - writer.writeheader() - writer.writerow( + giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files( + tmpdir, + [ { "purchase_date": "2026-03-15", "normalized_item_id": "gnorm_ice", - "catalog_id": "", "retailer": "giant", "raw_item_name": "SB BAGGED ICE 20LB", "normalized_item_name": "BAGGED ICE", @@ -611,7 +657,8 @@ class ReviewWorkflowTests(unittest.TestCase): "order_id": "g1", "line_no": "1", } - ) + ], + ) with mock.patch.object( review_products.click, @@ -619,6 +666,10 @@ class ReviewWorkflowTests(unittest.TestCase): side_effect=["n", "ICE", "frozen", "ice", "manual merge", "q"], ): review_products.main.callback( + giant_items_enriched_csv=str(giant_items_csv), + costco_items_enriched_csv=str(costco_items_csv), + giant_orders_csv=str(giant_orders_csv), + costco_orders_csv=str(costco_orders_csv), purchases_csv=str(purchases_csv), queue_csv=str(queue_csv), resolutions_csv=str(resolutions_csv), @@ -647,6 +698,63 @@ class ReviewWorkflowTests(unittest.TestCase): self.assertEqual("ICE", catalog_rows[0]["catalog_name"]) self.assertEqual(catalog_rows[0]["catalog_id"], link_rows[0]["catalog_id"]) + def test_build_review_queue_readds_orphaned_and_incomplete_links(self): + purchase_rows = [ + { + "normalized_item_id": "gnorm_orphan", + "catalog_id": "cat_missing", + "retailer": "giant", + "raw_item_name": "ORPHAN ITEM", + "normalized_item_name": "ORPHAN ITEM", + "upc": "", + "line_total": "3.50", + "is_fee": "false", + "is_discount_line": "false", + "is_coupon_line": "false", + }, + { + "normalized_item_id": "gnorm_incomplete", + "catalog_id": "cat_incomplete", + "retailer": "giant", + "raw_item_name": "INCOMPLETE ITEM", + "normalized_item_name": "INCOMPLETE ITEM", + "upc": "", + "line_total": "4.50", + "is_fee": "false", + "is_discount_line": "false", + "is_coupon_line": "false", + }, + ] + link_rows = [ + { + "normalized_item_id": "gnorm_orphan", + "catalog_id": "cat_missing", + }, + { + "normalized_item_id": "gnorm_incomplete", + "catalog_id": "cat_incomplete", + }, + ] + catalog_rows = [ + { + "catalog_id": "cat_incomplete", + "catalog_name": "INCOMPLETE ITEM", + "product_type": "", + } + ] + + queue_rows = review_products.build_review_queue( + purchase_rows, + [], + link_rows, + catalog_rows, + [], + ) + + reasons = {row["normalized_item_id"]: row["reason_code"] for row in queue_rows} + self.assertEqual("orphaned_catalog_link", reasons["gnorm_orphan"]) + self.assertEqual("incomplete_catalog_link", reasons["gnorm_incomplete"]) + if __name__ == "__main__": unittest.main()