Reconcile review queue against current catalog state

This commit is contained in:
ben
2026-03-23 15:32:41 -04:00
parent a93229408b
commit 8ccf3ff43b
3 changed files with 417 additions and 226 deletions

View File

@@ -27,9 +27,11 @@ def build_status_summary(
costco_enriched,
purchases,
resolutions,
links,
catalog,
):
normalized_rows = giant_enriched + costco_enriched
queue_rows = review_products.build_review_queue(purchases, resolutions)
queue_rows = review_products.build_review_queue(purchases, resolutions, links, catalog, [])
queue_ids = {row["normalized_item_id"] for row in queue_rows}
unresolved_purchase_rows = [
@@ -37,6 +39,7 @@ def build_status_summary(
for row in purchases
if row.get("normalized_item_id")
and not row.get("catalog_id")
and row.get("resolution_action") != "exclude"
and row.get("is_fee") != "true"
and row.get("is_discount_line") != "true"
and row.get("is_coupon_line") != "true"
@@ -84,6 +87,8 @@ def build_status_summary(
@click.option("--costco-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True)
@click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True)
@click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True)
@click.option("--links-csv", default="data/review/product_links.csv", show_default=True)
@click.option("--catalog-csv", default="data/catalog.csv", show_default=True)
@click.option("--summary-csv", default="data/review/pipeline_status.csv", show_default=True)
@click.option("--summary-json", default="data/review/pipeline_status.json", show_default=True)
def main(
@@ -95,6 +100,8 @@ def main(
costco_enriched_csv,
purchases_csv,
resolutions_csv,
links_csv,
catalog_csv,
summary_csv,
summary_json,
):
@@ -107,6 +114,8 @@ def main(
read_rows_if_exists(costco_enriched_csv),
read_rows_if_exists(purchases_csv),
[build_purchases.normalize_resolution_row(row) for row in read_rows_if_exists(resolutions_csv)],
[build_purchases.normalize_link_row(row) for row in read_rows_if_exists(links_csv)],
[build_purchases.normalize_catalog_row(row) for row in read_rows_if_exists(catalog_csv)],
)
write_csv_rows(summary_csv, summary_rows, SUMMARY_FIELDS)
summary_json_path = Path(summary_json)

View File

@@ -31,6 +31,7 @@ INFO_COLOR = "cyan"
PROMPT_COLOR = "bright_yellow"
WARNING_COLOR = "magenta"
TOKEN_RE = re.compile(r"[A-Z0-9]+")
REQUIRED_CATALOG_FIELDS = ("catalog_name", "product_type")
def print_intro_text():
@@ -40,9 +41,37 @@ def print_intro_text():
click.echo(" category: broad analysis bucket such as dairy, produce, or frozen")
def build_review_queue(purchase_rows, resolution_rows):
def has_complete_catalog_row(catalog_row):
    """Return True when a catalog row has every required field filled in.

    A row is complete only if each field named in ``REQUIRED_CATALOG_FIELDS``
    holds a non-blank value once surrounding whitespace is stripped. A
    missing or empty row is never complete.
    """
    if not catalog_row:
        return False
    # ``or ""`` covers keys that are present but hold None (csv.DictReader
    # fills short rows with None), which would otherwise crash on .strip().
    return all((catalog_row.get(field) or "").strip() for field in REQUIRED_CATALOG_FIELDS)
def load_queue_lookup(queue_rows):
    """Index existing review-queue rows by their ``normalized_item_id``.

    Rows with a missing or empty ``normalized_item_id`` are left out. When
    several rows share an id, the last one in ``queue_rows`` wins.
    """
    return {
        queue_row["normalized_item_id"]: queue_row
        for queue_row in queue_rows
        if queue_row.get("normalized_item_id", "")
    }
def build_review_queue(
purchase_rows,
resolution_rows,
link_rows=None,
catalog_rows=None,
existing_queue_rows=None,
):
by_normalized = defaultdict(list)
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
link_lookup = build_purchases.load_link_lookup(link_rows or [])
catalog_lookup = {
row.get("catalog_id", ""): build_purchases.normalize_catalog_row(row)
for row in (catalog_rows or [])
if row.get("catalog_id", "")
}
queue_lookup = load_queue_lookup(existing_queue_rows or [])
for row in purchase_rows:
normalized_item_id = row.get("normalized_item_id", "")
@@ -54,30 +83,40 @@ def build_review_queue(purchase_rows, resolution_rows):
queue_rows = []
for normalized_item_id, rows in sorted(by_normalized.items()):
current_resolution = resolution_lookup.get(normalized_item_id, {})
if current_resolution.get("status") == "approved":
if current_resolution.get("status") == "approved" and current_resolution.get("resolution_action") == "exclude":
continue
existing_queue_row = queue_lookup.get(normalized_item_id, {})
linked_catalog_id = current_resolution.get("catalog_id") or link_lookup.get(normalized_item_id, {}).get("catalog_id", "")
linked_catalog_row = catalog_lookup.get(linked_catalog_id, {})
has_valid_catalog_link = bool(linked_catalog_id and has_complete_catalog_row(linked_catalog_row))
unresolved_rows = [
row
for row in rows
if not row.get("catalog_id")
and row.get("is_item", "true") != "false"
if row.get("is_item", "true") != "false"
and row.get("is_fee") != "true"
and row.get("is_discount_line") != "true"
and row.get("is_coupon_line") != "true"
]
if not unresolved_rows:
if not unresolved_rows or has_valid_catalog_link:
continue
retailers = sorted({row["retailer"] for row in rows})
review_id = stable_id("rvw", normalized_item_id)
reason_code = "missing_catalog_link"
if linked_catalog_id and linked_catalog_id not in catalog_lookup:
reason_code = "orphaned_catalog_link"
elif linked_catalog_id and not has_complete_catalog_row(linked_catalog_row):
reason_code = "incomplete_catalog_link"
queue_rows.append(
{
"review_id": review_id,
"retailer": " | ".join(retailers),
"normalized_item_id": normalized_item_id,
"catalog_id": current_resolution.get("catalog_id", ""),
"reason_code": "missing_catalog_link",
"catalog_id": linked_catalog_id,
"reason_code": reason_code,
"priority": "high",
"raw_item_names": compact_join(
sorted({row["raw_item_name"] for row in rows if row["raw_item_name"]}),
@@ -102,10 +141,13 @@ def build_review_queue(purchase_rows, resolution_rows):
limit=8,
),
"seen_count": str(len(rows)),
"status": current_resolution.get("status", "pending"),
"resolution_action": current_resolution.get("resolution_action", ""),
"resolution_notes": current_resolution.get("resolution_notes", ""),
"created_at": current_resolution.get("reviewed_at", today_text),
"status": existing_queue_row.get("status") or current_resolution.get("status", "pending"),
"resolution_action": existing_queue_row.get("resolution_action")
or current_resolution.get("resolution_action", ""),
"resolution_notes": existing_queue_row.get("resolution_notes")
or current_resolution.get("resolution_notes", ""),
"created_at": existing_queue_row.get("created_at")
or current_resolution.get("reviewed_at", today_text),
"updated_at": today_text,
}
)
@@ -516,6 +558,10 @@ def link_rows_from_state(link_lookup):
@click.command()
@click.option("--giant-items-enriched-csv", default="data/giant-web/normalized_items.csv", show_default=True)
@click.option("--costco-items-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True)
@click.option("--giant-orders-csv", default="data/giant-web/collected_orders.csv", show_default=True)
@click.option("--costco-orders-csv", default="data/costco-web/collected_orders.csv", show_default=True)
@click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True)
@click.option("--queue-csv", default="data/review/review_queue.csv", show_default=True)
@click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True)
@@ -523,12 +569,40 @@ def link_rows_from_state(link_lookup):
@click.option("--links-csv", default="data/review/product_links.csv", show_default=True)
@click.option("--limit", default=0, show_default=True, type=int)
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, links_csv, limit, refresh_only):
purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv)
def main(
giant_items_enriched_csv,
costco_items_enriched_csv,
giant_orders_csv,
costco_orders_csv,
purchases_csv,
queue_csv,
resolutions_csv,
catalog_csv,
links_csv,
limit,
refresh_only,
):
resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
catalog_rows = build_purchases.merge_catalog_rows(build_purchases.read_optional_csv_rows(catalog_csv), [])
link_lookup = build_purchases.load_link_lookup(build_purchases.read_optional_csv_rows(links_csv))
queue_rows = build_review_queue(purchase_rows, resolution_rows)
link_rows = build_purchases.read_optional_csv_rows(links_csv)
purchase_rows, refreshed_link_rows = build_purchases.build_purchase_rows(
build_purchases.read_optional_csv_rows(giant_items_enriched_csv),
build_purchases.read_optional_csv_rows(costco_items_enriched_csv),
build_purchases.read_optional_csv_rows(giant_orders_csv),
build_purchases.read_optional_csv_rows(costco_orders_csv),
resolution_rows,
link_rows,
catalog_rows,
)
build_purchases.write_csv_rows(purchases_csv, purchase_rows, build_purchases.PURCHASE_FIELDS)
link_lookup = build_purchases.load_link_lookup(refreshed_link_rows)
queue_rows = build_review_queue(
purchase_rows,
resolution_rows,
refreshed_link_rows,
catalog_rows,
build_purchases.read_optional_csv_rows(queue_csv),
)
write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")

View File

@@ -6,9 +6,94 @@ from unittest import mock
from click.testing import CliRunner
import enrich_costco
import review_products
def write_review_source_files(tmpdir, rows):
    """Write per-retailer item and order CSV fixtures for the review tests.

    Each entry of ``rows`` is a purchase-style dict. It is mapped onto the
    ``enrich_costco.OUTPUT_FIELDS`` layout and filed under its retailer
    ("giant" when the row does not name one). One order record is kept per
    distinct ``order_id`` within a retailer, built from the first row that
    mentions that order.

    Returns the written paths as
    ``(giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv)``.
    """
    base_dir = Path(tmpdir)
    giant_items_csv = base_dir / "giant_items.csv"
    costco_items_csv = base_dir / "costco_items.csv"
    giant_orders_csv = base_dir / "giant_orders.csv"
    costco_orders_csv = base_dir / "costco_orders.csv"

    item_fields = enrich_costco.OUTPUT_FIELDS
    order_fields = ["order_id", "store_name", "store_number", "store_city", "store_state"]

    # (output column, key read from the input row, default when absent)
    copied_columns = (
        ("normalized_item_id", "normalized_item_id", ""),
        ("order_date", "purchase_date", ""),
        ("item_name", "raw_item_name", ""),
        ("item_name_norm", "normalized_item_name", ""),
        ("image_url", "image_url", ""),
        ("upc", "upc", ""),
        ("line_total", "line_total", ""),
        ("net_line_total", "net_line_total", ""),
        ("matched_discount_amount", "matched_discount_amount", ""),
        ("qty", "qty", "1"),
        ("unit", "unit", "EA"),
        ("normalized_quantity", "normalized_quantity", ""),
        ("normalized_quantity_unit", "normalized_quantity_unit", ""),
        ("size_value", "size_value", ""),
        ("size_unit", "size_unit", ""),
        ("pack_qty", "pack_qty", ""),
        ("measure_type", "measure_type", "each"),
        ("retailer_item_id", "retailer_item_id", ""),
        ("price_per_each", "price_per_each", ""),
        ("price_per_lb", "price_per_lb", ""),
        ("price_per_oz", "price_per_oz", ""),
        ("is_discount_line", "is_discount_line", "false"),
        ("is_coupon_line", "is_coupon_line", "false"),
        ("is_fee", "is_fee", "false"),
        ("raw_order_path", "raw_order_path", ""),
    )

    items_by_retailer = {"giant": [], "costco": []}
    orders_by_retailer = {"giant": {}, "costco": {}}

    for position, source in enumerate(rows, start=1):
        retailer = source.get("retailer", "giant")
        # Defaults derive from the retailer initial and the 1-based position.
        order_id = source.get("order_id", f"{retailer[0]}{position}")
        line_no = source.get("line_no", str(position))

        item = dict.fromkeys(item_fields, "")
        item["retailer"] = retailer
        item["order_id"] = order_id
        item["line_no"] = line_no
        item["normalized_row_id"] = source.get(
            "normalized_row_id",
            f"{retailer}:{order_id}:{line_no}",
        )
        for target, key, default in copied_columns:
            item[target] = source.get(key, default)
        items_by_retailer[retailer].append(item)

        # Only the first row seen for an order contributes its store details.
        retailer_orders = orders_by_retailer[retailer]
        if order_id not in retailer_orders:
            retailer_orders[order_id] = {
                "order_id": order_id,
                "store_name": source.get("store_name", ""),
                "store_number": source.get("store_number", ""),
                "store_city": source.get("store_city", ""),
                "store_state": source.get("store_state", ""),
            }

    def dump_csv(path, fieldnames, records):
        # Header plus one line per record, written as UTF-8.
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records)

    dump_csv(giant_items_csv, item_fields, items_by_retailer["giant"])
    dump_csv(costco_items_csv, item_fields, items_by_retailer["costco"])
    dump_csv(giant_orders_csv, order_fields, orders_by_retailer["giant"].values())
    dump_csv(costco_orders_csv, order_fields, orders_by_retailer["costco"].values())

    return giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv
class ReviewWorkflowTests(unittest.TestCase):
def test_build_review_queue_groups_unresolved_purchases(self):
queue_rows = review_products.build_review_queue(
@@ -114,66 +199,47 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
purchase_fields = [
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
]
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=purchase_fields)
writer.writeheader()
writer.writerows(
[
{
"purchase_date": "2026-03-14",
"retailer": "costco",
"order_id": "c2",
"line_no": "2",
"normalized_item_id": "cnorm_mix",
"catalog_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "7.49",
},
{
"purchase_date": "2026-03-12",
"retailer": "costco",
"order_id": "c1",
"line_no": "1",
"normalized_item_id": "cnorm_mix",
"catalog_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "https://example.test/mixed-pepper.jpg",
"upc": "",
"line_total": "6.99",
},
{
"purchase_date": "2026-03-10",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"raw_item_name": "MIXED PEPPER",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "5.99",
},
]
)
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
tmpdir,
[
{
"purchase_date": "2026-03-14",
"retailer": "costco",
"order_id": "c2",
"line_no": "2",
"normalized_item_id": "cnorm_mix",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "7.49",
},
{
"purchase_date": "2026-03-12",
"retailer": "costco",
"order_id": "c1",
"line_no": "1",
"normalized_item_id": "cnorm_mix",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "https://example.test/mixed-pepper.jpg",
"upc": "",
"line_total": "6.99",
},
{
"purchase_date": "2026-03-10",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_mix",
"raw_item_name": "MIXED PEPPER",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "5.99",
},
],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -195,11 +261,34 @@ class ReviewWorkflowTests(unittest.TestCase):
"updated_at": "",
}
)
with links_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.PRODUCT_LINK_FIELDS)
writer.writeheader()
writer.writerow(
{
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"link_method": "manual_link",
"link_confidence": "high",
"review_status": "approved",
"reviewed_by": "",
"reviewed_at": "",
"link_notes": "",
}
)
runner = CliRunner()
result = runner.invoke(
review_products.main,
[
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
@@ -234,40 +323,23 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
tmpdir,
[
{
"purchase_date": "2026-03-14",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_ice",
"catalog_id": "",
"raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE",
"image_url": "",
"upc": "",
"line_total": "3.50",
}
)
],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -276,6 +348,14 @@ class ReviewWorkflowTests(unittest.TestCase):
result = CliRunner().invoke(
review_products.main,
[
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
@@ -301,68 +381,47 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerows(
[
{
"purchase_date": "2026-03-14",
"retailer": "costco",
"order_id": "c2",
"line_no": "2",
"normalized_item_id": "cnorm_mix",
"catalog_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "7.49",
},
{
"purchase_date": "2026-03-12",
"retailer": "costco",
"order_id": "c1",
"line_no": "1",
"normalized_item_id": "cnorm_mix",
"catalog_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "6.99",
},
{
"purchase_date": "2026-03-10",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"raw_item_name": "MIXED PEPPER",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "5.99",
},
]
)
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
tmpdir,
[
{
"purchase_date": "2026-03-14",
"retailer": "costco",
"order_id": "c2",
"line_no": "2",
"normalized_item_id": "cnorm_mix",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "7.49",
},
{
"purchase_date": "2026-03-12",
"retailer": "costco",
"order_id": "c1",
"line_no": "1",
"normalized_item_id": "cnorm_mix",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "6.99",
},
{
"purchase_date": "2026-03-10",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_mix",
"raw_item_name": "MIXED PEPPER",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "5.99",
},
],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -384,10 +443,33 @@ class ReviewWorkflowTests(unittest.TestCase):
"updated_at": "",
}
)
with links_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.PRODUCT_LINK_FIELDS)
writer.writeheader()
writer.writerow(
{
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"link_method": "manual_link",
"link_confidence": "high",
"review_status": "approved",
"reviewed_by": "",
"reviewed_at": "",
"link_notes": "",
}
)
result = CliRunner().invoke(
review_products.main,
[
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
@@ -422,40 +504,23 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
tmpdir,
[
{
"purchase_date": "2026-03-14",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_ice",
"catalog_id": "",
"raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE",
"image_url": "",
"upc": "",
"line_total": "3.50",
}
)
],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -481,6 +546,14 @@ class ReviewWorkflowTests(unittest.TestCase):
result = CliRunner().invoke(
review_products.main,
[
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
@@ -506,40 +579,23 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
tmpdir,
[
{
"purchase_date": "2026-03-14",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_skip",
"catalog_id": "",
"raw_item_name": "TEST ITEM",
"normalized_item_name": "TEST ITEM",
"image_url": "",
"upc": "",
"line_total": "1.00",
}
)
],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -548,6 +604,14 @@ class ReviewWorkflowTests(unittest.TestCase):
result = CliRunner().invoke(
review_products.main,
[
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
@@ -578,30 +642,12 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"normalized_item_id",
"catalog_id",
"retailer",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
"order_id",
"line_no",
],
)
writer.writeheader()
writer.writerow(
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
tmpdir,
[
{
"purchase_date": "2026-03-15",
"normalized_item_id": "gnorm_ice",
"catalog_id": "",
"retailer": "giant",
"raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE",
@@ -611,7 +657,8 @@ class ReviewWorkflowTests(unittest.TestCase):
"order_id": "g1",
"line_no": "1",
}
)
],
)
with mock.patch.object(
review_products.click,
@@ -619,6 +666,10 @@ class ReviewWorkflowTests(unittest.TestCase):
side_effect=["n", "ICE", "frozen", "ice", "manual merge", "q"],
):
review_products.main.callback(
giant_items_enriched_csv=str(giant_items_csv),
costco_items_enriched_csv=str(costco_items_csv),
giant_orders_csv=str(giant_orders_csv),
costco_orders_csv=str(costco_orders_csv),
purchases_csv=str(purchases_csv),
queue_csv=str(queue_csv),
resolutions_csv=str(resolutions_csv),
@@ -647,6 +698,63 @@ class ReviewWorkflowTests(unittest.TestCase):
self.assertEqual("ICE", catalog_rows[0]["catalog_name"])
self.assertEqual(catalog_rows[0]["catalog_id"], link_rows[0]["catalog_id"])
def test_build_review_queue_readds_orphaned_and_incomplete_links(self):
    # A link whose catalog row is missing, or lacks a required field, must
    # send the item back to the review queue with a matching reason code.

    def purchase(normalized_item_id, catalog_id, item_name, line_total):
        # Minimal giant purchase row that counts as a regular item line.
        return {
            "normalized_item_id": normalized_item_id,
            "catalog_id": catalog_id,
            "retailer": "giant",
            "raw_item_name": item_name,
            "normalized_item_name": item_name,
            "upc": "",
            "line_total": line_total,
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
        }

    purchases = [
        purchase("gnorm_orphan", "cat_missing", "ORPHAN ITEM", "3.50"),
        purchase("gnorm_incomplete", "cat_incomplete", "INCOMPLETE ITEM", "4.50"),
    ]
    links = [
        {"normalized_item_id": "gnorm_orphan", "catalog_id": "cat_missing"},
        {"normalized_item_id": "gnorm_incomplete", "catalog_id": "cat_incomplete"},
    ]
    # "cat_missing" is absent from the catalog; "cat_incomplete" has a blank
    # product_type.
    catalog = [
        {
            "catalog_id": "cat_incomplete",
            "catalog_name": "INCOMPLETE ITEM",
            "product_type": "",
        }
    ]

    queue_rows = review_products.build_review_queue(
        purchases,
        [],
        link_rows=links,
        catalog_rows=catalog,
        existing_queue_rows=[],
    )

    reasons = {}
    for queue_row in queue_rows:
        reasons[queue_row["normalized_item_id"]] = queue_row["reason_code"]

    self.assertEqual("orphaned_catalog_link", reasons["gnorm_orphan"])
    self.assertEqual("incomplete_catalog_link", reasons["gnorm_incomplete"])
# Run the unittest suite when this module is executed directly as a script.
if __name__ == "__main__":
unittest.main()