Reconcile review queue against current catalog state

This commit is contained in:
ben
2026-03-23 15:32:41 -04:00
parent a93229408b
commit 8ccf3ff43b
3 changed files with 417 additions and 226 deletions

View File

@@ -27,9 +27,11 @@ def build_status_summary(
costco_enriched, costco_enriched,
purchases, purchases,
resolutions, resolutions,
links,
catalog,
): ):
normalized_rows = giant_enriched + costco_enriched normalized_rows = giant_enriched + costco_enriched
queue_rows = review_products.build_review_queue(purchases, resolutions) queue_rows = review_products.build_review_queue(purchases, resolutions, links, catalog, [])
queue_ids = {row["normalized_item_id"] for row in queue_rows} queue_ids = {row["normalized_item_id"] for row in queue_rows}
unresolved_purchase_rows = [ unresolved_purchase_rows = [
@@ -37,6 +39,7 @@ def build_status_summary(
for row in purchases for row in purchases
if row.get("normalized_item_id") if row.get("normalized_item_id")
and not row.get("catalog_id") and not row.get("catalog_id")
and row.get("resolution_action") != "exclude"
and row.get("is_fee") != "true" and row.get("is_fee") != "true"
and row.get("is_discount_line") != "true" and row.get("is_discount_line") != "true"
and row.get("is_coupon_line") != "true" and row.get("is_coupon_line") != "true"
@@ -84,6 +87,8 @@ def build_status_summary(
@click.option("--costco-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True) @click.option("--costco-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True)
@click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True) @click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True)
@click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True) @click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True)
@click.option("--links-csv", default="data/review/product_links.csv", show_default=True)
@click.option("--catalog-csv", default="data/catalog.csv", show_default=True)
@click.option("--summary-csv", default="data/review/pipeline_status.csv", show_default=True) @click.option("--summary-csv", default="data/review/pipeline_status.csv", show_default=True)
@click.option("--summary-json", default="data/review/pipeline_status.json", show_default=True) @click.option("--summary-json", default="data/review/pipeline_status.json", show_default=True)
def main( def main(
@@ -95,6 +100,8 @@ def main(
costco_enriched_csv, costco_enriched_csv,
purchases_csv, purchases_csv,
resolutions_csv, resolutions_csv,
links_csv,
catalog_csv,
summary_csv, summary_csv,
summary_json, summary_json,
): ):
@@ -107,6 +114,8 @@ def main(
read_rows_if_exists(costco_enriched_csv), read_rows_if_exists(costco_enriched_csv),
read_rows_if_exists(purchases_csv), read_rows_if_exists(purchases_csv),
[build_purchases.normalize_resolution_row(row) for row in read_rows_if_exists(resolutions_csv)], [build_purchases.normalize_resolution_row(row) for row in read_rows_if_exists(resolutions_csv)],
[build_purchases.normalize_link_row(row) for row in read_rows_if_exists(links_csv)],
[build_purchases.normalize_catalog_row(row) for row in read_rows_if_exists(catalog_csv)],
) )
write_csv_rows(summary_csv, summary_rows, SUMMARY_FIELDS) write_csv_rows(summary_csv, summary_rows, SUMMARY_FIELDS)
summary_json_path = Path(summary_json) summary_json_path = Path(summary_json)

View File

@@ -31,6 +31,7 @@ INFO_COLOR = "cyan"
PROMPT_COLOR = "bright_yellow" PROMPT_COLOR = "bright_yellow"
WARNING_COLOR = "magenta" WARNING_COLOR = "magenta"
TOKEN_RE = re.compile(r"[A-Z0-9]+") TOKEN_RE = re.compile(r"[A-Z0-9]+")
REQUIRED_CATALOG_FIELDS = ("catalog_name", "product_type")
def print_intro_text(): def print_intro_text():
@@ -40,9 +41,37 @@ def print_intro_text():
click.echo(" category: broad analysis bucket such as dairy, produce, or frozen") click.echo(" category: broad analysis bucket such as dairy, produce, or frozen")
def has_complete_catalog_row(catalog_row):
    """Return True when a catalog row has every required field filled in.

    A row is complete only if each name in ``REQUIRED_CATALOG_FIELDS`` maps
    to a value that is non-empty after stripping whitespace. A missing or
    empty row is never complete.

    Args:
        catalog_row: dict-like catalog row, or a falsy value when no row
            was found for the linked catalog id.

    Returns:
        bool: whether all required fields are populated.
    """
    if not catalog_row:
        return False
    # `or ""` covers keys that are present but hold None (for example short
    # rows read by csv.DictReader); calling .strip() on None would raise.
    return all(
        (catalog_row.get(field) or "").strip()
        for field in REQUIRED_CATALOG_FIELDS
    )
def load_queue_lookup(queue_rows):
    """Index review-queue rows by their ``normalized_item_id``.

    Rows whose ``normalized_item_id`` is missing or empty are skipped. When
    several rows share an id, the last one in ``queue_rows`` wins.

    Args:
        queue_rows: iterable of dict-like queue rows; ``None`` is treated as
            an empty queue.

    Returns:
        dict mapping each normalized item id to its queue row.
    """
    return {
        row["normalized_item_id"]: row
        for row in (queue_rows or ())
        if row.get("normalized_item_id")
    }
def build_review_queue(
purchase_rows,
resolution_rows,
link_rows=None,
catalog_rows=None,
existing_queue_rows=None,
):
by_normalized = defaultdict(list) by_normalized = defaultdict(list)
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows) resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
link_lookup = build_purchases.load_link_lookup(link_rows or [])
catalog_lookup = {
row.get("catalog_id", ""): build_purchases.normalize_catalog_row(row)
for row in (catalog_rows or [])
if row.get("catalog_id", "")
}
queue_lookup = load_queue_lookup(existing_queue_rows or [])
for row in purchase_rows: for row in purchase_rows:
normalized_item_id = row.get("normalized_item_id", "") normalized_item_id = row.get("normalized_item_id", "")
@@ -54,30 +83,40 @@ def build_review_queue(purchase_rows, resolution_rows):
queue_rows = [] queue_rows = []
for normalized_item_id, rows in sorted(by_normalized.items()): for normalized_item_id, rows in sorted(by_normalized.items()):
current_resolution = resolution_lookup.get(normalized_item_id, {}) current_resolution = resolution_lookup.get(normalized_item_id, {})
if current_resolution.get("status") == "approved": if current_resolution.get("status") == "approved" and current_resolution.get("resolution_action") == "exclude":
continue continue
existing_queue_row = queue_lookup.get(normalized_item_id, {})
linked_catalog_id = current_resolution.get("catalog_id") or link_lookup.get(normalized_item_id, {}).get("catalog_id", "")
linked_catalog_row = catalog_lookup.get(linked_catalog_id, {})
has_valid_catalog_link = bool(linked_catalog_id and has_complete_catalog_row(linked_catalog_row))
unresolved_rows = [ unresolved_rows = [
row row
for row in rows for row in rows
if not row.get("catalog_id") if row.get("is_item", "true") != "false"
and row.get("is_item", "true") != "false"
and row.get("is_fee") != "true" and row.get("is_fee") != "true"
and row.get("is_discount_line") != "true" and row.get("is_discount_line") != "true"
and row.get("is_coupon_line") != "true" and row.get("is_coupon_line") != "true"
] ]
if not unresolved_rows: if not unresolved_rows or has_valid_catalog_link:
continue continue
retailers = sorted({row["retailer"] for row in rows}) retailers = sorted({row["retailer"] for row in rows})
review_id = stable_id("rvw", normalized_item_id) review_id = stable_id("rvw", normalized_item_id)
reason_code = "missing_catalog_link"
if linked_catalog_id and linked_catalog_id not in catalog_lookup:
reason_code = "orphaned_catalog_link"
elif linked_catalog_id and not has_complete_catalog_row(linked_catalog_row):
reason_code = "incomplete_catalog_link"
queue_rows.append( queue_rows.append(
{ {
"review_id": review_id, "review_id": review_id,
"retailer": " | ".join(retailers), "retailer": " | ".join(retailers),
"normalized_item_id": normalized_item_id, "normalized_item_id": normalized_item_id,
"catalog_id": current_resolution.get("catalog_id", ""), "catalog_id": linked_catalog_id,
"reason_code": "missing_catalog_link", "reason_code": reason_code,
"priority": "high", "priority": "high",
"raw_item_names": compact_join( "raw_item_names": compact_join(
sorted({row["raw_item_name"] for row in rows if row["raw_item_name"]}), sorted({row["raw_item_name"] for row in rows if row["raw_item_name"]}),
@@ -102,10 +141,13 @@ def build_review_queue(purchase_rows, resolution_rows):
limit=8, limit=8,
), ),
"seen_count": str(len(rows)), "seen_count": str(len(rows)),
"status": current_resolution.get("status", "pending"), "status": existing_queue_row.get("status") or current_resolution.get("status", "pending"),
"resolution_action": current_resolution.get("resolution_action", ""), "resolution_action": existing_queue_row.get("resolution_action")
"resolution_notes": current_resolution.get("resolution_notes", ""), or current_resolution.get("resolution_action", ""),
"created_at": current_resolution.get("reviewed_at", today_text), "resolution_notes": existing_queue_row.get("resolution_notes")
or current_resolution.get("resolution_notes", ""),
"created_at": existing_queue_row.get("created_at")
or current_resolution.get("reviewed_at", today_text),
"updated_at": today_text, "updated_at": today_text,
} }
) )
@@ -516,6 +558,10 @@ def link_rows_from_state(link_lookup):
@click.command() @click.command()
@click.option("--giant-items-enriched-csv", default="data/giant-web/normalized_items.csv", show_default=True)
@click.option("--costco-items-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True)
@click.option("--giant-orders-csv", default="data/giant-web/collected_orders.csv", show_default=True)
@click.option("--costco-orders-csv", default="data/costco-web/collected_orders.csv", show_default=True)
@click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True) @click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True)
@click.option("--queue-csv", default="data/review/review_queue.csv", show_default=True) @click.option("--queue-csv", default="data/review/review_queue.csv", show_default=True)
@click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True) @click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True)
@@ -523,12 +569,40 @@ def link_rows_from_state(link_lookup):
@click.option("--links-csv", default="data/review/product_links.csv", show_default=True) @click.option("--links-csv", default="data/review/product_links.csv", show_default=True)
@click.option("--limit", default=0, show_default=True, type=int) @click.option("--limit", default=0, show_default=True, type=int)
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.") @click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, links_csv, limit, refresh_only): def main(
purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv) giant_items_enriched_csv,
costco_items_enriched_csv,
giant_orders_csv,
costco_orders_csv,
purchases_csv,
queue_csv,
resolutions_csv,
catalog_csv,
links_csv,
limit,
refresh_only,
):
resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv) resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
catalog_rows = build_purchases.merge_catalog_rows(build_purchases.read_optional_csv_rows(catalog_csv), []) catalog_rows = build_purchases.merge_catalog_rows(build_purchases.read_optional_csv_rows(catalog_csv), [])
link_lookup = build_purchases.load_link_lookup(build_purchases.read_optional_csv_rows(links_csv)) link_rows = build_purchases.read_optional_csv_rows(links_csv)
queue_rows = build_review_queue(purchase_rows, resolution_rows) purchase_rows, refreshed_link_rows = build_purchases.build_purchase_rows(
build_purchases.read_optional_csv_rows(giant_items_enriched_csv),
build_purchases.read_optional_csv_rows(costco_items_enriched_csv),
build_purchases.read_optional_csv_rows(giant_orders_csv),
build_purchases.read_optional_csv_rows(costco_orders_csv),
resolution_rows,
link_rows,
catalog_rows,
)
build_purchases.write_csv_rows(purchases_csv, purchase_rows, build_purchases.PURCHASE_FIELDS)
link_lookup = build_purchases.load_link_lookup(refreshed_link_rows)
queue_rows = build_review_queue(
purchase_rows,
resolution_rows,
refreshed_link_rows,
catalog_rows,
build_purchases.read_optional_csv_rows(queue_csv),
)
write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS) write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}") click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")

View File

@@ -6,9 +6,94 @@ from unittest import mock
from click.testing import CliRunner from click.testing import CliRunner
import enrich_costco
import review_products import review_products
def write_review_source_files(tmpdir, rows):
    """Write the item and order CSV fixtures consumed by the review CLI tests.

    Every input row is expanded into a full normalized-item record (all
    fields in ``enrich_costco.OUTPUT_FIELDS``, blank unless set below) and
    appended to its retailer's items file. Each retailer's orders file gets
    one record per distinct ``order_id``; the first row seen for an order
    supplies its store fields.

    Args:
        tmpdir: directory the four CSV files are created in.
        rows: iterable of purchase-style dicts. ``retailer`` defaults to
            ``"giant"`` and must be ``"giant"`` or ``"costco"`` (any other
            value raises ``KeyError``).

    Returns:
        tuple of Paths: ``(giant_items_csv, costco_items_csv,
        giant_orders_csv, costco_orders_csv)``.
    """
    base_dir = Path(tmpdir)
    giant_items_csv = base_dir / "giant_items.csv"
    costco_items_csv = base_dir / "costco_items.csv"
    giant_orders_csv = base_dir / "giant_orders.csv"
    costco_orders_csv = base_dir / "costco_orders.csv"

    item_fields = enrich_costco.OUTPUT_FIELDS
    order_fields = ["order_id", "store_name", "store_number", "store_city", "store_state"]
    items_by_retailer = {"giant": [], "costco": []}
    orders_by_retailer = {"giant": {}, "costco": {}}

    def dump_csv(path, fieldnames, records):
        # Header plus all records, UTF-8, no newline translation.
        with path.open("w", newline="", encoding="utf-8") as handle:
            writer = csv.DictWriter(handle, fieldnames=fieldnames)
            writer.writeheader()
            writer.writerows(records)

    for index, row in enumerate(rows, start=1):
        retailer = row.get("retailer", "giant")
        # Resolve the defaults once so the order/line fields and the derived
        # normalized_row_id can never disagree.
        order_id = row.get("order_id", f"{retailer[0]}{index}")
        line_no = row.get("line_no", str(index))

        item = dict.fromkeys(item_fields, "")
        item.update(
            {
                "retailer": retailer,
                "order_id": order_id,
                "line_no": line_no,
                "normalized_row_id": row.get(
                    "normalized_row_id",
                    f"{retailer}:{order_id}:{line_no}",
                ),
                "normalized_item_id": row.get("normalized_item_id", ""),
                "order_date": row.get("purchase_date", ""),
                "item_name": row.get("raw_item_name", ""),
                "item_name_norm": row.get("normalized_item_name", ""),
                "image_url": row.get("image_url", ""),
                "upc": row.get("upc", ""),
                "line_total": row.get("line_total", ""),
                "net_line_total": row.get("net_line_total", ""),
                "matched_discount_amount": row.get("matched_discount_amount", ""),
                "qty": row.get("qty", "1"),
                "unit": row.get("unit", "EA"),
                "normalized_quantity": row.get("normalized_quantity", ""),
                "normalized_quantity_unit": row.get("normalized_quantity_unit", ""),
                "size_value": row.get("size_value", ""),
                "size_unit": row.get("size_unit", ""),
                "pack_qty": row.get("pack_qty", ""),
                "measure_type": row.get("measure_type", "each"),
                "retailer_item_id": row.get("retailer_item_id", ""),
                "price_per_each": row.get("price_per_each", ""),
                "price_per_lb": row.get("price_per_lb", ""),
                "price_per_oz": row.get("price_per_oz", ""),
                "is_discount_line": row.get("is_discount_line", "false"),
                "is_coupon_line": row.get("is_coupon_line", "false"),
                "is_fee": row.get("is_fee", "false"),
                "raw_order_path": row.get("raw_order_path", ""),
            }
        )
        items_by_retailer[retailer].append(item)

        # setdefault keeps the first row's store details for a repeated order.
        orders_by_retailer[retailer].setdefault(
            order_id,
            {
                "order_id": order_id,
                "store_name": row.get("store_name", ""),
                "store_number": row.get("store_number", ""),
                "store_city": row.get("store_city", ""),
                "store_state": row.get("store_state", ""),
            },
        )

    dump_csv(giant_items_csv, item_fields, items_by_retailer["giant"])
    dump_csv(costco_items_csv, item_fields, items_by_retailer["costco"])
    dump_csv(giant_orders_csv, order_fields, orders_by_retailer["giant"].values())
    dump_csv(costco_orders_csv, order_fields, orders_by_retailer["costco"].values())

    return giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv
class ReviewWorkflowTests(unittest.TestCase): class ReviewWorkflowTests(unittest.TestCase):
def test_build_review_queue_groups_unresolved_purchases(self): def test_build_review_queue_groups_unresolved_purchases(self):
queue_rows = review_products.build_review_queue( queue_rows = review_products.build_review_queue(
@@ -114,66 +199,47 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv" resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv" catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv" links_csv = Path(tmpdir) / "product_links.csv"
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
purchase_fields = [ tmpdir,
"purchase_date", [
"retailer", {
"order_id", "purchase_date": "2026-03-14",
"line_no", "retailer": "costco",
"normalized_item_id", "order_id": "c2",
"catalog_id", "line_no": "2",
"raw_item_name", "normalized_item_id": "cnorm_mix",
"normalized_item_name", "raw_item_name": "MIXED PEPPER 6-PACK",
"image_url", "normalized_item_name": "MIXED PEPPER",
"upc", "image_url": "",
"line_total", "upc": "",
] "line_total": "7.49",
with purchases_csv.open("w", newline="", encoding="utf-8") as handle: },
writer = csv.DictWriter(handle, fieldnames=purchase_fields) {
writer.writeheader() "purchase_date": "2026-03-12",
writer.writerows( "retailer": "costco",
[ "order_id": "c1",
{ "line_no": "1",
"purchase_date": "2026-03-14", "normalized_item_id": "cnorm_mix",
"retailer": "costco", "raw_item_name": "MIXED PEPPER 6-PACK",
"order_id": "c2", "normalized_item_name": "MIXED PEPPER",
"line_no": "2", "image_url": "https://example.test/mixed-pepper.jpg",
"normalized_item_id": "cnorm_mix", "upc": "",
"catalog_id": "", "line_total": "6.99",
"raw_item_name": "MIXED PEPPER 6-PACK", },
"normalized_item_name": "MIXED PEPPER", {
"image_url": "", "purchase_date": "2026-03-10",
"upc": "", "retailer": "giant",
"line_total": "7.49", "order_id": "g1",
}, "line_no": "1",
{ "normalized_item_id": "gnorm_mix",
"purchase_date": "2026-03-12", "raw_item_name": "MIXED PEPPER",
"retailer": "costco", "normalized_item_name": "MIXED PEPPER",
"order_id": "c1", "image_url": "",
"line_no": "1", "upc": "",
"normalized_item_id": "cnorm_mix", "line_total": "5.99",
"catalog_id": "", },
"raw_item_name": "MIXED PEPPER 6-PACK", ],
"normalized_item_name": "MIXED PEPPER", )
"image_url": "https://example.test/mixed-pepper.jpg",
"upc": "",
"line_total": "6.99",
},
{
"purchase_date": "2026-03-10",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"raw_item_name": "MIXED PEPPER",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "5.99",
},
]
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle: with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -195,11 +261,34 @@ class ReviewWorkflowTests(unittest.TestCase):
"updated_at": "", "updated_at": "",
} }
) )
with links_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.PRODUCT_LINK_FIELDS)
writer.writeheader()
writer.writerow(
{
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"link_method": "manual_link",
"link_confidence": "high",
"review_status": "approved",
"reviewed_by": "",
"reviewed_at": "",
"link_notes": "",
}
)
runner = CliRunner() runner = CliRunner()
result = runner.invoke( result = runner.invoke(
review_products.main, review_products.main,
[ [
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv", "--purchases-csv",
str(purchases_csv), str(purchases_csv),
"--queue-csv", "--queue-csv",
@@ -234,40 +323,23 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv" resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv" catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv" links_csv = Path(tmpdir) / "product_links.csv"
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
with purchases_csv.open("w", newline="", encoding="utf-8") as handle: tmpdir,
writer = csv.DictWriter( [
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
{ {
"purchase_date": "2026-03-14", "purchase_date": "2026-03-14",
"retailer": "giant", "retailer": "giant",
"order_id": "g1", "order_id": "g1",
"line_no": "1", "line_no": "1",
"normalized_item_id": "gnorm_ice", "normalized_item_id": "gnorm_ice",
"catalog_id": "",
"raw_item_name": "SB BAGGED ICE 20LB", "raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE", "normalized_item_name": "BAGGED ICE",
"image_url": "", "image_url": "",
"upc": "", "upc": "",
"line_total": "3.50", "line_total": "3.50",
} }
) ],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle: with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -276,6 +348,14 @@ class ReviewWorkflowTests(unittest.TestCase):
result = CliRunner().invoke( result = CliRunner().invoke(
review_products.main, review_products.main,
[ [
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv", "--purchases-csv",
str(purchases_csv), str(purchases_csv),
"--queue-csv", "--queue-csv",
@@ -301,68 +381,47 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv" resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv" catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv" links_csv = Path(tmpdir) / "product_links.csv"
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
with purchases_csv.open("w", newline="", encoding="utf-8") as handle: tmpdir,
writer = csv.DictWriter( [
handle, {
fieldnames=[ "purchase_date": "2026-03-14",
"purchase_date", "retailer": "costco",
"retailer", "order_id": "c2",
"order_id", "line_no": "2",
"line_no", "normalized_item_id": "cnorm_mix",
"normalized_item_id", "raw_item_name": "MIXED PEPPER 6-PACK",
"catalog_id", "normalized_item_name": "MIXED PEPPER",
"raw_item_name", "image_url": "",
"normalized_item_name", "upc": "",
"image_url", "line_total": "7.49",
"upc", },
"line_total", {
], "purchase_date": "2026-03-12",
) "retailer": "costco",
writer.writeheader() "order_id": "c1",
writer.writerows( "line_no": "1",
[ "normalized_item_id": "cnorm_mix",
{ "raw_item_name": "MIXED PEPPER 6-PACK",
"purchase_date": "2026-03-14", "normalized_item_name": "MIXED PEPPER",
"retailer": "costco", "image_url": "",
"order_id": "c2", "upc": "",
"line_no": "2", "line_total": "6.99",
"normalized_item_id": "cnorm_mix", },
"catalog_id": "", {
"raw_item_name": "MIXED PEPPER 6-PACK", "purchase_date": "2026-03-10",
"normalized_item_name": "MIXED PEPPER", "retailer": "giant",
"image_url": "", "order_id": "g1",
"upc": "", "line_no": "1",
"line_total": "7.49", "normalized_item_id": "gnorm_mix",
}, "raw_item_name": "MIXED PEPPER",
{ "normalized_item_name": "MIXED PEPPER",
"purchase_date": "2026-03-12", "image_url": "",
"retailer": "costco", "upc": "",
"order_id": "c1", "line_total": "5.99",
"line_no": "1", },
"normalized_item_id": "cnorm_mix", ],
"catalog_id": "", )
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "6.99",
},
{
"purchase_date": "2026-03-10",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"raw_item_name": "MIXED PEPPER",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "5.99",
},
]
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle: with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -384,10 +443,33 @@ class ReviewWorkflowTests(unittest.TestCase):
"updated_at": "", "updated_at": "",
} }
) )
with links_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.PRODUCT_LINK_FIELDS)
writer.writeheader()
writer.writerow(
{
"normalized_item_id": "gnorm_mix",
"catalog_id": "cat_mix",
"link_method": "manual_link",
"link_confidence": "high",
"review_status": "approved",
"reviewed_by": "",
"reviewed_at": "",
"link_notes": "",
}
)
result = CliRunner().invoke( result = CliRunner().invoke(
review_products.main, review_products.main,
[ [
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv", "--purchases-csv",
str(purchases_csv), str(purchases_csv),
"--queue-csv", "--queue-csv",
@@ -422,40 +504,23 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv" resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv" catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv" links_csv = Path(tmpdir) / "product_links.csv"
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
with purchases_csv.open("w", newline="", encoding="utf-8") as handle: tmpdir,
writer = csv.DictWriter( [
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
{ {
"purchase_date": "2026-03-14", "purchase_date": "2026-03-14",
"retailer": "giant", "retailer": "giant",
"order_id": "g1", "order_id": "g1",
"line_no": "1", "line_no": "1",
"normalized_item_id": "gnorm_ice", "normalized_item_id": "gnorm_ice",
"catalog_id": "",
"raw_item_name": "SB BAGGED ICE 20LB", "raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE", "normalized_item_name": "BAGGED ICE",
"image_url": "", "image_url": "",
"upc": "", "upc": "",
"line_total": "3.50", "line_total": "3.50",
} }
) ],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle: with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -481,6 +546,14 @@ class ReviewWorkflowTests(unittest.TestCase):
result = CliRunner().invoke( result = CliRunner().invoke(
review_products.main, review_products.main,
[ [
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv", "--purchases-csv",
str(purchases_csv), str(purchases_csv),
"--queue-csv", "--queue-csv",
@@ -506,40 +579,23 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv" resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv" catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv" links_csv = Path(tmpdir) / "product_links.csv"
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
with purchases_csv.open("w", newline="", encoding="utf-8") as handle: tmpdir,
writer = csv.DictWriter( [
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"normalized_item_id",
"catalog_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
{ {
"purchase_date": "2026-03-14", "purchase_date": "2026-03-14",
"retailer": "giant", "retailer": "giant",
"order_id": "g1", "order_id": "g1",
"line_no": "1", "line_no": "1",
"normalized_item_id": "gnorm_skip", "normalized_item_id": "gnorm_skip",
"catalog_id": "",
"raw_item_name": "TEST ITEM", "raw_item_name": "TEST ITEM",
"normalized_item_name": "TEST ITEM", "normalized_item_name": "TEST ITEM",
"image_url": "", "image_url": "",
"upc": "", "upc": "",
"line_total": "1.00", "line_total": "1.00",
} }
) ],
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle: with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
@@ -548,6 +604,14 @@ class ReviewWorkflowTests(unittest.TestCase):
result = CliRunner().invoke( result = CliRunner().invoke(
review_products.main, review_products.main,
[ [
"--giant-items-enriched-csv",
str(giant_items_csv),
"--costco-items-enriched-csv",
str(costco_items_csv),
"--giant-orders-csv",
str(giant_orders_csv),
"--costco-orders-csv",
str(costco_orders_csv),
"--purchases-csv", "--purchases-csv",
str(purchases_csv), str(purchases_csv),
"--queue-csv", "--queue-csv",
@@ -578,30 +642,12 @@ class ReviewWorkflowTests(unittest.TestCase):
resolutions_csv = Path(tmpdir) / "review_resolutions.csv" resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "catalog.csv" catalog_csv = Path(tmpdir) / "catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv" links_csv = Path(tmpdir) / "product_links.csv"
giant_items_csv, costco_items_csv, giant_orders_csv, costco_orders_csv = write_review_source_files(
with purchases_csv.open("w", newline="", encoding="utf-8") as handle: tmpdir,
writer = csv.DictWriter( [
handle,
fieldnames=[
"purchase_date",
"normalized_item_id",
"catalog_id",
"retailer",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
"order_id",
"line_no",
],
)
writer.writeheader()
writer.writerow(
{ {
"purchase_date": "2026-03-15", "purchase_date": "2026-03-15",
"normalized_item_id": "gnorm_ice", "normalized_item_id": "gnorm_ice",
"catalog_id": "",
"retailer": "giant", "retailer": "giant",
"raw_item_name": "SB BAGGED ICE 20LB", "raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE", "normalized_item_name": "BAGGED ICE",
@@ -611,7 +657,8 @@ class ReviewWorkflowTests(unittest.TestCase):
"order_id": "g1", "order_id": "g1",
"line_no": "1", "line_no": "1",
} }
) ],
)
with mock.patch.object( with mock.patch.object(
review_products.click, review_products.click,
@@ -619,6 +666,10 @@ class ReviewWorkflowTests(unittest.TestCase):
side_effect=["n", "ICE", "frozen", "ice", "manual merge", "q"], side_effect=["n", "ICE", "frozen", "ice", "manual merge", "q"],
): ):
review_products.main.callback( review_products.main.callback(
giant_items_enriched_csv=str(giant_items_csv),
costco_items_enriched_csv=str(costco_items_csv),
giant_orders_csv=str(giant_orders_csv),
costco_orders_csv=str(costco_orders_csv),
purchases_csv=str(purchases_csv), purchases_csv=str(purchases_csv),
queue_csv=str(queue_csv), queue_csv=str(queue_csv),
resolutions_csv=str(resolutions_csv), resolutions_csv=str(resolutions_csv),
@@ -647,6 +698,63 @@ class ReviewWorkflowTests(unittest.TestCase):
self.assertEqual("ICE", catalog_rows[0]["catalog_name"]) self.assertEqual("ICE", catalog_rows[0]["catalog_name"])
self.assertEqual(catalog_rows[0]["catalog_id"], link_rows[0]["catalog_id"]) self.assertEqual(catalog_rows[0]["catalog_id"], link_rows[0]["catalog_id"])
def test_build_review_queue_readds_orphaned_and_incomplete_links(self):
    """Items linked to a missing or incomplete catalog row are re-queued.

    ``gnorm_orphan`` links to ``cat_missing``, which has no catalog row;
    ``gnorm_incomplete`` links to ``cat_incomplete``, whose ``product_type``
    is blank. Both must appear in the queue with the matching reason code.
    """

    def purchase_row(normalized_item_id, catalog_id, item_name, line_total):
        # Minimal purchase row: a regular item line, not a fee/discount/coupon.
        return {
            "normalized_item_id": normalized_item_id,
            "catalog_id": catalog_id,
            "retailer": "giant",
            "raw_item_name": item_name,
            "normalized_item_name": item_name,
            "upc": "",
            "line_total": line_total,
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
        }

    purchase_rows = [
        purchase_row("gnorm_orphan", "cat_missing", "ORPHAN ITEM", "3.50"),
        purchase_row("gnorm_incomplete", "cat_incomplete", "INCOMPLETE ITEM", "4.50"),
    ]
    link_rows = [
        {"normalized_item_id": "gnorm_orphan", "catalog_id": "cat_missing"},
        {"normalized_item_id": "gnorm_incomplete", "catalog_id": "cat_incomplete"},
    ]
    catalog_rows = [
        {
            "catalog_id": "cat_incomplete",
            "catalog_name": "INCOMPLETE ITEM",
            "product_type": "",
        }
    ]

    queue_rows = review_products.build_review_queue(
        purchase_rows,
        [],
        link_rows,
        catalog_rows,
        [],
    )

    reasons = {row["normalized_item_id"]: row["reason_code"] for row in queue_rows}
    # Check membership first so a missing queue row fails as an assertion
    # instead of a bare KeyError on the lookups below.
    self.assertEqual(["gnorm_incomplete", "gnorm_orphan"], sorted(reasons))
    self.assertEqual("orphaned_catalog_link", reasons["gnorm_orphan"])
    self.assertEqual("incomplete_catalog_link", reasons["gnorm_incomplete"])
if __name__ == "__main__": if __name__ == "__main__":
unittest.main() unittest.main()