Reconcile review queue against current catalog state
This commit is contained in:
@@ -31,6 +31,7 @@ INFO_COLOR = "cyan"
|
||||
PROMPT_COLOR = "bright_yellow"
|
||||
WARNING_COLOR = "magenta"
|
||||
TOKEN_RE = re.compile(r"[A-Z0-9]+")
|
||||
REQUIRED_CATALOG_FIELDS = ("catalog_name", "product_type")
|
||||
|
||||
|
||||
def print_intro_text():
|
||||
@@ -40,9 +41,37 @@ def print_intro_text():
|
||||
click.echo(" category: broad analysis bucket such as dairy, produce, or frozen")
|
||||
|
||||
|
||||
def build_review_queue(purchase_rows, resolution_rows):
|
||||
def has_complete_catalog_row(catalog_row):
    """Return True when catalog_row has a non-blank value for every required field.

    The fields checked are the ones named in REQUIRED_CATALOG_FIELDS. An empty
    or missing row, a missing field, or a field that is blank after stripping
    surrounding whitespace all make the row incomplete.
    """
    if not catalog_row:
        return False
    # `or ""` covers a key that is present but holds None (for example a short
    # CSV row padded by csv.DictReader). Calling .strip() on None would raise
    # AttributeError instead of reporting the row as incomplete.
    return all(
        (catalog_row.get(field) or "").strip()
        for field in REQUIRED_CATALOG_FIELDS
    )
|
||||
|
||||
|
||||
def load_queue_lookup(queue_rows):
    """Index queue rows by their "normalized_item_id" value.

    Rows whose id is missing or falsy are left out. When several rows share
    an id, the last one in queue_rows is the one kept.
    """
    keyed_rows = (
        (entry.get("normalized_item_id", ""), entry) for entry in queue_rows
    )
    return {item_id: entry for item_id, entry in keyed_rows if item_id}
|
||||
|
||||
|
||||
def build_review_queue(
|
||||
purchase_rows,
|
||||
resolution_rows,
|
||||
link_rows=None,
|
||||
catalog_rows=None,
|
||||
existing_queue_rows=None,
|
||||
):
|
||||
by_normalized = defaultdict(list)
|
||||
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
|
||||
link_lookup = build_purchases.load_link_lookup(link_rows or [])
|
||||
catalog_lookup = {
|
||||
row.get("catalog_id", ""): build_purchases.normalize_catalog_row(row)
|
||||
for row in (catalog_rows or [])
|
||||
if row.get("catalog_id", "")
|
||||
}
|
||||
queue_lookup = load_queue_lookup(existing_queue_rows or [])
|
||||
|
||||
for row in purchase_rows:
|
||||
normalized_item_id = row.get("normalized_item_id", "")
|
||||
@@ -54,30 +83,40 @@ def build_review_queue(purchase_rows, resolution_rows):
|
||||
queue_rows = []
|
||||
for normalized_item_id, rows in sorted(by_normalized.items()):
|
||||
current_resolution = resolution_lookup.get(normalized_item_id, {})
|
||||
if current_resolution.get("status") == "approved":
|
||||
if current_resolution.get("status") == "approved" and current_resolution.get("resolution_action") == "exclude":
|
||||
continue
|
||||
|
||||
existing_queue_row = queue_lookup.get(normalized_item_id, {})
|
||||
linked_catalog_id = current_resolution.get("catalog_id") or link_lookup.get(normalized_item_id, {}).get("catalog_id", "")
|
||||
linked_catalog_row = catalog_lookup.get(linked_catalog_id, {})
|
||||
has_valid_catalog_link = bool(linked_catalog_id and has_complete_catalog_row(linked_catalog_row))
|
||||
|
||||
unresolved_rows = [
|
||||
row
|
||||
for row in rows
|
||||
if not row.get("catalog_id")
|
||||
and row.get("is_item", "true") != "false"
|
||||
if row.get("is_item", "true") != "false"
|
||||
and row.get("is_fee") != "true"
|
||||
and row.get("is_discount_line") != "true"
|
||||
and row.get("is_coupon_line") != "true"
|
||||
]
|
||||
if not unresolved_rows:
|
||||
if not unresolved_rows or has_valid_catalog_link:
|
||||
continue
|
||||
|
||||
retailers = sorted({row["retailer"] for row in rows})
|
||||
review_id = stable_id("rvw", normalized_item_id)
|
||||
reason_code = "missing_catalog_link"
|
||||
if linked_catalog_id and linked_catalog_id not in catalog_lookup:
|
||||
reason_code = "orphaned_catalog_link"
|
||||
elif linked_catalog_id and not has_complete_catalog_row(linked_catalog_row):
|
||||
reason_code = "incomplete_catalog_link"
|
||||
|
||||
queue_rows.append(
|
||||
{
|
||||
"review_id": review_id,
|
||||
"retailer": " | ".join(retailers),
|
||||
"normalized_item_id": normalized_item_id,
|
||||
"catalog_id": current_resolution.get("catalog_id", ""),
|
||||
"reason_code": "missing_catalog_link",
|
||||
"catalog_id": linked_catalog_id,
|
||||
"reason_code": reason_code,
|
||||
"priority": "high",
|
||||
"raw_item_names": compact_join(
|
||||
sorted({row["raw_item_name"] for row in rows if row["raw_item_name"]}),
|
||||
@@ -102,10 +141,13 @@ def build_review_queue(purchase_rows, resolution_rows):
|
||||
limit=8,
|
||||
),
|
||||
"seen_count": str(len(rows)),
|
||||
"status": current_resolution.get("status", "pending"),
|
||||
"resolution_action": current_resolution.get("resolution_action", ""),
|
||||
"resolution_notes": current_resolution.get("resolution_notes", ""),
|
||||
"created_at": current_resolution.get("reviewed_at", today_text),
|
||||
"status": existing_queue_row.get("status") or current_resolution.get("status", "pending"),
|
||||
"resolution_action": existing_queue_row.get("resolution_action")
|
||||
or current_resolution.get("resolution_action", ""),
|
||||
"resolution_notes": existing_queue_row.get("resolution_notes")
|
||||
or current_resolution.get("resolution_notes", ""),
|
||||
"created_at": existing_queue_row.get("created_at")
|
||||
or current_resolution.get("reviewed_at", today_text),
|
||||
"updated_at": today_text,
|
||||
}
|
||||
)
|
||||
@@ -516,6 +558,10 @@ def link_rows_from_state(link_lookup):
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option("--giant-items-enriched-csv", default="data/giant-web/normalized_items.csv", show_default=True)
|
||||
@click.option("--costco-items-enriched-csv", default="data/costco-web/normalized_items.csv", show_default=True)
|
||||
@click.option("--giant-orders-csv", default="data/giant-web/collected_orders.csv", show_default=True)
|
||||
@click.option("--costco-orders-csv", default="data/costco-web/collected_orders.csv", show_default=True)
|
||||
@click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True)
|
||||
@click.option("--queue-csv", default="data/review/review_queue.csv", show_default=True)
|
||||
@click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True)
|
||||
@@ -523,12 +569,40 @@ def link_rows_from_state(link_lookup):
|
||||
@click.option("--links-csv", default="data/review/product_links.csv", show_default=True)
|
||||
@click.option("--limit", default=0, show_default=True, type=int)
|
||||
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
|
||||
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, links_csv, limit, refresh_only):
|
||||
purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv)
|
||||
def main(
|
||||
giant_items_enriched_csv,
|
||||
costco_items_enriched_csv,
|
||||
giant_orders_csv,
|
||||
costco_orders_csv,
|
||||
purchases_csv,
|
||||
queue_csv,
|
||||
resolutions_csv,
|
||||
catalog_csv,
|
||||
links_csv,
|
||||
limit,
|
||||
refresh_only,
|
||||
):
|
||||
resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
|
||||
catalog_rows = build_purchases.merge_catalog_rows(build_purchases.read_optional_csv_rows(catalog_csv), [])
|
||||
link_lookup = build_purchases.load_link_lookup(build_purchases.read_optional_csv_rows(links_csv))
|
||||
queue_rows = build_review_queue(purchase_rows, resolution_rows)
|
||||
link_rows = build_purchases.read_optional_csv_rows(links_csv)
|
||||
purchase_rows, refreshed_link_rows = build_purchases.build_purchase_rows(
|
||||
build_purchases.read_optional_csv_rows(giant_items_enriched_csv),
|
||||
build_purchases.read_optional_csv_rows(costco_items_enriched_csv),
|
||||
build_purchases.read_optional_csv_rows(giant_orders_csv),
|
||||
build_purchases.read_optional_csv_rows(costco_orders_csv),
|
||||
resolution_rows,
|
||||
link_rows,
|
||||
catalog_rows,
|
||||
)
|
||||
build_purchases.write_csv_rows(purchases_csv, purchase_rows, build_purchases.PURCHASE_FIELDS)
|
||||
link_lookup = build_purchases.load_link_lookup(refreshed_link_rows)
|
||||
queue_rows = build_review_queue(
|
||||
purchase_rows,
|
||||
resolution_rows,
|
||||
refreshed_link_rows,
|
||||
catalog_rows,
|
||||
build_purchases.read_optional_csv_rows(queue_csv),
|
||||
)
|
||||
write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
|
||||
click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user