Add terminal review resolution workflow
This commit is contained in:
223
review_products.py
Normal file
223
review_products.py
Normal file
@@ -0,0 +1,223 @@
|
||||
from collections import defaultdict
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
import build_purchases
|
||||
from layer_helpers import compact_join, stable_id, write_csv_rows
|
||||
|
||||
|
||||
# Field names handed to write_csv_rows() when the review queue CSV is written.
# Every dict produced by build_review_queue() uses exactly these keys.
QUEUE_FIELDS = [
    "review_id",
    "retailer",
    "observed_product_id",
    "canonical_product_id",
    "reason_code",
    "priority",
    "raw_item_names",
    "normalized_names",
    "upc_values",
    "example_prices",
    "seen_count",
    "status",
    "resolution_action",
    "resolution_notes",
    "created_at",
    "updated_at",
]
|
||||
|
||||
|
||||
def _distinct_sorted(rows, field):
    """Return the sorted distinct truthy values of ``field`` across ``rows``."""
    return sorted({row.get(field) for row in rows if row.get(field)})


def build_review_queue(purchase_rows, resolution_rows):
    """Build one review-queue row per observed product that still needs review.

    Purchase rows are grouped by ``observed_product_id``; rows without one are
    ignored. A group is left out of the queue when its current resolution has
    status ``"approved"``, or when every purchase row in the group already
    carries a ``canonical_product_id``.

    Args:
        purchase_rows: Iterable of dict-like purchase rows.
        resolution_rows: Existing resolution rows, handed to
            ``build_purchases.load_resolution_lookup``.

    Returns:
        A list of dicts using the ``QUEUE_FIELDS`` keys, ordered by
        ``observed_product_id``.
    """
    # NOTE(review): the lookup is used as a mapping from observed_product_id
    # to a resolution row -- confirm against build_purchases.
    resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)

    by_observed = defaultdict(list)
    for row in purchase_rows:
        observed_product_id = row.get("observed_product_id", "")
        if observed_product_id:
            by_observed[observed_product_id].append(row)

    today_text = str(date.today())
    queue_rows = []
    for observed_product_id, rows in sorted(by_observed.items()):
        current_resolution = resolution_lookup.get(observed_product_id, {})
        if current_resolution.get("status") == "approved":
            continue
        # Nothing to review when every purchase row is already linked.
        if all(row.get("canonical_product_id") for row in rows):
            continue

        queue_rows.append(
            {
                "review_id": stable_id("rvw", observed_product_id),
                # Blank retailers are dropped, matching the other joined fields.
                "retailer": " | ".join(_distinct_sorted(rows, "retailer")),
                "observed_product_id": observed_product_id,
                "canonical_product_id": current_resolution.get("canonical_product_id", ""),
                "reason_code": "missing_canonical_link",
                "priority": "high",
                "raw_item_names": compact_join(
                    _distinct_sorted(rows, "raw_item_name"),
                    limit=8,
                ),
                "normalized_names": compact_join(
                    _distinct_sorted(rows, "normalized_item_name"),
                    limit=8,
                ),
                "upc_values": compact_join(
                    _distinct_sorted(rows, "upc"),
                    limit=8,
                ),
                # NOTE(review): values are sorted as stored; if line_total is
                # text (e.g. read from CSV) the order is lexicographic.
                "example_prices": compact_join(
                    _distinct_sorted(rows, "line_total"),
                    limit=8,
                ),
                "seen_count": str(len(rows)),
                # `or` rather than a .get default: a resolution row with a
                # blank cell yields "" and must still fall back.
                "status": current_resolution.get("status") or "pending",
                "resolution_action": current_resolution.get("resolution_action", ""),
                "resolution_notes": current_resolution.get("resolution_notes", ""),
                "created_at": current_resolution.get("reviewed_at") or today_text,
                "updated_at": today_text,
            }
        )
    return queue_rows
|
||||
|
||||
|
||||
def save_resolution_rows(path, rows):
    """Write resolution ``rows`` to ``path`` through ``write_csv_rows``.

    ``build_purchases.RESOLUTION_FIELDS`` is passed as the field list.
    """
    write_csv_rows(path, rows, build_purchases.RESOLUTION_FIELDS)
|
||||
|
||||
|
||||
def save_catalog_rows(path, rows):
    """Write canonical catalog ``rows`` to ``path`` through ``write_csv_rows``.

    ``build_purchases.CATALOG_FIELDS`` is passed as the field list.
    """
    write_csv_rows(path, rows, build_purchases.CATALOG_FIELDS)
|
||||
|
||||
|
||||
def _make_resolution_row(observed_product_id, canonical_product_id, action, status, notes, reviewed_at=None):
    """Assemble a resolution row; ``reviewed_at`` defaults to today's date."""
    return {
        "observed_product_id": observed_product_id,
        "canonical_product_id": canonical_product_id,
        "resolution_action": action,
        "status": status,
        "resolution_notes": notes,
        "reviewed_at": str(date.today()) if reviewed_at is None else reviewed_at,
    }


def prompt_resolution(queue_row, catalog_rows):
    """Show one queue row on the terminal and ask how to resolve it.

    Args:
        queue_row: A review-queue dict (see ``build_review_queue``).
        catalog_rows: List of existing canonical catalog rows; the first ten
            are listed when the reviewer chooses to link.

    Returns:
        A ``(resolution_row, canonical_row)`` tuple:

        * ``(None, None)`` when the reviewer quits.
        * ``(resolution_row, None)`` for skip, exclude and link.
        * ``(resolution_row, canonical_row)`` when a new canonical product
          is created.
    """
    observed_product_id = queue_row["observed_product_id"]

    click.echo("")
    click.echo(f"observed_product_id: {observed_product_id}")
    click.echo(f"retailer: {queue_row['retailer']}")
    click.echo(f"raw names: {queue_row['raw_item_names']}")
    click.echo(f"normalized names: {queue_row['normalized_names']}")
    click.echo(f"upcs: {queue_row['upc_values']}")
    click.echo(f"example prices: {queue_row['example_prices']}")
    click.echo(f"seen count: {queue_row['seen_count']}")
    click.echo("actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit")
    action = click.prompt("action", type=click.Choice(["l", "n", "x", "s", "q"]))

    if action == "q":
        return None, None

    if action == "s":
        # A skip keeps the item pending and carries the existing notes along.
        return _make_resolution_row(
            observed_product_id,
            "",
            "skip",
            "pending",
            queue_row.get("resolution_notes", ""),
        ), None

    if action == "x":
        notes = click.prompt("exclude notes", default="", show_default=False)
        return _make_resolution_row(observed_product_id, "", "exclude", "approved", notes), None

    if action == "l":
        click.echo("existing canonicals:")
        for row in catalog_rows[:10]:
            click.echo(f" {row['canonical_product_id']} {row['canonical_name']}")
        known_ids = {
            row.get("canonical_product_id")
            for row in catalog_rows
            if row.get("canonical_product_id")
        }
        while True:
            canonical_product_id = click.prompt("canonical product id", type=str).strip()
            if canonical_product_id in known_ids:
                break
            # An unknown id would record an approved link to a product that
            # is not in the catalog, so require an explicit confirmation.
            click.echo(f"warning: {canonical_product_id!r} is not in the catalog")
            if click.confirm("link anyway?", default=False):
                break
        notes = click.prompt("link notes", default="", show_default=False)
        return _make_resolution_row(
            observed_product_id,
            canonical_product_id,
            "link",
            "approved",
            notes,
        ), None

    # Remaining choice is "n": create a new canonical product.
    canonical_name = click.prompt("canonical name", type=str)
    category = click.prompt("category", default="", show_default=False)
    product_type = click.prompt("product type", default="", show_default=False)
    notes = click.prompt("notes", default="", show_default=False)
    canonical_product_id = stable_id("gcan", f"manual|{canonical_name}|{category}|{product_type}")
    # One date for all three timestamps so they cannot straddle midnight.
    today_text = str(date.today())
    canonical_row = {
        "canonical_product_id": canonical_product_id,
        "canonical_name": canonical_name,
        "category": category,
        "product_type": product_type,
        "brand": "",
        "variant": "",
        "size_value": "",
        "size_unit": "",
        "pack_qty": "",
        "measure_type": "",
        "notes": notes,
        "created_at": today_text,
        "updated_at": today_text,
    }
    resolution_row = _make_resolution_row(
        observed_product_id,
        canonical_product_id,
        "create",
        "approved",
        notes,
        reviewed_at=today_text,
    )
    return resolution_row, canonical_row
|
||||
|
||||
|
||||
@click.command()
@click.option("--purchases-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--queue-csv", default="combined_output/review_queue.csv", show_default=True)
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--limit", default=0, show_default=True, type=int)
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_only):
    """Rebuild the review queue CSV, then review its rows interactively.

    Resolutions and any new canonical products are saved when the session
    ends, including when it is interrupted at a prompt.
    """
    purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv)
    resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
    catalog_rows = build_purchases.read_optional_csv_rows(catalog_csv)

    # The queue file is always rewritten, even when no prompting follows.
    queue_rows = build_review_queue(purchase_rows, resolution_rows)
    write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
    click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")

    if refresh_only:
        return

    resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
    # NOTE(review): catalog rows without a canonical_product_id are not part of
    # this mapping and therefore are not written back by the save below.
    catalog_by_id = {row["canonical_product_id"]: row for row in catalog_rows if row.get("canonical_product_id")}

    aborted = False
    reviewed = 0
    try:
        for queue_row in queue_rows:
            # A limit of 0 means "no limit".
            if limit and reviewed >= limit:
                break
            resolution_row, canonical_row = prompt_resolution(queue_row, catalog_rows)
            if resolution_row is None:
                # The reviewer chose to quit.
                break
            resolution_lookup[resolution_row["observed_product_id"]] = resolution_row
            if canonical_row and canonical_row["canonical_product_id"] not in catalog_by_id:
                catalog_by_id[canonical_row["canonical_product_id"]] = canonical_row
                # Also extend the list so later prompts can show the new entry.
                catalog_rows.append(canonical_row)
            reviewed += 1
    except click.Abort:
        # Ctrl-C / EOF at a prompt: keep the answers already completed instead
        # of throwing the whole session away; the abort is re-raised below.
        aborted = True
        click.echo("")

    save_resolution_rows(resolutions_csv, sorted(resolution_lookup.values(), key=lambda row: row["observed_product_id"]))
    save_catalog_rows(catalog_csv, sorted(catalog_by_id.values(), key=lambda row: row["canonical_product_id"]))
    click.echo(
        f"saved {len(resolution_lookup)} resolution rows to {resolutions_csv} "
        f"and {len(catalog_by_id)} catalog rows to {catalog_csv}"
    )

    if aborted:
        # Preserve click's normal abort handling now that progress is on disk.
        raise click.Abort()
|
||||
|
||||
|
||||
# Run the click command when this file is executed as a script.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user