Tighten review prompt flow
This commit is contained in:
@@ -31,6 +31,13 @@ PROMPT_COLOR = "bright_yellow"
|
||||
WARNING_COLOR = "magenta"
|
||||
|
||||
|
||||
def print_intro_text():
    """Print the short review guide that explains the three catalog fields.

    The heading is written with ``click.secho`` in ``INFO_COLOR``; the three
    explanatory lines (catalog name, product type, category) are written
    unstyled with ``click.echo``.
    """
    click.secho("Review guide:", fg=INFO_COLOR)
    click.echo(" catalog name: unique product identity including variant, but not packaging")
    click.echo(" product type: general product you want to compare across purchases")
    click.echo(" category: broad analysis bucket such as dairy, produce, or frozen")
|
||||
|
||||
|
||||
def build_review_queue(purchase_rows, resolution_rows):
|
||||
by_normalized = defaultdict(list)
|
||||
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
|
||||
@@ -111,6 +118,10 @@ def save_catalog_rows(path, rows):
|
||||
write_csv_rows(path, rows, build_purchases.CATALOG_FIELDS)
|
||||
|
||||
|
||||
def save_link_rows(path, rows):
    """Write product-link rows to ``path``.

    Delegates to ``write_csv_rows``, passing
    ``build_purchases.PRODUCT_LINK_FIELDS`` as the field list.
    """
    write_csv_rows(path, rows, build_purchases.PRODUCT_LINK_FIELDS)
|
||||
|
||||
|
||||
def sort_related_items(rows):
|
||||
return sorted(
|
||||
rows,
|
||||
@@ -179,23 +190,64 @@ def build_catalog_suggestions(related_rows, purchase_rows, catalog_rows, limit=3
|
||||
return suggestions
|
||||
|
||||
|
||||
def suggestion_display_rows(suggestions, purchase_rows, catalog_rows):
    """Decorate catalog suggestions with descriptive fields and usage counts.

    Args:
        suggestions: iterable of dicts, each with a ``"catalog_id"`` key.
        purchase_rows: iterable of dicts; ``"catalog_id"``,
            ``"normalized_item_id"``, ``"product_type"`` and ``"category"``
            are read with ``.get``.
        catalog_rows: iterable of dicts; rows with a falsy ``"catalog_id"``
            are ignored.

    Returns:
        A new list with one dict per suggestion: the suggestion's own keys
        plus ``product_type``, ``category``, ``linked_purchase_rows`` (number
        of purchase rows carrying that catalog id) and
        ``linked_normalized_items`` (number of distinct normalized item ids
        among those rows).
    """
    linked_purchase_counts = defaultdict(int)
    linked_normalized_ids = defaultdict(set)
    for row in purchase_rows:
        catalog_id = row.get("catalog_id", "")
        normalized_item_id = row.get("normalized_item_id", "")
        # Only rows that carry both ids count as linked purchases.
        if not catalog_id or not normalized_item_id:
            continue
        linked_purchase_counts[catalog_id] += 1
        linked_normalized_ids[catalog_id].add(normalized_item_id)

    display_rows = []
    catalog_details = {
        row["catalog_id"]: {
            "product_type": row.get("product_type", ""),
            "category": row.get("category", ""),
        }
        for row in catalog_rows
        if row.get("catalog_id")
    }
    # Purchase rows only fill in catalog ids that the catalog itself did not
    # describe; setdefault keeps the catalog's values when both exist.
    for row in purchase_rows:
        if row.get("catalog_id"):
            catalog_details.setdefault(
                row["catalog_id"],
                {
                    "product_type": row.get("product_type", ""),
                    "category": row.get("category", ""),
                },
            )

    for row in suggestions:
        catalog_id = row["catalog_id"]
        details = catalog_details.get(catalog_id, {})
        display_rows.append(
            {
                **row,
                "product_type": details.get("product_type", ""),
                "category": details.get("category", ""),
                "linked_purchase_rows": linked_purchase_counts.get(catalog_id, 0),
                "linked_normalized_items": len(linked_normalized_ids.get(catalog_id, set())),
            }
        )
    return display_rows
|
||||
|
||||
|
||||
def build_display_lines(related_rows):
    """Build the numbered text lines that list the matched purchase rows.

    Rows are taken in the order returned by ``sort_related_items`` and
    numbered from 1. Each line shows the raw item name, retailer, purchase
    date, line total and image URL, with missing keys rendered as empty
    strings.

    Returns:
        A list of strings. When there are no rows, the list holds a single
        placeholder line instead of being empty.
    """
    lines = []
    for index, row in enumerate(sort_related_items(related_rows), start=1):
        lines.append(
            " [{index}] {raw_item_name} | {retailer} | {purchase_date} | {line_total} | {image_url}".format(
                index=index,
                raw_item_name=row.get("raw_item_name", ""),
                retailer=row.get("retailer", ""),
                purchase_date=row.get("purchase_date", ""),
                line_total=row.get("line_total", ""),
                image_url=row.get("image_url", ""),
            )
        )
    if not lines:
        lines.append(" [1] no matched item rows found")
    return lines
|
||||
@@ -216,7 +268,11 @@ def choose_existing_catalog(display_rows, normalized_name, matched_count):
|
||||
fg=INFO_COLOR,
|
||||
)
|
||||
for index, row in enumerate(display_rows, start=1):
|
||||
click.echo(f" [{index}] {row['catalog_name']} | {row['catalog_id']}")
|
||||
click.echo(
|
||||
f" [{index}] {row['catalog_name']}, {row.get('product_type', '')}, "
|
||||
f"{row.get('category', '')} ({row['linked_normalized_items']} items, "
|
||||
f"{row['linked_purchase_rows']} rows)"
|
||||
)
|
||||
choice = click.prompt(
|
||||
click.style("selection", fg=PROMPT_COLOR),
|
||||
type=click.IntRange(1, len(display_rows)),
|
||||
@@ -241,13 +297,16 @@ def choose_existing_catalog(display_rows, normalized_name, matched_count):
|
||||
|
||||
|
||||
def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queue_index, queue_total):
|
||||
suggestions = build_catalog_suggestions(related_rows, purchase_rows, catalog_rows)
|
||||
suggestions = suggestion_display_rows(
|
||||
build_catalog_suggestions(related_rows, purchase_rows, catalog_rows),
|
||||
purchase_rows,
|
||||
catalog_rows,
|
||||
)
|
||||
normalized_name = normalized_label(queue_row, related_rows)
|
||||
matched_count = len(related_rows)
|
||||
click.echo("")
|
||||
click.secho(
|
||||
f"Review {queue_index}/{queue_total}: Resolve normalized_item {normalized_name} "
|
||||
"to catalog_name [__]?",
|
||||
f"Review {queue_index}/{queue_total}: {normalized_name}",
|
||||
fg=INFO_COLOR,
|
||||
)
|
||||
click.echo(f"{matched_count} matched items:")
|
||||
@@ -256,11 +315,34 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
|
||||
if suggestions:
|
||||
click.echo(f"{len(suggestions)} catalog_name suggestions found:")
|
||||
for index, suggestion in enumerate(suggestions, start=1):
|
||||
click.echo(f" [{index}] {suggestion['catalog_name']}")
|
||||
click.echo(
|
||||
f" [{index}] {suggestion['catalog_name']}, {suggestion.get('product_type', '')}, "
|
||||
f"{suggestion.get('category', '')} ({suggestion['linked_normalized_items']} items, "
|
||||
f"{suggestion['linked_purchase_rows']} rows)"
|
||||
)
|
||||
else:
|
||||
click.echo("no catalog_name suggestions found")
|
||||
click.secho("[l]ink existing [n]ew catalog e[x]clude [s]kip [q]uit:", fg=PROMPT_COLOR)
|
||||
action = click.prompt("", type=click.Choice(["l", "n", "x", "s", "q"]), prompt_suffix=" ")
|
||||
prompt_bits = []
|
||||
if suggestions:
|
||||
prompt_bits.append("[#] link to suggestion")
|
||||
prompt_bits.extend(["[l]ink existing", "[n]ew", "[s]kip", "e[x]clude", "[q]uit"])
|
||||
click.secho(" ".join(prompt_bits) + " >", fg=PROMPT_COLOR)
|
||||
action = click.prompt("", type=str, prompt_suffix=" ").strip().lower()
|
||||
if action.isdigit() and suggestions:
|
||||
choice = int(action)
|
||||
if 1 <= choice <= len(suggestions):
|
||||
chosen_row = suggestions[choice - 1]
|
||||
notes = click.prompt(click.style("link notes", fg=PROMPT_COLOR), default="", show_default=False)
|
||||
return {
|
||||
"normalized_item_id": queue_row["normalized_item_id"],
|
||||
"catalog_id": chosen_row["catalog_id"],
|
||||
"resolution_action": "link",
|
||||
"status": "approved",
|
||||
"resolution_notes": notes,
|
||||
"reviewed_at": str(date.today()),
|
||||
}, None
|
||||
click.secho("invalid suggestion number", fg=WARNING_COLOR)
|
||||
return prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queue_index, queue_total)
|
||||
if action == "q":
|
||||
return None, None
|
||||
if action == "s":
|
||||
@@ -288,6 +370,10 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
|
||||
"catalog_id": row["catalog_id"],
|
||||
"catalog_name": row["catalog_name"],
|
||||
"reason": "catalog sample",
|
||||
"product_type": row.get("product_type", ""),
|
||||
"category": row.get("category", ""),
|
||||
"linked_normalized_items": 0,
|
||||
"linked_purchase_rows": 0,
|
||||
}
|
||||
for row in catalog_rows[:10]
|
||||
if row.get("catalog_id")
|
||||
@@ -317,10 +403,13 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
|
||||
"resolution_notes": notes,
|
||||
"reviewed_at": str(date.today()),
|
||||
}, None
|
||||
if action != "n":
|
||||
click.secho("invalid action", fg=WARNING_COLOR)
|
||||
return prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queue_index, queue_total)
|
||||
|
||||
catalog_name = click.prompt(click.style("catalog name", fg=PROMPT_COLOR), type=str)
|
||||
category = click.prompt(click.style("category", fg=PROMPT_COLOR), default="", show_default=False)
|
||||
product_type = click.prompt(click.style("product type", fg=PROMPT_COLOR), default="", show_default=False)
|
||||
category = click.prompt(click.style("category", fg=PROMPT_COLOR), default="", show_default=False)
|
||||
notes = click.prompt(click.style("notes", fg=PROMPT_COLOR), default="", show_default=False)
|
||||
catalog_id = stable_id("cat", f"manual|{catalog_name}|{category}|{product_type}")
|
||||
catalog_row = {
|
||||
@@ -349,17 +438,41 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
|
||||
return resolution_row, catalog_row
|
||||
|
||||
|
||||
def apply_resolution_to_queue(queue_rows, resolution_lookup):
    """Return copies of the queue rows with any matching resolution applied.

    Args:
        queue_rows: iterable of dicts, each with a ``"normalized_item_id"``.
        resolution_lookup: mapping of normalized item id to a resolution
            dict (``catalog_id``, ``status``, ``resolution_action``,
            ``resolution_notes``, ``reviewed_at`` are read with ``.get``).

    Returns:
        A new list of new dicts, one per queue row, in the original order.
        Rows without a (non-empty) resolution are copied through unchanged;
        the input rows are never mutated.
    """
    today_text = str(date.today())
    updated_rows = []
    for row in queue_rows:
        resolution = resolution_lookup.get(row["normalized_item_id"], {})
        row_copy = dict(row)
        if resolution:
            row_copy["catalog_id"] = resolution.get("catalog_id", "")
            row_copy["status"] = resolution.get("status", row_copy.get("status", "pending"))
            row_copy["resolution_action"] = resolution.get("resolution_action", "")
            row_copy["resolution_notes"] = resolution.get("resolution_notes", "")
            # Fall back to today's date when the resolution has no review date.
            row_copy["updated_at"] = resolution.get("reviewed_at", today_text)
            if resolution.get("status") == "approved":
                # Keep an existing created_at; only fill it when it is empty.
                row_copy["created_at"] = row_copy.get("created_at") or resolution.get("reviewed_at", today_text)
        updated_rows.append(row_copy)
    return updated_rows
|
||||
|
||||
|
||||
def link_rows_from_state(link_lookup):
    """Return the link rows held in ``link_lookup`` as a sorted list.

    Args:
        link_lookup: mapping whose values are dicts containing a
            ``"normalized_item_id"`` key.

    Returns:
        A new list of the mapping's values, ordered by each row's
        ``"normalized_item_id"``.
    """
    return sorted(link_lookup.values(), key=lambda row: row["normalized_item_id"])
|
||||
|
||||
|
||||
@click.command()
|
||||
@click.option("--purchases-csv", default="data/review/purchases.csv", show_default=True)
|
||||
@click.option("--queue-csv", default="data/review/review_queue.csv", show_default=True)
|
||||
@click.option("--resolutions-csv", default="data/review/review_resolutions.csv", show_default=True)
|
||||
@click.option("--catalog-csv", default="data/catalog.csv", show_default=True)
|
||||
@click.option("--links-csv", default="data/review/product_links.csv", show_default=True)
|
||||
@click.option("--limit", default=0, show_default=True, type=int)
|
||||
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
|
||||
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_only):
|
||||
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, links_csv, limit, refresh_only):
|
||||
purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv)
|
||||
resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
|
||||
catalog_rows = build_purchases.merge_catalog_rows(build_purchases.read_optional_csv_rows(catalog_csv), [])
|
||||
link_lookup = build_purchases.load_link_lookup(build_purchases.read_optional_csv_rows(links_csv))
|
||||
queue_rows = build_review_queue(purchase_rows, resolution_rows)
|
||||
write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
|
||||
click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")
|
||||
@@ -367,6 +480,7 @@ def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_
|
||||
if refresh_only:
|
||||
return
|
||||
|
||||
print_intro_text()
|
||||
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
|
||||
catalog_by_id = {row["catalog_id"]: row for row in catalog_rows if row.get("catalog_id")}
|
||||
rows_by_normalized = defaultdict(list)
|
||||
@@ -388,16 +502,38 @@ def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_
|
||||
if catalog_row and catalog_row["catalog_id"] not in catalog_by_id:
|
||||
catalog_by_id[catalog_row["catalog_id"]] = catalog_row
|
||||
catalog_rows.append(catalog_row)
|
||||
normalized_item_id = resolution_row["normalized_item_id"]
|
||||
if resolution_row["status"] == "approved":
|
||||
if resolution_row["resolution_action"] in {"link", "create"} and resolution_row.get("catalog_id"):
|
||||
link_lookup[normalized_item_id] = {
|
||||
"normalized_item_id": normalized_item_id,
|
||||
"catalog_id": resolution_row["catalog_id"],
|
||||
"link_method": f"manual_{resolution_row['resolution_action']}",
|
||||
"link_confidence": "high",
|
||||
"review_status": "approved",
|
||||
"reviewed_by": "",
|
||||
"reviewed_at": resolution_row.get("reviewed_at", ""),
|
||||
"link_notes": resolution_row.get("resolution_notes", ""),
|
||||
}
|
||||
elif resolution_row["resolution_action"] == "exclude":
|
||||
link_lookup.pop(normalized_item_id, None)
|
||||
queue_rows = apply_resolution_to_queue(queue_rows, resolution_lookup)
|
||||
write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
|
||||
save_resolution_rows(
|
||||
resolutions_csv,
|
||||
sorted(resolution_lookup.values(), key=lambda row: row["normalized_item_id"]),
|
||||
)
|
||||
save_catalog_rows(catalog_csv, sorted(catalog_by_id.values(), key=lambda row: row["catalog_id"]))
|
||||
save_link_rows(links_csv, link_rows_from_state(link_lookup))
|
||||
reviewed += 1
|
||||
|
||||
save_resolution_rows(
|
||||
resolutions_csv,
|
||||
sorted(resolution_lookup.values(), key=lambda row: row["normalized_item_id"]),
|
||||
)
|
||||
save_resolution_rows(resolutions_csv, sorted(resolution_lookup.values(), key=lambda row: row["normalized_item_id"]))
|
||||
save_catalog_rows(catalog_csv, sorted(catalog_by_id.values(), key=lambda row: row["catalog_id"]))
|
||||
save_link_rows(links_csv, link_rows_from_state(link_lookup))
|
||||
click.echo(
|
||||
f"saved {len(resolution_lookup)} resolution rows to {resolutions_csv} "
|
||||
f"and {len(catalog_by_id)} catalog rows to {catalog_csv}"
|
||||
f"saved {len(resolution_lookup)} resolution rows to {resolutions_csv}, "
|
||||
f"{len(catalog_by_id)} catalog rows to {catalog_csv}, "
|
||||
f"and {len(link_lookup)} product links to {links_csv}"
|
||||
)
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user