Improve product review display workflow
This commit is contained in:
@@ -1,6 +1,5 @@
|
||||
from collections import defaultdict
|
||||
from datetime import date
|
||||
from pathlib import Path
|
||||
|
||||
import click
|
||||
|
||||
@@ -99,17 +98,140 @@ def save_catalog_rows(path, rows):
|
||||
write_csv_rows(path, rows, build_purchases.CATALOG_FIELDS)
|
||||
|
||||
|
||||
def prompt_resolution(queue_row, catalog_rows):
|
||||
# Foreground colors passed as ``fg=`` to click.secho / click.style below.
# Section headings such as "matched items:" and the review banner.
INFO_COLOR = "cyan"
|
||||
# Action menu line and the labels of interactive prompts.
PROMPT_COLOR = "bright_yellow"
|
||||
# Notice shown when no canonical suggestions could be derived.
WARNING_COLOR = "magenta"
|
||||
|
||||
|
||||
def sort_related_items(rows):
    """Return ``rows`` as a new list ordered newest-first.

    Rows are ranked by ``purchase_date``, then ``order_id``, then the integer
    value of ``line_no``, all in descending order.

    Args:
        rows: Iterable of dict-like purchase rows. Missing or ``None`` fields
            are treated as empty values.

    Returns:
        A new sorted list; the input is not modified.
    """

    def line_number(row):
        # A blank, missing, or non-integer line number must not abort the
        # whole review session; rank such rows as line 0 instead.
        try:
            return int(row.get("line_no") or 0)
        except (TypeError, ValueError):
            return 0

    return sorted(
        rows,
        key=lambda row: (
            # ``or ""`` keeps None values comparable with real strings.
            row.get("purchase_date") or "",
            row.get("order_id") or "",
            line_number(row),
        ),
        reverse=True,
    )
|
||||
|
||||
|
||||
def build_canonical_suggestions(related_rows, catalog_rows, limit=3):
    """Suggest catalog entries that plausibly match the observed purchase rows.

    Catalog rows are matched in three tiers, strongest first:

    1. ``"exact upc"`` - the catalog ``upc`` equals a UPC seen in
       ``related_rows``.
    2. ``"exact normalized name"`` - the upper-cased ``canonical_name`` equals
       an upper-cased ``normalized_item_name`` from ``related_rows``.
    3. ``"canonical name contains match"`` - either name is a substring of
       the other.

    Args:
        related_rows: Purchase rows supplying ``normalized_item_name`` and
            ``upc`` values.
        catalog_rows: Catalog rows with ``canonical_product_id``,
            ``canonical_name`` and ``upc``. Iterated once per tier.
        limit: Maximum number of suggestions. A value of zero or less yields
            an empty list.

    Returns:
        A list of dicts with keys ``canonical_product_id``,
        ``canonical_name`` and ``reason``, de-duplicated by
        ``canonical_product_id``, in tier order then catalog order.
    """
    if limit <= 0:
        return []

    def text(row, field):
        # Fields can be present with a None value (e.g. short CSV rows), so
        # ``row.get(field, "")`` alone is not enough before calling strip().
        return (row.get(field) or "").strip()

    def name_key(row):
        return text(row, "canonical_name").upper()

    normalized_names = {
        name.upper()
        for name in (text(row, "normalized_item_name") for row in related_rows)
        if name
    }
    upcs = {upc for upc in (text(row, "upc") for row in related_rows) if upc}

    suggestions = []
    seen_ids = set()

    def add_matches(rows, reason):
        """Append unseen rows to ``suggestions``; return True once full."""
        for row in rows:
            canonical_product_id = row.get("canonical_product_id") or ""
            if not canonical_product_id or canonical_product_id in seen_ids:
                continue
            seen_ids.add(canonical_product_id)
            suggestions.append(
                {
                    "canonical_product_id": canonical_product_id,
                    "canonical_name": row.get("canonical_name") or "",
                    "reason": reason,
                }
            )
            if len(suggestions) >= limit:
                return True
        return False

    def contains_match(row):
        canonical_name = name_key(row)
        if not canonical_name:
            return False
        return any(
            observed in canonical_name or canonical_name in observed
            for observed in normalized_names
        )

    # ``upcs`` and ``normalized_names`` never contain "", so rows with blank
    # values cannot satisfy the exact-match tiers.
    tiers = (
        ("exact upc", lambda row: text(row, "upc") in upcs),
        ("exact normalized name", lambda row: name_key(row) in normalized_names),
        ("canonical name contains match", contains_match),
    )
    for reason, matches in tiers:
        if add_matches((row for row in catalog_rows if matches(row)), reason):
            break
    return suggestions
|
||||
|
||||
|
||||
def build_display_lines(queue_row, related_rows):
    """Format the matched purchase rows as text lines for the review screen.

    Rows are rendered in the order produced by ``sort_related_items``. Each
    row becomes one pipe-separated summary line, followed by an ``image:``
    line when the row has a truthy ``image_url``.

    Args:
        queue_row: Accepted for the caller's convenience; not read here.
        related_rows: Purchase rows to display.

    Returns:
        A list of strings. When there are no rows, a single placeholder line.
    """
    template = (
        " - {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | "
        "{upc} | {retailer}"
    )
    columns = (
        "purchase_date",
        "line_total",
        "raw_item_name",
        "normalized_item_name",
        "upc",
        "retailer",
    )

    output = []
    for item in sort_related_items(related_rows):
        # Missing columns render as empty strings.
        values = {column: item.get(column, "") for column in columns}
        output.append(template.format(**values))
        image_url = item.get("image_url")
        if image_url:
            output.append(f" image: {image_url}")

    if output:
        return output
    return [" - no matched item rows found"]
|
||||
|
||||
|
||||
def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_total):
|
||||
suggestions = build_canonical_suggestions(related_rows, catalog_rows)
|
||||
click.echo("")
|
||||
click.secho(
|
||||
f"Review observed product ({queue_index}/{queue_total})",
|
||||
fg=INFO_COLOR,
|
||||
)
|
||||
click.echo(
|
||||
"Resolve this observed product group to an existing canonical_name, "
|
||||
"a new canonical_name, exclude it, or skip it."
|
||||
)
|
||||
click.echo(f"observed_product_id: {queue_row['observed_product_id']}")
|
||||
click.echo(f"retailer: {queue_row['retailer']}")
|
||||
click.echo(f"raw names: {queue_row['raw_item_names']}")
|
||||
click.echo(f"normalized names: {queue_row['normalized_names']}")
|
||||
click.echo(f"upcs: {queue_row['upc_values']}")
|
||||
click.echo(f"example prices: {queue_row['example_prices']}")
|
||||
click.echo(f"seen count: {queue_row['seen_count']}")
|
||||
click.echo("actions: [l]ink existing [n]ew canonical [x]exclude [s]kip [q]uit")
|
||||
action = click.prompt("action", type=click.Choice(["l", "n", "x", "s", "q"]))
|
||||
click.echo(f"observed_name(s): {queue_row['normalized_names']}")
|
||||
click.echo(f"upc(s): {queue_row['upc_values']}")
|
||||
click.echo(f"seen_count: {queue_row['seen_count']}")
|
||||
click.secho("matched items:", fg=INFO_COLOR)
|
||||
for line in build_display_lines(queue_row, related_rows):
|
||||
click.echo(line)
|
||||
if suggestions:
|
||||
click.secho("suggested canonical_name values:", fg=INFO_COLOR)
|
||||
for suggestion in suggestions:
|
||||
click.echo(
|
||||
f" - {suggestion['canonical_product_id']} | {suggestion['canonical_name']} "
|
||||
f"({suggestion['reason']})"
|
||||
)
|
||||
else:
|
||||
click.secho("no deterministic canonical suggestions found", fg=WARNING_COLOR)
|
||||
click.secho(
|
||||
"actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit",
|
||||
fg=PROMPT_COLOR,
|
||||
)
|
||||
action = click.prompt(
|
||||
click.style("action", fg=PROMPT_COLOR),
|
||||
type=click.Choice(["l", "n", "x", "s", "q"]),
|
||||
)
|
||||
if action == "q":
|
||||
return None, None
|
||||
if action == "s":
|
||||
@@ -122,7 +244,11 @@ def prompt_resolution(queue_row, catalog_rows):
|
||||
"reviewed_at": str(date.today()),
|
||||
}, None
|
||||
if action == "x":
|
||||
notes = click.prompt("exclude notes", default="", show_default=False)
|
||||
notes = click.prompt(
|
||||
click.style("exclude notes", fg=PROMPT_COLOR),
|
||||
default="",
|
||||
show_default=False,
|
||||
)
|
||||
return {
|
||||
"observed_product_id": queue_row["observed_product_id"],
|
||||
"canonical_product_id": "",
|
||||
@@ -132,11 +258,28 @@ def prompt_resolution(queue_row, catalog_rows):
|
||||
"reviewed_at": str(date.today()),
|
||||
}, None
|
||||
if action == "l":
|
||||
click.echo("existing canonicals:")
|
||||
for row in catalog_rows[:10]:
|
||||
click.echo(f" {row['canonical_product_id']} {row['canonical_name']}")
|
||||
canonical_product_id = click.prompt("canonical product id", type=str)
|
||||
notes = click.prompt("link notes", default="", show_default=False)
|
||||
display_rows = suggestions or [
|
||||
{
|
||||
"canonical_product_id": row["canonical_product_id"],
|
||||
"canonical_name": row["canonical_name"],
|
||||
"reason": "catalog sample",
|
||||
}
|
||||
for row in catalog_rows[:10]
|
||||
]
|
||||
click.secho("existing canonical_name values:", fg=INFO_COLOR)
|
||||
for row in display_rows:
|
||||
click.echo(
|
||||
f" - {row['canonical_product_id']} | {row['canonical_name']} ({row['reason']})"
|
||||
)
|
||||
canonical_product_id = click.prompt(
|
||||
click.style("canonical product id", fg=PROMPT_COLOR),
|
||||
type=str,
|
||||
)
|
||||
notes = click.prompt(
|
||||
click.style("link notes", fg=PROMPT_COLOR),
|
||||
default="",
|
||||
show_default=False,
|
||||
)
|
||||
return {
|
||||
"observed_product_id": queue_row["observed_product_id"],
|
||||
"canonical_product_id": canonical_product_id,
|
||||
@@ -146,10 +289,22 @@ def prompt_resolution(queue_row, catalog_rows):
|
||||
"reviewed_at": str(date.today()),
|
||||
}, None
|
||||
|
||||
canonical_name = click.prompt("canonical name", type=str)
|
||||
category = click.prompt("category", default="", show_default=False)
|
||||
product_type = click.prompt("product type", default="", show_default=False)
|
||||
notes = click.prompt("notes", default="", show_default=False)
|
||||
canonical_name = click.prompt(click.style("canonical name", fg=PROMPT_COLOR), type=str)
|
||||
category = click.prompt(
|
||||
click.style("category", fg=PROMPT_COLOR),
|
||||
default="",
|
||||
show_default=False,
|
||||
)
|
||||
product_type = click.prompt(
|
||||
click.style("product type", fg=PROMPT_COLOR),
|
||||
default="",
|
||||
show_default=False,
|
||||
)
|
||||
notes = click.prompt(
|
||||
click.style("notes", fg=PROMPT_COLOR),
|
||||
default="",
|
||||
show_default=False,
|
||||
)
|
||||
canonical_product_id = stable_id("gcan", f"manual|{canonical_name}|{category}|{product_type}")
|
||||
canonical_row = {
|
||||
"canonical_product_id": canonical_product_id,
|
||||
@@ -197,11 +352,17 @@ def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_
|
||||
|
||||
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
|
||||
catalog_by_id = {row["canonical_product_id"]: row for row in catalog_rows if row.get("canonical_product_id")}
|
||||
rows_by_observed = defaultdict(list)
|
||||
for row in purchase_rows:
|
||||
observed_product_id = row.get("observed_product_id", "")
|
||||
if observed_product_id:
|
||||
rows_by_observed[observed_product_id].append(row)
|
||||
reviewed = 0
|
||||
for queue_row in queue_rows:
|
||||
for index, queue_row in enumerate(queue_rows, start=1):
|
||||
if limit and reviewed >= limit:
|
||||
break
|
||||
result = prompt_resolution(queue_row, catalog_rows)
|
||||
related_rows = rows_by_observed.get(queue_row["observed_product_id"], [])
|
||||
result = prompt_resolution(queue_row, related_rows, catalog_rows, index, len(queue_rows))
|
||||
if result == (None, None):
|
||||
break
|
||||
resolution_row, canonical_row = result
|
||||
|
||||
Reference in New Issue
Block a user