Improve product review display workflow

This commit is contained in:
ben
2026-03-17 12:25:47 -04:00
parent e494386e64
commit 7b8141cd42
5 changed files with 372 additions and 46 deletions

View File

@@ -22,6 +22,7 @@ PURCHASE_FIELDS = [
"resolution_action",
"raw_item_name",
"normalized_item_name",
"image_url",
"retailer_item_id",
"upc",
"qty",
@@ -280,6 +281,7 @@ def build_purchase_rows(
"resolution_action": resolution.get("resolution_action", ""),
"raw_item_name": row["item_name"],
"normalized_item_name": row["item_name_norm"],
"image_url": row.get("image_url", ""),
"retailer_item_id": row["retailer_item_id"],
"upc": row["upc"],
"qty": row["qty"],

View File

@@ -367,7 +367,7 @@
- commit: `c7dad54` on branch `cx`
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python build_purchases.py`; `./venv/bin/python review_products.py --refresh-only`; verified `combined_output/review_queue.csv`, `combined_output/review_resolutions.csv` workflow, and `combined_output/canonical_catalog.csv`
- date: 2026-03-16
* [ ] t1.12: simplify review process display
* [X] t1.12: simplify review process display
Clearly show current state separate from proposed future state.
** acceptance criteria
1. Display position in review queue, e.g., (1/22)
@@ -386,10 +386,13 @@ Clearly show current state separate from proposed future state.
** evidence
- commit:
- tests:
- date:
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python review_products.py --help`; verified review prompt now shows queue position, matched item history, image URLs when present, and deterministic canonical suggestions
- date: 2026-03-17
** notes
- The big win here was clarifying terminology in the prompt itself: the reviewer is resolving one observed product group to a canonical_name, not linking raw rows to each other.
- Showing the full matched-item list by descending date made the review context much more legible than the old compact summary fields.
- Deterministic suggestions help, but they are intentionally conservative and shallow; this improves reviewer speed without pretending to solve product matching automatically.
* [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits)

View File

@@ -1,6 +1,5 @@
from collections import defaultdict
from datetime import date
from pathlib import Path
import click
@@ -99,17 +98,140 @@ def save_catalog_rows(path, rows):
write_csv_rows(path, rows, build_purchases.CATALOG_FIELDS)
def prompt_resolution(queue_row, catalog_rows):
INFO_COLOR = "cyan"
PROMPT_COLOR = "bright_yellow"
WARNING_COLOR = "magenta"
def sort_related_items(rows):
return sorted(
rows,
key=lambda row: (
row.get("purchase_date", ""),
row.get("order_id", ""),
int(row.get("line_no", "0") or "0"),
),
reverse=True,
)
def build_canonical_suggestions(related_rows, catalog_rows, limit=3):
normalized_names = {
row.get("normalized_item_name", "").strip().upper()
for row in related_rows
if row.get("normalized_item_name", "").strip()
}
upcs = {
row.get("upc", "").strip()
for row in related_rows
if row.get("upc", "").strip()
}
suggestions = []
seen_ids = set()
def add_matches(rows, reason):
for row in rows:
canonical_product_id = row.get("canonical_product_id", "")
if not canonical_product_id or canonical_product_id in seen_ids:
continue
seen_ids.add(canonical_product_id)
suggestions.append(
{
"canonical_product_id": canonical_product_id,
"canonical_name": row.get("canonical_name", ""),
"reason": reason,
}
)
if len(suggestions) >= limit:
return True
return False
exact_upc_rows = [
row
for row in catalog_rows
if row.get("upc", "").strip() and row.get("upc", "").strip() in upcs
]
if add_matches(exact_upc_rows, "exact upc"):
return suggestions
exact_name_rows = [
row
for row in catalog_rows
if row.get("canonical_name", "").strip().upper() in normalized_names
]
if add_matches(exact_name_rows, "exact normalized name"):
return suggestions
contains_rows = []
for row in catalog_rows:
canonical_name = row.get("canonical_name", "").strip().upper()
if not canonical_name:
continue
for normalized_name in normalized_names:
if normalized_name in canonical_name or canonical_name in normalized_name:
contains_rows.append(row)
break
add_matches(contains_rows, "canonical name contains match")
return suggestions
def build_display_lines(queue_row, related_rows):
lines = []
for row in sort_related_items(related_rows):
lines.append(
" - {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | "
"{upc} | {retailer}".format(
purchase_date=row.get("purchase_date", ""),
line_total=row.get("line_total", ""),
raw_item_name=row.get("raw_item_name", ""),
normalized_item_name=row.get("normalized_item_name", ""),
upc=row.get("upc", ""),
retailer=row.get("retailer", ""),
)
)
if row.get("image_url"):
lines.append(f" image: {row['image_url']}")
if not lines:
lines.append(" - no matched item rows found")
return lines
def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_total):
suggestions = build_canonical_suggestions(related_rows, catalog_rows)
click.echo("")
click.secho(
f"Review observed product ({queue_index}/{queue_total})",
fg=INFO_COLOR,
)
click.echo(
"Resolve this observed product group to an existing canonical_name, "
"a new canonical_name, exclude it, or skip it."
)
click.echo(f"observed_product_id: {queue_row['observed_product_id']}")
click.echo(f"retailer: {queue_row['retailer']}")
click.echo(f"raw names: {queue_row['raw_item_names']}")
click.echo(f"normalized names: {queue_row['normalized_names']}")
click.echo(f"upcs: {queue_row['upc_values']}")
click.echo(f"example prices: {queue_row['example_prices']}")
click.echo(f"seen count: {queue_row['seen_count']}")
click.echo("actions: [l]ink existing [n]ew canonical [x]exclude [s]kip [q]uit")
action = click.prompt("action", type=click.Choice(["l", "n", "x", "s", "q"]))
click.echo(f"observed_name(s): {queue_row['normalized_names']}")
click.echo(f"upc(s): {queue_row['upc_values']}")
click.echo(f"seen_count: {queue_row['seen_count']}")
click.secho("matched items:", fg=INFO_COLOR)
for line in build_display_lines(queue_row, related_rows):
click.echo(line)
if suggestions:
click.secho("suggested canonical_name values:", fg=INFO_COLOR)
for suggestion in suggestions:
click.echo(
f" - {suggestion['canonical_product_id']} | {suggestion['canonical_name']} "
f"({suggestion['reason']})"
)
else:
click.secho("no deterministic canonical suggestions found", fg=WARNING_COLOR)
click.secho(
"actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit",
fg=PROMPT_COLOR,
)
action = click.prompt(
click.style("action", fg=PROMPT_COLOR),
type=click.Choice(["l", "n", "x", "s", "q"]),
)
if action == "q":
return None, None
if action == "s":
@@ -122,7 +244,11 @@ def prompt_resolution(queue_row, catalog_rows):
"reviewed_at": str(date.today()),
}, None
if action == "x":
notes = click.prompt("exclude notes", default="", show_default=False)
notes = click.prompt(
click.style("exclude notes", fg=PROMPT_COLOR),
default="",
show_default=False,
)
return {
"observed_product_id": queue_row["observed_product_id"],
"canonical_product_id": "",
@@ -132,11 +258,28 @@ def prompt_resolution(queue_row, catalog_rows):
"reviewed_at": str(date.today()),
}, None
if action == "l":
click.echo("existing canonicals:")
for row in catalog_rows[:10]:
click.echo(f" {row['canonical_product_id']} {row['canonical_name']}")
canonical_product_id = click.prompt("canonical product id", type=str)
notes = click.prompt("link notes", default="", show_default=False)
display_rows = suggestions or [
{
"canonical_product_id": row["canonical_product_id"],
"canonical_name": row["canonical_name"],
"reason": "catalog sample",
}
for row in catalog_rows[:10]
]
click.secho("existing canonical_name values:", fg=INFO_COLOR)
for row in display_rows:
click.echo(
f" - {row['canonical_product_id']} | {row['canonical_name']} ({row['reason']})"
)
canonical_product_id = click.prompt(
click.style("canonical product id", fg=PROMPT_COLOR),
type=str,
)
notes = click.prompt(
click.style("link notes", fg=PROMPT_COLOR),
default="",
show_default=False,
)
return {
"observed_product_id": queue_row["observed_product_id"],
"canonical_product_id": canonical_product_id,
@@ -146,10 +289,22 @@ def prompt_resolution(queue_row, catalog_rows):
"reviewed_at": str(date.today()),
}, None
canonical_name = click.prompt("canonical name", type=str)
category = click.prompt("category", default="", show_default=False)
product_type = click.prompt("product type", default="", show_default=False)
notes = click.prompt("notes", default="", show_default=False)
canonical_name = click.prompt(click.style("canonical name", fg=PROMPT_COLOR), type=str)
category = click.prompt(
click.style("category", fg=PROMPT_COLOR),
default="",
show_default=False,
)
product_type = click.prompt(
click.style("product type", fg=PROMPT_COLOR),
default="",
show_default=False,
)
notes = click.prompt(
click.style("notes", fg=PROMPT_COLOR),
default="",
show_default=False,
)
canonical_product_id = stable_id("gcan", f"manual|{canonical_name}|{category}|{product_type}")
canonical_row = {
"canonical_product_id": canonical_product_id,
@@ -197,11 +352,17 @@ def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_
resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
catalog_by_id = {row["canonical_product_id"]: row for row in catalog_rows if row.get("canonical_product_id")}
rows_by_observed = defaultdict(list)
for row in purchase_rows:
observed_product_id = row.get("observed_product_id", "")
if observed_product_id:
rows_by_observed[observed_product_id].append(row)
reviewed = 0
for queue_row in queue_rows:
for index, queue_row in enumerate(queue_rows, start=1):
if limit and reviewed >= limit:
break
result = prompt_resolution(queue_row, catalog_rows)
related_rows = rows_by_observed.get(queue_row["observed_product_id"], [])
result = prompt_resolution(queue_row, related_rows, catalog_rows, index, len(queue_rows))
if result == (None, None):
break
resolution_row, canonical_row = result

View File

@@ -41,6 +41,7 @@ class PurchaseLogTests(unittest.TestCase):
"order_date": "2026-03-01",
"item_name": "FRESH BANANA",
"item_name_norm": "BANANA",
"image_url": "https://example.test/banana.jpg",
"retailer_item_id": "100",
"upc": "4011",
"qty": "1",
@@ -99,24 +100,18 @@ class PurchaseLogTests(unittest.TestCase):
}
]
<<<<<<< HEAD
rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
=======
rows = build_purchases.build_purchase_rows(
>>>>>>> be1bf63 (Build pivot-ready purchase log)
[giant_row],
[costco_row],
giant_orders,
costco_orders,
<<<<<<< HEAD
[],
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
)
self.assertEqual(2, len(rows))
self.assertTrue(all(row["canonical_product_id"] for row in rows))
self.assertEqual({"giant", "costco"}, {row["retailer"] for row in rows})
self.assertEqual("https://example.test/banana.jpg", rows[0]["image_url"])
def test_main_writes_purchase_and_example_csvs(self):
with tempfile.TemporaryDirectory() as tmpdir:
@@ -124,11 +119,13 @@ class PurchaseLogTests(unittest.TestCase):
costco_items = Path(tmpdir) / "costco_items.csv"
giant_orders = Path(tmpdir) / "giant_orders.csv"
costco_orders = Path(tmpdir) / "costco_orders.csv"
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
links_csv = Path(tmpdir) / "product_links.csv"
purchases_csv = Path(tmpdir) / "combined" / "purchases.csv"
examples_csv = Path(tmpdir) / "combined" / "comparison_examples.csv"
fieldnames = enrich_costco.OUTPUT_FIELDS
rows = []
giant_row = {field: "" for field in fieldnames}
giant_row.update(
{
@@ -178,7 +175,6 @@ class PurchaseLogTests(unittest.TestCase):
"is_fee": "false",
}
)
rows.extend([giant_row, costco_row])
for path, source_rows in [
(giant_items, [giant_row]),
@@ -189,12 +185,35 @@ class PurchaseLogTests(unittest.TestCase):
writer.writeheader()
writer.writerows(source_rows)
order_fields = ["order_id", "store_name", "store_number", "store_city", "store_state"]
for path, source_rows in [
(giant_orders, [{"order_id": "g1", "store_name": "Giant", "store_number": "42", "store_city": "Springfield", "store_state": "VA"}]),
(costco_orders, [{"order_id": "c1", "store_name": "MT VERNON", "store_number": "1115", "store_city": "ALEXANDRIA", "store_state": "VA"}]),
(
giant_orders,
[
{
"order_id": "g1",
"store_name": "Giant",
"store_number": "42",
"store_city": "Springfield",
"store_state": "VA",
}
],
),
(
costco_orders,
[
{
"order_id": "c1",
"store_name": "MT VERNON",
"store_number": "1115",
"store_city": "ALEXANDRIA",
"store_state": "VA",
}
],
),
]:
with path.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=["order_id", "store_name", "store_number", "store_city", "store_state"])
writer = csv.DictWriter(handle, fieldnames=order_fields)
writer.writeheader()
writer.writerows(source_rows)
@@ -203,12 +222,9 @@ class PurchaseLogTests(unittest.TestCase):
costco_items_enriched_csv=str(costco_items),
giant_orders_csv=str(giant_orders),
costco_orders_csv=str(costco_orders),
<<<<<<< HEAD
resolutions_csv=str(Path(tmpdir) / "review_resolutions.csv"),
catalog_csv=str(Path(tmpdir) / "canonical_catalog.csv"),
links_csv=str(Path(tmpdir) / "product_links.csv"),
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
resolutions_csv=str(resolutions_csv),
catalog_csv=str(catalog_csv),
links_csv=str(links_csv),
output_csv=str(purchases_csv),
examples_csv=str(examples_csv),
)
@@ -222,7 +238,6 @@ class PurchaseLogTests(unittest.TestCase):
self.assertEqual(2, len(purchase_rows))
self.assertEqual(1, len(example_rows))
<<<<<<< HEAD
def test_build_purchase_rows_applies_manual_resolution(self):
fieldnames = enrich_costco.OUTPUT_FIELDS
giant_row = {field: "" for field in fieldnames}
@@ -255,7 +270,15 @@ class PurchaseLogTests(unittest.TestCase):
rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
[giant_row],
[],
[{"order_id": "g1", "store_name": "Giant", "store_number": "42", "store_city": "Springfield", "store_state": "VA"}],
[
{
"order_id": "g1",
"store_name": "Giant",
"store_number": "42",
"store_city": "Springfield",
"store_state": "VA",
}
],
[],
[
{
@@ -273,8 +296,6 @@ class PurchaseLogTests(unittest.TestCase):
self.assertEqual("approved", rows[0]["review_status"])
self.assertEqual("create", rows[0]["resolution_action"])
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
if __name__ == "__main__":
unittest.main()

View File

@@ -4,6 +4,8 @@ import unittest
from pathlib import Path
from unittest import mock
from click.testing import CliRunner
import review_products
@@ -37,6 +39,135 @@ class ReviewWorkflowTests(unittest.TestCase):
self.assertEqual("gobs_1", queue_rows[0]["observed_product_id"])
self.assertIn("SB BAGGED ICE 20LB", queue_rows[0]["raw_item_names"])
def test_build_canonical_suggestions_prefers_upc_then_name(self):
suggestions = review_products.build_canonical_suggestions(
[
{
"normalized_item_name": "MIXED PEPPER",
"upc": "12345",
}
],
[
{
"canonical_product_id": "gcan_1",
"canonical_name": "MIXED PEPPER",
"upc": "",
},
{
"canonical_product_id": "gcan_2",
"canonical_name": "MIXED PEPPER 6 PACK",
"upc": "12345",
},
],
)
self.assertEqual("gcan_2", suggestions[0]["canonical_product_id"])
self.assertEqual("exact upc", suggestions[0]["reason"])
self.assertEqual("gcan_1", suggestions[1]["canonical_product_id"])
def test_review_products_displays_position_items_and_suggestions(self):
with tempfile.TemporaryDirectory() as tmpdir:
purchases_csv = Path(tmpdir) / "purchases.csv"
queue_csv = Path(tmpdir) / "review_queue.csv"
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
purchase_fields = [
"purchase_date",
"retailer",
"order_id",
"line_no",
"observed_product_id",
"canonical_product_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
]
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=purchase_fields)
writer.writeheader()
writer.writerows(
[
{
"purchase_date": "2026-03-14",
"retailer": "costco",
"order_id": "c2",
"line_no": "2",
"observed_product_id": "gobs_mix",
"canonical_product_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "7.49",
},
{
"purchase_date": "2026-03-12",
"retailer": "costco",
"order_id": "c1",
"line_no": "1",
"observed_product_id": "gobs_mix",
"canonical_product_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "https://example.test/mixed-pepper.jpg",
"upc": "",
"line_total": "6.99",
},
]
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
writer.writeheader()
writer.writerow(
{
"canonical_product_id": "gcan_mix",
"canonical_name": "MIXED PEPPER",
"category": "produce",
"product_type": "pepper",
"brand": "",
"variant": "",
"size_value": "",
"size_unit": "",
"pack_qty": "",
"measure_type": "",
"notes": "",
"created_at": "",
"updated_at": "",
}
)
runner = CliRunner()
result = runner.invoke(
review_products.main,
[
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
str(queue_csv),
"--resolutions-csv",
str(resolutions_csv),
"--catalog-csv",
str(catalog_csv),
],
input="q\n",
color=True,
)
self.assertEqual(0, result.exit_code)
self.assertIn("Review observed product (1/1)", result.output)
self.assertIn("Resolve this observed product group", result.output)
self.assertIn("actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", result.output)
first_item = result.output.index("2026-03-14 | 7.49")
second_item = result.output.index("2026-03-12 | 6.99")
self.assertLess(first_item, second_item)
self.assertIn("image: https://example.test/mixed-pepper.jpg", result.output)
self.assertIn("gcan_mix | MIXED PEPPER (exact normalized name)", result.output)
self.assertIn("\x1b[", result.output)
def test_review_products_creates_canonical_and_resolution(self):
with tempfile.TemporaryDirectory() as tmpdir:
purchases_csv = Path(tmpdir) / "purchases.csv"
@@ -48,25 +179,33 @@ class ReviewWorkflowTests(unittest.TestCase):
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"observed_product_id",
"canonical_product_id",
"retailer",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
"order_id",
"line_no",
],
)
writer.writeheader()
writer.writerow(
{
"purchase_date": "2026-03-15",
"observed_product_id": "gobs_ice",
"canonical_product_id": "",
"retailer": "giant",
"raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE",
"image_url": "",
"upc": "",
"line_total": "3.50",
"order_id": "g1",
"line_no": "1",
}
)