From 7b8141cd4243132d1c1cb4298de6d54955c4da41 Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 17 Mar 2026 12:25:47 -0400 Subject: [PATCH] Improve product review display workflow --- build_purchases.py | 2 + pm/tasks.org | 9 +- review_products.py | 203 ++++++++++++++++++++++++++++++---- tests/test_purchases.py | 65 +++++++---- tests/test_review_workflow.py | 139 +++++++++++++++++++++++ 5 files changed, 372 insertions(+), 46 deletions(-) diff --git a/build_purchases.py b/build_purchases.py index 4f4996f..a6781d9 100644 --- a/build_purchases.py +++ b/build_purchases.py @@ -22,6 +22,7 @@ PURCHASE_FIELDS = [ "resolution_action", "raw_item_name", "normalized_item_name", + "image_url", "retailer_item_id", "upc", "qty", @@ -280,6 +281,7 @@ def build_purchase_rows( "resolution_action": resolution.get("resolution_action", ""), "raw_item_name": row["item_name"], "normalized_item_name": row["item_name_norm"], + "image_url": row.get("image_url", ""), "retailer_item_id": row["retailer_item_id"], "upc": row["upc"], "qty": row["qty"], diff --git a/pm/tasks.org b/pm/tasks.org index f571310..990eff5 100644 --- a/pm/tasks.org +++ b/pm/tasks.org @@ -367,7 +367,7 @@ - commit: `c7dad54` on branch `cx` - tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python build_purchases.py`; `./venv/bin/python review_products.py --refresh-only`; verified `combined_output/review_queue.csv`, `combined_output/review_resolutions.csv` workflow, and `combined_output/canonical_catalog.csv` - date: 2026-03-16 -* [ ] t1.12: simplify review process display +* [X] t1.12: simplify review process display Clearly show current state separate from proposed future state. ** acceptance criteria 1. Display position in review queue, e.g., (1/22) @@ -386,10 +386,13 @@ Clearly show current state separate from proposed future state. ** evidence - commit: -- tests: -- date: +- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python review_products.py --help`; verified review prompt now shows queue position, matched item history, image URLs when present, and deterministic canonical suggestions +- date: 2026-03-17 ** notes +- The big win here was clarifying terminology in the prompt itself: the reviewer is resolving one observed product group to a canonical_name, not linking raw rows to each other. +- Showing the full matched-item list by descending date made the review context much more legible than the old compact summary fields. +- Deterministic suggestions help, but they are intentionally conservative and shallow; this improves reviewer speed without pretending to solve product matching automatically. * [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits) diff --git a/review_products.py b/review_products.py index 90ee3e1..bd41fec 100644 --- a/review_products.py +++ b/review_products.py @@ -1,6 +1,5 @@ from collections import defaultdict from datetime import date -from pathlib import Path import click @@ -99,17 +98,140 @@ def save_catalog_rows(path, rows): write_csv_rows(path, rows, build_purchases.CATALOG_FIELDS) -def prompt_resolution(queue_row, catalog_rows): +INFO_COLOR = "cyan" +PROMPT_COLOR = "bright_yellow" +WARNING_COLOR = "magenta" + + +def sort_related_items(rows): + return sorted( + rows, + key=lambda row: ( + row.get("purchase_date", ""), + row.get("order_id", ""), + int(row.get("line_no", "0") or "0"), + ), + reverse=True, + ) + + +def build_canonical_suggestions(related_rows, catalog_rows, limit=3): + normalized_names = { + row.get("normalized_item_name", "").strip().upper() + for row in related_rows + if row.get("normalized_item_name", "").strip() + } + upcs = { + row.get("upc", "").strip() + for row in related_rows + if row.get("upc", "").strip() + } + suggestions = [] + seen_ids = set() + + def add_matches(rows, reason): + for row in rows: + canonical_product_id = row.get("canonical_product_id", "") + if not canonical_product_id or canonical_product_id in seen_ids: + continue + seen_ids.add(canonical_product_id) + suggestions.append( + { + "canonical_product_id": canonical_product_id, + "canonical_name": row.get("canonical_name", ""), + "reason": reason, + } + ) + if len(suggestions) >= limit: + return True + return False + + exact_upc_rows = [ + row + for row in catalog_rows + if row.get("upc", "").strip() and row.get("upc", "").strip() in upcs + ] + if add_matches(exact_upc_rows, "exact upc"): + return suggestions + + exact_name_rows = [ + row + for row in catalog_rows + if row.get("canonical_name", "").strip().upper() in normalized_names + ] + if add_matches(exact_name_rows, "exact normalized name"): + return suggestions + + contains_rows = [] + for row in catalog_rows: + canonical_name = row.get("canonical_name", "").strip().upper() + if not canonical_name: + continue + for normalized_name in normalized_names: + if normalized_name in canonical_name or canonical_name in normalized_name: + contains_rows.append(row) + break + add_matches(contains_rows, "canonical name contains match") + return suggestions + + +def build_display_lines(queue_row, related_rows): + lines = [] + for row in sort_related_items(related_rows): + lines.append( + " - {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | " + "{upc} | {retailer}".format( + purchase_date=row.get("purchase_date", ""), + line_total=row.get("line_total", ""), + raw_item_name=row.get("raw_item_name", ""), + normalized_item_name=row.get("normalized_item_name", ""), + upc=row.get("upc", ""), + retailer=row.get("retailer", ""), + ) + ) + if row.get("image_url"): + lines.append(f" image: {row['image_url']}") + if not lines: + lines.append(" - no matched item rows found") + return lines + + +def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_total): + suggestions = build_canonical_suggestions(related_rows, catalog_rows) click.echo("") + click.secho( + f"Review observed product ({queue_index}/{queue_total})", + fg=INFO_COLOR, + ) + click.echo( + "Resolve this observed product group to an existing canonical_name, " + "a new canonical_name, exclude it, or skip it." + ) click.echo(f"observed_product_id: {queue_row['observed_product_id']}") click.echo(f"retailer: {queue_row['retailer']}") - click.echo(f"raw names: {queue_row['raw_item_names']}") - click.echo(f"normalized names: {queue_row['normalized_names']}") - click.echo(f"upcs: {queue_row['upc_values']}") - click.echo(f"example prices: {queue_row['example_prices']}") - click.echo(f"seen count: {queue_row['seen_count']}") - click.echo("actions: [l]ink existing [n]ew canonical [x]exclude [s]kip [q]uit") - action = click.prompt("action", type=click.Choice(["l", "n", "x", "s", "q"])) + click.echo(f"observed_name(s): {queue_row['normalized_names']}") + click.echo(f"upc(s): {queue_row['upc_values']}") + click.echo(f"seen_count: {queue_row['seen_count']}") + click.secho("matched items:", fg=INFO_COLOR) + for line in build_display_lines(queue_row, related_rows): + click.echo(line) + if suggestions: + click.secho("suggested canonical_name values:", fg=INFO_COLOR) + for suggestion in suggestions: + click.echo( + f" - {suggestion['canonical_product_id']} | {suggestion['canonical_name']} " + f"({suggestion['reason']})" + ) + else: + click.secho("no deterministic canonical suggestions found", fg=WARNING_COLOR) + click.secho( + "actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", + fg=PROMPT_COLOR, + ) + action = click.prompt( + click.style("action", fg=PROMPT_COLOR), + type=click.Choice(["l", "n", "x", "s", "q"]), + ) if action == "q": return None, None if action == "s": @@ -122,7 +244,11 @@ def prompt_resolution(queue_row, catalog_rows): "reviewed_at": str(date.today()), }, None if action == "x": - notes = click.prompt("exclude notes", default="", show_default=False) + notes = click.prompt( + click.style("exclude notes", fg=PROMPT_COLOR), + default="", + show_default=False, + ) return { "observed_product_id": queue_row["observed_product_id"], "canonical_product_id": "", @@ -132,11 +258,28 @@ def prompt_resolution(queue_row, catalog_rows): "reviewed_at": str(date.today()), }, None if action == "l": - click.echo("existing canonicals:") - for row in catalog_rows[:10]: - click.echo(f" {row['canonical_product_id']} {row['canonical_name']}") - canonical_product_id = click.prompt("canonical product id", type=str) - notes = click.prompt("link notes", default="", show_default=False) + display_rows = suggestions or [ + { + "canonical_product_id": row["canonical_product_id"], + "canonical_name": row["canonical_name"], + "reason": "catalog sample", + } + for row in catalog_rows[:10] + ] + click.secho("existing canonical_name values:", fg=INFO_COLOR) + for row in display_rows: + click.echo( + f" - {row['canonical_product_id']} | {row['canonical_name']} ({row['reason']})" + ) + canonical_product_id = click.prompt( + click.style("canonical product id", fg=PROMPT_COLOR), + type=str, + ) + notes = click.prompt( + click.style("link notes", fg=PROMPT_COLOR), + default="", + show_default=False, + ) return { "observed_product_id": queue_row["observed_product_id"], "canonical_product_id": canonical_product_id, @@ -146,10 +289,22 @@ def prompt_resolution(queue_row, catalog_rows): "reviewed_at": str(date.today()), }, None - canonical_name = click.prompt("canonical name", type=str) - category = click.prompt("category", default="", show_default=False) - product_type = click.prompt("product type", default="", show_default=False) - notes = click.prompt("notes", default="", show_default=False) + canonical_name = click.prompt(click.style("canonical name", fg=PROMPT_COLOR), type=str) + category = click.prompt( + click.style("category", fg=PROMPT_COLOR), + default="", + show_default=False, + ) + product_type = click.prompt( + click.style("product type", fg=PROMPT_COLOR), + default="", + show_default=False, + ) + notes = click.prompt( + click.style("notes", fg=PROMPT_COLOR), + default="", + show_default=False, + ) canonical_product_id = stable_id("gcan", f"manual|{canonical_name}|{category}|{product_type}") canonical_row = { "canonical_product_id": canonical_product_id, @@ -197,11 +352,17 @@ def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_ resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows) catalog_by_id = {row["canonical_product_id"]: row for row in catalog_rows if row.get("canonical_product_id")} + rows_by_observed = defaultdict(list) + for row in purchase_rows: + observed_product_id = row.get("observed_product_id", "") + if observed_product_id: + rows_by_observed[observed_product_id].append(row) reviewed = 0 - for queue_row in queue_rows: + for index, queue_row in enumerate(queue_rows, start=1): if limit and reviewed >= limit: break - result = prompt_resolution(queue_row, catalog_rows) + related_rows = rows_by_observed.get(queue_row["observed_product_id"], []) + result = prompt_resolution(queue_row, related_rows, catalog_rows, index, len(queue_rows)) if result == (None, None): break resolution_row, canonical_row = result diff --git a/tests/test_purchases.py b/tests/test_purchases.py index 32c6fce..1b6cb08 100644 --- a/tests/test_purchases.py +++ b/tests/test_purchases.py @@ -41,6 +41,7 @@ class PurchaseLogTests(unittest.TestCase): "order_date": "2026-03-01", "item_name": "FRESH BANANA", "item_name_norm": "BANANA", + "image_url": "https://example.test/banana.jpg", "retailer_item_id": "100", "upc": "4011", "qty": "1", @@ -99,24 +100,18 @@ class PurchaseLogTests(unittest.TestCase): } ] -<<<<<<< HEAD rows, _observed, _canon, _links = build_purchases.build_purchase_rows( -======= - rows = build_purchases.build_purchase_rows( ->>>>>>> be1bf63 (Build pivot-ready purchase log) [giant_row], [costco_row], giant_orders, costco_orders, -<<<<<<< HEAD [], -======= ->>>>>>> be1bf63 (Build pivot-ready purchase log) ) self.assertEqual(2, len(rows)) self.assertTrue(all(row["canonical_product_id"] for row in rows)) self.assertEqual({"giant", "costco"}, {row["retailer"] for row in rows}) + self.assertEqual("https://example.test/banana.jpg", rows[0]["image_url"]) def test_main_writes_purchase_and_example_csvs(self): with tempfile.TemporaryDirectory() as tmpdir: @@ -124,11 +119,13 @@ class PurchaseLogTests(unittest.TestCase): costco_items = Path(tmpdir) / "costco_items.csv" giant_orders = Path(tmpdir) / "giant_orders.csv" costco_orders = Path(tmpdir) / "costco_orders.csv" + resolutions_csv = Path(tmpdir) / "review_resolutions.csv" + catalog_csv = Path(tmpdir) / "canonical_catalog.csv" + links_csv = Path(tmpdir) / "product_links.csv" purchases_csv = Path(tmpdir) / "combined" / "purchases.csv" examples_csv = Path(tmpdir) / "combined" / "comparison_examples.csv" fieldnames = enrich_costco.OUTPUT_FIELDS - rows = [] giant_row = {field: "" for field in fieldnames} giant_row.update( { @@ -178,7 +175,6 @@ class PurchaseLogTests(unittest.TestCase): "is_fee": "false", } ) - rows.extend([giant_row, costco_row]) for path, source_rows in [ (giant_items, [giant_row]), @@ -189,12 +185,35 @@ class PurchaseLogTests(unittest.TestCase): writer.writeheader() writer.writerows(source_rows) + order_fields = ["order_id", "store_name", "store_number", "store_city", "store_state"] for path, source_rows in [ - (giant_orders, [{"order_id": "g1", "store_name": "Giant", "store_number": "42", "store_city": "Springfield", "store_state": "VA"}]), - (costco_orders, [{"order_id": "c1", "store_name": "MT VERNON", "store_number": "1115", "store_city": "ALEXANDRIA", "store_state": "VA"}]), + ( + giant_orders, + [ + { + "order_id": "g1", + "store_name": "Giant", + "store_number": "42", + "store_city": "Springfield", + "store_state": "VA", + } + ], + ), + ( + costco_orders, + [ + { + "order_id": "c1", + "store_name": "MT VERNON", + "store_number": "1115", + "store_city": "ALEXANDRIA", + "store_state": "VA", + } + ], + ), ]: with path.open("w", newline="", encoding="utf-8") as handle: - writer = csv.DictWriter(handle, fieldnames=["order_id", "store_name", "store_number", "store_city", "store_state"]) + writer = csv.DictWriter(handle, fieldnames=order_fields) writer.writeheader() writer.writerows(source_rows) @@ -203,12 +222,9 @@ class PurchaseLogTests(unittest.TestCase): costco_items_enriched_csv=str(costco_items), giant_orders_csv=str(giant_orders), costco_orders_csv=str(costco_orders), -<<<<<<< HEAD - resolutions_csv=str(Path(tmpdir) / "review_resolutions.csv"), - catalog_csv=str(Path(tmpdir) / "canonical_catalog.csv"), - links_csv=str(Path(tmpdir) / "product_links.csv"), -======= ->>>>>>> be1bf63 (Build pivot-ready purchase log) + resolutions_csv=str(resolutions_csv), + catalog_csv=str(catalog_csv), + links_csv=str(links_csv), output_csv=str(purchases_csv), examples_csv=str(examples_csv), ) @@ -222,7 +238,6 @@ class PurchaseLogTests(unittest.TestCase): self.assertEqual(2, len(purchase_rows)) self.assertEqual(1, len(example_rows)) -<<<<<<< HEAD def test_build_purchase_rows_applies_manual_resolution(self): fieldnames = enrich_costco.OUTPUT_FIELDS giant_row = {field: "" for field in fieldnames} @@ -255,7 +270,15 @@ class PurchaseLogTests(unittest.TestCase): rows, _observed, _canon, _links = build_purchases.build_purchase_rows( [giant_row], [], - [{"order_id": "g1", "store_name": "Giant", "store_number": "42", "store_city": "Springfield", "store_state": "VA"}], + [ + { + "order_id": "g1", + "store_name": "Giant", + "store_number": "42", + "store_city": "Springfield", + "store_state": "VA", + } + ], [], [ { @@ -273,8 +296,6 @@ class PurchaseLogTests(unittest.TestCase): self.assertEqual("approved", rows[0]["review_status"]) self.assertEqual("create", rows[0]["resolution_action"]) -======= ->>>>>>> be1bf63 (Build pivot-ready purchase log) if __name__ == "__main__": unittest.main() diff --git a/tests/test_review_workflow.py b/tests/test_review_workflow.py index e1cb3d0..34ea5e3 100644 --- a/tests/test_review_workflow.py +++ b/tests/test_review_workflow.py @@ -4,6 +4,8 @@ import unittest from pathlib import Path from unittest import mock +from click.testing import CliRunner + import review_products @@ -37,6 +39,135 @@ class ReviewWorkflowTests(unittest.TestCase): self.assertEqual("gobs_1", queue_rows[0]["observed_product_id"]) self.assertIn("SB BAGGED ICE 20LB", queue_rows[0]["raw_item_names"]) + def test_build_canonical_suggestions_prefers_upc_then_name(self): + suggestions = review_products.build_canonical_suggestions( + [ + { + "normalized_item_name": "MIXED PEPPER", + "upc": "12345", + } + ], + [ + { + "canonical_product_id": "gcan_1", + "canonical_name": "MIXED PEPPER", + "upc": "", + }, + { + "canonical_product_id": "gcan_2", + "canonical_name": "MIXED PEPPER 6 PACK", + "upc": "12345", + }, + ], + ) + + self.assertEqual("gcan_2", suggestions[0]["canonical_product_id"]) + self.assertEqual("exact upc", suggestions[0]["reason"]) + self.assertEqual("gcan_1", suggestions[1]["canonical_product_id"]) + + def test_review_products_displays_position_items_and_suggestions(self): + with tempfile.TemporaryDirectory() as tmpdir: + purchases_csv = Path(tmpdir) / "purchases.csv" + queue_csv = Path(tmpdir) / "review_queue.csv" + resolutions_csv = Path(tmpdir) / "review_resolutions.csv" + catalog_csv = Path(tmpdir) / "canonical_catalog.csv" + + purchase_fields = [ + "purchase_date", + "retailer", + "order_id", + "line_no", + "observed_product_id", + "canonical_product_id", + "raw_item_name", + "normalized_item_name", + "image_url", + "upc", + "line_total", + ] + with purchases_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=purchase_fields) + writer.writeheader() + writer.writerows( + [ + { + "purchase_date": "2026-03-14", + "retailer": "costco", + "order_id": "c2", + "line_no": "2", + "observed_product_id": "gobs_mix", + "canonical_product_id": "", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "7.49", + }, + { + "purchase_date": "2026-03-12", + "retailer": "costco", + "order_id": "c1", + "line_no": "1", + "observed_product_id": "gobs_mix", + "canonical_product_id": "", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "https://example.test/mixed-pepper.jpg", + "upc": "", + "line_total": "6.99", + }, + ] + ) + + with catalog_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) + writer.writeheader() + writer.writerow( + { + "canonical_product_id": "gcan_mix", + "canonical_name": "MIXED PEPPER", + "category": "produce", + "product_type": "pepper", + "brand": "", + "variant": "", + "size_value": "", + "size_unit": "", + "pack_qty": "", + "measure_type": "", + "notes": "", + "created_at": "", + "updated_at": "", + } + ) + + runner = CliRunner() + result = runner.invoke( + review_products.main, + [ + "--purchases-csv", + str(purchases_csv), + "--queue-csv", + str(queue_csv), + "--resolutions-csv", + str(resolutions_csv), + "--catalog-csv", + str(catalog_csv), + ], + input="q\n", + color=True, + ) + + self.assertEqual(0, result.exit_code) + self.assertIn("Review observed product (1/1)", result.output) + self.assertIn("Resolve this observed product group", result.output) + self.assertIn("actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", result.output) + first_item = result.output.index("2026-03-14 | 7.49") + second_item = result.output.index("2026-03-12 | 6.99") + self.assertLess(first_item, second_item) + self.assertIn("image: https://example.test/mixed-pepper.jpg", result.output) + self.assertIn("gcan_mix | MIXED PEPPER (exact normalized name)", result.output) + self.assertIn("\x1b[", result.output) + def test_review_products_creates_canonical_and_resolution(self): with tempfile.TemporaryDirectory() as tmpdir: purchases_csv = Path(tmpdir) / "purchases.csv" @@ -48,25 +179,33 @@ class ReviewWorkflowTests(unittest.TestCase): writer = csv.DictWriter( handle, fieldnames=[ + "purchase_date", "observed_product_id", "canonical_product_id", "retailer", "raw_item_name", "normalized_item_name", + "image_url", "upc", "line_total", + "order_id", + "line_no", ], ) writer.writeheader() writer.writerow( { + "purchase_date": "2026-03-15", "observed_product_id": "gobs_ice", "canonical_product_id": "", "retailer": "giant", "raw_item_name": "SB BAGGED ICE 20LB", "normalized_item_name": "BAGGED ICE", + "image_url": "", "upc": "", "line_total": "3.50", + "order_id": "g1", + "line_no": "1", } )