From d39497c298b32e646bb82072456e3fc66929ab3a Mon Sep 17 00:00:00 2001 From: ben Date: Tue, 17 Mar 2026 13:25:12 -0400 Subject: [PATCH] Refine product review prompt flow --- pm/tasks.org | 41 ++++++-- review_products.py | 116 ++++++++++++++------- tests/test_review_workflow.py | 184 ++++++++++++++++++++++++++++++++-- 3 files changed, 288 insertions(+), 53 deletions(-) diff --git a/pm/tasks.org b/pm/tasks.org index 990eff5..8f7554e 100644 --- a/pm/tasks.org +++ b/pm/tasks.org @@ -371,29 +371,52 @@ Clearly show current state separate from proposed future state. ** acceptance criteria 1. Display position in review queue, e.g., (1/22) -2. Add short help text header to the review item explaining that the action resolves the current observed product group -3. color-code outputs based on info, prompt/menu, warning +2. Display compact header with observed_product under review, queue position, and canonical decision, e.g.: "Resolve [n] observed product group [name] and associated items to canonical_name [name]? (\n [n] matched items)" +3. color-code outputs based on info, input/prompt, warning/error 1. color action menu/requests for input differently from display text; do not color individual options separately + 2. "no canonical_name suggestions found" is informational, not a warning/error. 4. update action menu `[x]exclude` to `e[x]clude` 5. on each review item, display a list of all matched items to be linked, sorted by descending date: 1. YYYY-mm-dd, price, raw item name, normalized item name, upc, retailer 2. image URL, if exists + 3. Sample: 6. on each review item, suggest (but do not auto-apply) up to 3 likely existing canonicals using determinstic rules, e.g: 1. exact normalized name match 2. prefix/contains match on canonical name 3. exact UPC -- reinforce project terminology such as raw_name, observed_name, canonical_name - +7. Sample Entry: +#+begin_comment +Review 7/22: Resolve observed_product MIXED PEPPER to canonical_name [__]? +2 matched items: + [1] 2026-03-12 | 7.49 | MIXED PEPPER 6-PACK | MIXED PEPPER | [upc] | costco | [img_url] + [2] [YYYY-mm-dd] | [price] | [raw_name] | [observed_name] | [upc] | [retailer] | [img_url] +2 canonical suggestions found: + [1] BELL PEPPERS, PRODUCE + [2] PEPPER, SPICES +- reinforce project terminology such as raw_name, observed_name, canonical_name +#+end_comment +8. When link is selected, users should be able to select the number of the item in the list, e.g.: +#+begin_comment +Select the canonical_name to associate [n] items with: + [1] GRB GRADU PCH PUF1. | gcan_01b0d623aa02 + [2] BTB CHICKEN | gcan_0201f0feb749 + [3] LIME | gcan_02074d9e7359 +#+end_comment +9. Add confirmation to link selection with instructions, "[n] [observed_name] and future observed_name matches will be associated with [canonical_name], is this ok? + actions: [Y]es [n]o [b]ack [s]kip [q]uit ** evidence -- commit: -- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python review_products.py --help`; verified review prompt now shows queue position, matched item history, image URLs when present, and deterministic canonical suggestions +- commit: `7b8141c`, `de74218` +- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python -m unittest tests.test_review_workflow tests.test_purchases`; `./venv/bin/python review_products.py --help`; verified compact review header, numbered matched-item display, informational no-suggestion state, numbered canonical selection, and confirmation flow - date: 2026-03-17 ** notes -- The big win here was clarifying terminology in the prompt itself: the reviewer is resolving one observed product group to a canonical_name, not linking raw rows to each other. -- Showing the full matched-item list by descending date made the review context much more legible than the old compact summary fields. -- Deterministic suggestions help, but they are intentionally conservative and shallow; this improves reviewer speed without pretending to solve product matching automatically. +- The key improvement was shifting the prompt from system metadata to reviewer intent: one observed_product, its matched retailer rows, and one canonical_name decision. +- Numbered canonical selection plus confirmation worked better than free-text id entry and should reduce accidental links. +- Deterministic suggestions remain intentionally conservative; they speed up common cases, but unresolved items still depend on human review by design. + +- resolve observed product group (group id) + to canonical name: * [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits) ** acceptance criteria diff --git a/review_products.py b/review_products.py index bd41fec..a2c6956 100644 --- a/review_products.py +++ b/review_products.py @@ -177,10 +177,11 @@ def build_canonical_suggestions(related_rows, catalog_rows, limit=3): def build_display_lines(queue_row, related_rows): lines = [] - for row in sort_related_items(related_rows): + for index, row in enumerate(sort_related_items(related_rows), start=1): lines.append( - " - {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | " + " [{index}] {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | " "{upc} | {retailer}".format( + index=index, purchase_date=row.get("purchase_date", ""), line_total=row.get("line_total", ""), raw_item_name=row.get("raw_item_name", ""), @@ -190,47 +191,81 @@ def build_display_lines(queue_row, related_rows): ) ) if row.get("image_url"): - lines.append(f" image: {row['image_url']}") + lines.append(f" {row['image_url']}") if not lines: - lines.append(" - no matched item rows found") + lines.append(" [1] no matched item rows found") return lines +def observed_name(queue_row, related_rows): + if queue_row.get("normalized_names"): + return queue_row["normalized_names"].split(" | ")[0] + for row in related_rows: + if row.get("normalized_item_name"): + return row["normalized_item_name"] + return queue_row.get("observed_product_id", "") + + +def choose_existing_canonical(display_rows, observed_label, matched_count): + click.secho( + f"Select the canonical_name to associate {matched_count} items with:", + fg=INFO_COLOR, + ) + for index, row in enumerate(display_rows, start=1): + click.echo(f" [{index}] {row['canonical_name']} | {row['canonical_product_id']}") + choice = click.prompt( + click.style("selection", fg=PROMPT_COLOR), + type=click.IntRange(1, len(display_rows)), + ) + chosen_row = display_rows[choice - 1] + click.echo( + f'{matched_count} "{observed_label}" items and future matches will be associated ' + f'with "{chosen_row["canonical_name"]}".' + ) + click.secho( + "actions: [y]es [n]o [b]ack [s]kip [q]uit", + fg=PROMPT_COLOR, + ) + confirm = click.prompt( + click.style("confirm", fg=PROMPT_COLOR), + type=click.Choice(["y", "n", "b", "s", "q"]), + ) + if confirm == "y": + return chosen_row["canonical_product_id"], "" + if confirm == "s": + return "", "skip" + if confirm == "q": + return "", "quit" + return "", "back" + + def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_total): suggestions = build_canonical_suggestions(related_rows, catalog_rows) + observed_label = observed_name(queue_row, related_rows) + matched_count = len(related_rows) click.echo("") click.secho( - f"Review observed product ({queue_index}/{queue_total})", + f"Review {queue_index}/{queue_total}: Resolve observed_product {observed_label} " + "to canonical_name [__]?", fg=INFO_COLOR, ) - click.echo( - "Resolve this observed product group to an existing canonical_name, " - "a new canonical_name, exclude it, or skip it." - ) - click.echo(f"observed_product_id: {queue_row['observed_product_id']}") - click.echo(f"retailer: {queue_row['retailer']}") - click.echo(f"observed_name(s): {queue_row['normalized_names']}") - click.echo(f"upc(s): {queue_row['upc_values']}") - click.echo(f"seen_count: {queue_row['seen_count']}") - click.secho("matched items:", fg=INFO_COLOR) + click.echo(f"{matched_count} matched items:") for line in build_display_lines(queue_row, related_rows): click.echo(line) if suggestions: - click.secho("suggested canonical_name values:", fg=INFO_COLOR) - for suggestion in suggestions: - click.echo( - f" - {suggestion['canonical_product_id']} | {suggestion['canonical_name']} " - f"({suggestion['reason']})" - ) + click.echo(f"{len(suggestions)} canonical suggestions found:") + for index, suggestion in enumerate(suggestions, start=1): + click.echo(f" [{index}] {suggestion['canonical_name']}") else: - click.secho("no deterministic canonical suggestions found", fg=WARNING_COLOR) + click.echo("no canonical_name suggestions found") click.secho( - "actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", + "[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:", fg=PROMPT_COLOR, ) action = click.prompt( - click.style("action", fg=PROMPT_COLOR), + "", type=click.Choice(["l", "n", "x", "s", "q"]), + prompt_suffix=" ", ) if action == "q": return None, None @@ -266,20 +301,27 @@ def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_ } for row in catalog_rows[:10] ] - click.secho("existing canonical_name values:", fg=INFO_COLOR) - for row in display_rows: - click.echo( - f" - {row['canonical_product_id']} | {row['canonical_name']} ({row['reason']})" + while True: + canonical_product_id, outcome = choose_existing_canonical( + display_rows, + observed_label, + matched_count, ) - canonical_product_id = click.prompt( - click.style("canonical product id", fg=PROMPT_COLOR), - type=str, - ) - notes = click.prompt( - click.style("link notes", fg=PROMPT_COLOR), - default="", - show_default=False, - ) + if outcome == "skip": + return { + "observed_product_id": queue_row["observed_product_id"], + "canonical_product_id": "", + "resolution_action": "skip", + "status": "pending", + "resolution_notes": queue_row.get("resolution_notes", ""), + "reviewed_at": str(date.today()), + }, None + if outcome == "quit": + return None, None + if outcome == "back": + continue + break + notes = click.prompt(click.style("link notes", fg=PROMPT_COLOR), default="", show_default=False) return { "observed_product_id": queue_row["observed_product_id"], "canonical_product_id": canonical_product_id, diff --git a/tests/test_review_workflow.py b/tests/test_review_workflow.py index 34ea5e3..5749338 100644 --- a/tests/test_review_workflow.py +++ b/tests/test_review_workflow.py @@ -158,16 +158,186 @@ class ReviewWorkflowTests(unittest.TestCase): ) self.assertEqual(0, result.exit_code) - self.assertIn("Review observed product (1/1)", result.output) - self.assertIn("Resolve this observed product group", result.output) - self.assertIn("actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", result.output) - first_item = result.output.index("2026-03-14 | 7.49") - second_item = result.output.index("2026-03-12 | 6.99") + self.assertIn("Review 1/1: Resolve observed_product MIXED PEPPER to canonical_name [__]?", result.output) + self.assertIn("2 matched items:", result.output) + self.assertIn("[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:", result.output) + first_item = result.output.index("[1] 2026-03-14 | 7.49") + second_item = result.output.index("[2] 2026-03-12 | 6.99") self.assertLess(first_item, second_item) - self.assertIn("image: https://example.test/mixed-pepper.jpg", result.output) - self.assertIn("gcan_mix | MIXED PEPPER (exact normalized name)", result.output) + self.assertIn("https://example.test/mixed-pepper.jpg", result.output) + self.assertIn("1 canonical suggestions found:", result.output) + self.assertIn("[1] MIXED PEPPER", result.output) self.assertIn("\x1b[", result.output) + def test_review_products_no_suggestions_is_informational(self): + with tempfile.TemporaryDirectory() as tmpdir: + purchases_csv = Path(tmpdir) / "purchases.csv" + queue_csv = Path(tmpdir) / "review_queue.csv" + resolutions_csv = Path(tmpdir) / "review_resolutions.csv" + catalog_csv = Path(tmpdir) / "canonical_catalog.csv" + + with purchases_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter( + handle, + fieldnames=[ + "purchase_date", + "retailer", + "order_id", + "line_no", + "observed_product_id", + "canonical_product_id", + "raw_item_name", + "normalized_item_name", + "image_url", + "upc", + "line_total", + ], + ) + writer.writeheader() + writer.writerow( + { + "purchase_date": "2026-03-14", + "retailer": "giant", + "order_id": "g1", + "line_no": "1", + "observed_product_id": "gobs_ice", + "canonical_product_id": "", + "raw_item_name": "SB BAGGED ICE 20LB", + "normalized_item_name": "BAGGED ICE", + "image_url": "", + "upc": "", + "line_total": "3.50", + } + ) + + with catalog_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) + writer.writeheader() + + result = CliRunner().invoke( + review_products.main, + [ + "--purchases-csv", + str(purchases_csv), + "--queue-csv", + str(queue_csv), + "--resolutions-csv", + str(resolutions_csv), + "--catalog-csv", + str(catalog_csv), + ], + input="q\n", + color=True, + ) + + self.assertEqual(0, result.exit_code) + self.assertIn("no canonical_name suggestions found", result.output) + + def test_link_existing_uses_numbered_selection_and_confirmation(self): + with tempfile.TemporaryDirectory() as tmpdir: + purchases_csv = Path(tmpdir) / "purchases.csv" + queue_csv = Path(tmpdir) / "review_queue.csv" + resolutions_csv = Path(tmpdir) / "review_resolutions.csv" + catalog_csv = Path(tmpdir) / "canonical_catalog.csv" + + with purchases_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter( + handle, + fieldnames=[ + "purchase_date", + "retailer", + "order_id", + "line_no", + "observed_product_id", + "canonical_product_id", + "raw_item_name", + "normalized_item_name", + "image_url", + "upc", + "line_total", + ], + ) + writer.writeheader() + writer.writerows( + [ + { + "purchase_date": "2026-03-14", + "retailer": "costco", + "order_id": "c2", + "line_no": "2", + "observed_product_id": "gobs_mix", + "canonical_product_id": "", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "7.49", + }, + { + "purchase_date": "2026-03-12", + "retailer": "costco", + "order_id": "c1", + "line_no": "1", + "observed_product_id": "gobs_mix", + "canonical_product_id": "", + "raw_item_name": "MIXED PEPPER 6-PACK", + "normalized_item_name": "MIXED PEPPER", + "image_url": "", + "upc": "", + "line_total": "6.99", + }, + ] + ) + + with catalog_csv.open("w", newline="", encoding="utf-8") as handle: + writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS) + writer.writeheader() + writer.writerow( + { + "canonical_product_id": "gcan_mix", + "canonical_name": "MIXED PEPPER", + "category": "", + "product_type": "", + "brand": "", + "variant": "", + "size_value": "", + "size_unit": "", + "pack_qty": "", + "measure_type": "", + "notes": "", + "created_at": "", + "updated_at": "", + } + ) + + result = CliRunner().invoke( + review_products.main, + [ + "--purchases-csv", + str(purchases_csv), + "--queue-csv", + str(queue_csv), + "--resolutions-csv", + str(resolutions_csv), + "--catalog-csv", + str(catalog_csv), + "--limit", + "1", + ], + input="l\n1\ny\nlinked by test\n", + color=True, + ) + + self.assertEqual(0, result.exit_code) + self.assertIn("Select the canonical_name to associate 2 items with:", result.output) + self.assertIn('[1] MIXED PEPPER | gcan_mix', result.output) + self.assertIn('2 "MIXED PEPPER" items and future matches will be associated with "MIXED PEPPER".', result.output) + self.assertIn("actions: [y]es [n]o [b]ack [s]kip [q]uit", result.output) + with resolutions_csv.open(newline="", encoding="utf-8") as handle: + rows = list(csv.DictReader(handle)) + self.assertEqual("gcan_mix", rows[0]["canonical_product_id"]) + self.assertEqual("link", rows[0]["resolution_action"]) + def test_review_products_creates_canonical_and_resolution(self): with tempfile.TemporaryDirectory() as tmpdir: purchases_csv = Path(tmpdir) / "purchases.csv"