Refine product review prompt flow

This commit is contained in:
ben
2026-03-17 13:25:12 -04:00
parent 7b8141cd42
commit d39497c298
3 changed files with 288 additions and 53 deletions

View File

@@ -371,29 +371,52 @@
Clearly show current state separate from proposed future state.
** acceptance criteria
1. Display position in review queue, e.g., (1/22)
2. Add short help text header to the review item explaining that the action resolves the current observed product group
3. color-code outputs based on info, prompt/menu, warning
2. Display compact header with observed_product under review, queue position, and canonical decision, e.g.: "Resolve [n] observed product group [name] and associated items to canonical_name [name]? (\n [n] matched items)"
3. color-code outputs based on info, input/prompt, warning/error
1. color action menu/requests for input differently from display text; do not color individual options separately
2. "no canonical_name suggestions found" is informational, not a warning/error.
4. update action menu `[x]exclude` to `e[x]clude`
5. on each review item, display a list of all matched items to be linked, sorted by descending date:
1. YYYY-mm-dd, price, raw item name, normalized item name, upc, retailer
2. image URL, if exists
3. Sample:
6. on each review item, suggest (but do not auto-apply) up to 3 likely existing canonicals using determinstic rules, e.g:
1. exact normalized name match
2. prefix/contains match on canonical name
3. exact UPC
- reinforce project terminology such as raw_name, observed_name, canonical_name
7. Sample Entry:
#+begin_comment
Review 7/22: Resolve observed_product MIXED PEPPER to canonical_name [__]?
2 matched items:
[1] 2026-03-12 | 7.49 | MIXED PEPPER 6-PACK | MIXED PEPPER | [upc] | costco | [img_url]
[2] [YYYY-mm-dd] | [price] | [raw_name] | [observed_name] | [upc] | [retailer] | [img_url]
2 canonical suggestions found:
[1] BELL PEPPERS, PRODUCE
[2] PEPPER, SPICES
- reinforce project terminology such as raw_name, observed_name, canonical_name
#+end_comment
8. When link is selected, users should be able to select the number of the item in the list, e.g.:
#+begin_comment
Select the canonical_name to associate [n] items with:
[1] GRB GRADU PCH PUF1. | gcan_01b0d623aa02
[2] BTB CHICKEN | gcan_0201f0feb749
[3] LIME | gcan_02074d9e7359
#+end_comment
9. Add confirmation to link selection with instructions, "[n] [observed_name] and future observed_name matches will be associated with [canonical_name], is this ok?
actions: [Y]es [n]o [b]ack [s]kip [q]uit
** evidence
- commit:
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python review_products.py --help`; verified review prompt now shows queue position, matched item history, image URLs when present, and deterministic canonical suggestions
- commit: `7b8141c`, `de74218`
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python -m unittest tests.test_review_workflow tests.test_purchases`; `./venv/bin/python review_products.py --help`; verified compact review header, numbered matched-item display, informational no-suggestion state, numbered canonical selection, and confirmation flow
- date: 2026-03-17
** notes
- The big win here was clarifying terminology in the prompt itself: the reviewer is resolving one observed product group to a canonical_name, not linking raw rows to each other.
- Showing the full matched-item list by descending date made the review context much more legible than the old compact summary fields.
- Deterministic suggestions help, but they are intentionally conservative and shallow; this improves reviewer speed without pretending to solve product matching automatically.
- The key improvement was shifting the prompt from system metadata to reviewer intent: one observed_product, its matched retailer rows, and one canonical_name decision.
- Numbered canonical selection plus confirmation worked better than free-text id entry and should reduce accidental links.
- Deterministic suggestions remain intentionally conservative; they speed up common cases, but unresolved items still depend on human review by design.
- resolve observed product group (group id)
to canonical name:
* [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits)
** acceptance criteria

View File

@@ -177,10 +177,11 @@ def build_canonical_suggestions(related_rows, catalog_rows, limit=3):
def build_display_lines(queue_row, related_rows):
lines = []
for row in sort_related_items(related_rows):
for index, row in enumerate(sort_related_items(related_rows), start=1):
lines.append(
" - {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | "
" [{index}] {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | "
"{upc} | {retailer}".format(
index=index,
purchase_date=row.get("purchase_date", ""),
line_total=row.get("line_total", ""),
raw_item_name=row.get("raw_item_name", ""),
@@ -190,47 +191,81 @@ def build_display_lines(queue_row, related_rows):
)
)
if row.get("image_url"):
lines.append(f" image: {row['image_url']}")
lines.append(f" {row['image_url']}")
if not lines:
lines.append(" - no matched item rows found")
lines.append(" [1] no matched item rows found")
return lines
def observed_name(queue_row, related_rows):
if queue_row.get("normalized_names"):
return queue_row["normalized_names"].split(" | ")[0]
for row in related_rows:
if row.get("normalized_item_name"):
return row["normalized_item_name"]
return queue_row.get("observed_product_id", "")
def choose_existing_canonical(display_rows, observed_label, matched_count):
click.secho(
f"Select the canonical_name to associate {matched_count} items with:",
fg=INFO_COLOR,
)
for index, row in enumerate(display_rows, start=1):
click.echo(f" [{index}] {row['canonical_name']} | {row['canonical_product_id']}")
choice = click.prompt(
click.style("selection", fg=PROMPT_COLOR),
type=click.IntRange(1, len(display_rows)),
)
chosen_row = display_rows[choice - 1]
click.echo(
f'{matched_count} "{observed_label}" items and future matches will be associated '
f'with "{chosen_row["canonical_name"]}".'
)
click.secho(
"actions: [y]es [n]o [b]ack [s]kip [q]uit",
fg=PROMPT_COLOR,
)
confirm = click.prompt(
click.style("confirm", fg=PROMPT_COLOR),
type=click.Choice(["y", "n", "b", "s", "q"]),
)
if confirm == "y":
return chosen_row["canonical_product_id"], ""
if confirm == "s":
return "", "skip"
if confirm == "q":
return "", "quit"
return "", "back"
def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_total):
suggestions = build_canonical_suggestions(related_rows, catalog_rows)
observed_label = observed_name(queue_row, related_rows)
matched_count = len(related_rows)
click.echo("")
click.secho(
f"Review observed product ({queue_index}/{queue_total})",
f"Review {queue_index}/{queue_total}: Resolve observed_product {observed_label} "
"to canonical_name [__]?",
fg=INFO_COLOR,
)
click.echo(
"Resolve this observed product group to an existing canonical_name, "
"a new canonical_name, exclude it, or skip it."
)
click.echo(f"observed_product_id: {queue_row['observed_product_id']}")
click.echo(f"retailer: {queue_row['retailer']}")
click.echo(f"observed_name(s): {queue_row['normalized_names']}")
click.echo(f"upc(s): {queue_row['upc_values']}")
click.echo(f"seen_count: {queue_row['seen_count']}")
click.secho("matched items:", fg=INFO_COLOR)
click.echo(f"{matched_count} matched items:")
for line in build_display_lines(queue_row, related_rows):
click.echo(line)
if suggestions:
click.secho("suggested canonical_name values:", fg=INFO_COLOR)
for suggestion in suggestions:
click.echo(
f" - {suggestion['canonical_product_id']} | {suggestion['canonical_name']} "
f"({suggestion['reason']})"
)
click.echo(f"{len(suggestions)} canonical suggestions found:")
for index, suggestion in enumerate(suggestions, start=1):
click.echo(f" [{index}] {suggestion['canonical_name']}")
else:
click.secho("no deterministic canonical suggestions found", fg=WARNING_COLOR)
click.echo("no canonical_name suggestions found")
click.secho(
"actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit",
"[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:",
fg=PROMPT_COLOR,
)
action = click.prompt(
click.style("action", fg=PROMPT_COLOR),
"",
type=click.Choice(["l", "n", "x", "s", "q"]),
prompt_suffix=" ",
)
if action == "q":
return None, None
@@ -266,20 +301,27 @@ def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_
}
for row in catalog_rows[:10]
]
click.secho("existing canonical_name values:", fg=INFO_COLOR)
for row in display_rows:
click.echo(
f" - {row['canonical_product_id']} | {row['canonical_name']} ({row['reason']})"
while True:
canonical_product_id, outcome = choose_existing_canonical(
display_rows,
observed_label,
matched_count,
)
canonical_product_id = click.prompt(
click.style("canonical product id", fg=PROMPT_COLOR),
type=str,
)
notes = click.prompt(
click.style("link notes", fg=PROMPT_COLOR),
default="",
show_default=False,
)
if outcome == "skip":
return {
"observed_product_id": queue_row["observed_product_id"],
"canonical_product_id": "",
"resolution_action": "skip",
"status": "pending",
"resolution_notes": queue_row.get("resolution_notes", ""),
"reviewed_at": str(date.today()),
}, None
if outcome == "quit":
return None, None
if outcome == "back":
continue
break
notes = click.prompt(click.style("link notes", fg=PROMPT_COLOR), default="", show_default=False)
return {
"observed_product_id": queue_row["observed_product_id"],
"canonical_product_id": canonical_product_id,

View File

@@ -158,16 +158,186 @@ class ReviewWorkflowTests(unittest.TestCase):
)
self.assertEqual(0, result.exit_code)
self.assertIn("Review observed product (1/1)", result.output)
self.assertIn("Resolve this observed product group", result.output)
self.assertIn("actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", result.output)
first_item = result.output.index("2026-03-14 | 7.49")
second_item = result.output.index("2026-03-12 | 6.99")
self.assertIn("Review 1/1: Resolve observed_product MIXED PEPPER to canonical_name [__]?", result.output)
self.assertIn("2 matched items:", result.output)
self.assertIn("[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:", result.output)
first_item = result.output.index("[1] 2026-03-14 | 7.49")
second_item = result.output.index("[2] 2026-03-12 | 6.99")
self.assertLess(first_item, second_item)
self.assertIn("image: https://example.test/mixed-pepper.jpg", result.output)
self.assertIn("gcan_mix | MIXED PEPPER (exact normalized name)", result.output)
self.assertIn("https://example.test/mixed-pepper.jpg", result.output)
self.assertIn("1 canonical suggestions found:", result.output)
self.assertIn("[1] MIXED PEPPER", result.output)
self.assertIn("\x1b[", result.output)
def test_review_products_no_suggestions_is_informational(self):
with tempfile.TemporaryDirectory() as tmpdir:
purchases_csv = Path(tmpdir) / "purchases.csv"
queue_csv = Path(tmpdir) / "review_queue.csv"
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"observed_product_id",
"canonical_product_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerow(
{
"purchase_date": "2026-03-14",
"retailer": "giant",
"order_id": "g1",
"line_no": "1",
"observed_product_id": "gobs_ice",
"canonical_product_id": "",
"raw_item_name": "SB BAGGED ICE 20LB",
"normalized_item_name": "BAGGED ICE",
"image_url": "",
"upc": "",
"line_total": "3.50",
}
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
writer.writeheader()
result = CliRunner().invoke(
review_products.main,
[
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
str(queue_csv),
"--resolutions-csv",
str(resolutions_csv),
"--catalog-csv",
str(catalog_csv),
],
input="q\n",
color=True,
)
self.assertEqual(0, result.exit_code)
self.assertIn("no canonical_name suggestions found", result.output)
def test_link_existing_uses_numbered_selection_and_confirmation(self):
with tempfile.TemporaryDirectory() as tmpdir:
purchases_csv = Path(tmpdir) / "purchases.csv"
queue_csv = Path(tmpdir) / "review_queue.csv"
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(
handle,
fieldnames=[
"purchase_date",
"retailer",
"order_id",
"line_no",
"observed_product_id",
"canonical_product_id",
"raw_item_name",
"normalized_item_name",
"image_url",
"upc",
"line_total",
],
)
writer.writeheader()
writer.writerows(
[
{
"purchase_date": "2026-03-14",
"retailer": "costco",
"order_id": "c2",
"line_no": "2",
"observed_product_id": "gobs_mix",
"canonical_product_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "7.49",
},
{
"purchase_date": "2026-03-12",
"retailer": "costco",
"order_id": "c1",
"line_no": "1",
"observed_product_id": "gobs_mix",
"canonical_product_id": "",
"raw_item_name": "MIXED PEPPER 6-PACK",
"normalized_item_name": "MIXED PEPPER",
"image_url": "",
"upc": "",
"line_total": "6.99",
},
]
)
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
writer.writeheader()
writer.writerow(
{
"canonical_product_id": "gcan_mix",
"canonical_name": "MIXED PEPPER",
"category": "",
"product_type": "",
"brand": "",
"variant": "",
"size_value": "",
"size_unit": "",
"pack_qty": "",
"measure_type": "",
"notes": "",
"created_at": "",
"updated_at": "",
}
)
result = CliRunner().invoke(
review_products.main,
[
"--purchases-csv",
str(purchases_csv),
"--queue-csv",
str(queue_csv),
"--resolutions-csv",
str(resolutions_csv),
"--catalog-csv",
str(catalog_csv),
"--limit",
"1",
],
input="l\n1\ny\nlinked by test\n",
color=True,
)
self.assertEqual(0, result.exit_code)
self.assertIn("Select the canonical_name to associate 2 items with:", result.output)
self.assertIn('[1] MIXED PEPPER | gcan_mix', result.output)
self.assertIn('2 "MIXED PEPPER" items and future matches will be associated with "MIXED PEPPER".', result.output)
self.assertIn("actions: [y]es [n]o [b]ack [s]kip [q]uit", result.output)
with resolutions_csv.open(newline="", encoding="utf-8") as handle:
rows = list(csv.DictReader(handle))
self.assertEqual("gcan_mix", rows[0]["canonical_product_id"])
self.assertEqual("link", rows[0]["resolution_action"])
def test_review_products_creates_canonical_and_resolution(self):
with tempfile.TemporaryDirectory() as tmpdir:
purchases_csv = Path(tmpdir) / "purchases.csv"