Refine product review prompt flow
This commit is contained in:
41
pm/tasks.org
41
pm/tasks.org
@@ -371,29 +371,52 @@
|
||||
Clearly show current state separate from proposed future state.
|
||||
** acceptance criteria
|
||||
1. Display position in review queue, e.g., (1/22)
|
||||
2. Add short help text header to the review item explaining that the action resolves the current observed product group
|
||||
3. color-code outputs based on info, prompt/menu, warning
|
||||
2. Display compact header with observed_product under review, queue position, and canonical decision, e.g.: "Resolve [n] observed product group [name] and associated items to canonical_name [name]? (\n [n] matched items)"
|
||||
3. color-code outputs based on info, input/prompt, warning/error
|
||||
1. color action menu/requests for input differently from display text; do not color individual options separately
|
||||
2. "no canonical_name suggestions found" is informational, not a warning/error.
|
||||
4. update action menu `[x]exclude` to `e[x]clude`
|
||||
5. on each review item, display a list of all matched items to be linked, sorted by descending date:
|
||||
1. YYYY-mm-dd, price, raw item name, normalized item name, upc, retailer
|
||||
2. image URL, if exists
|
||||
3. Sample:
|
||||
6. on each review item, suggest (but do not auto-apply) up to 3 likely existing canonicals using determinstic rules, e.g:
|
||||
1. exact normalized name match
|
||||
2. prefix/contains match on canonical name
|
||||
3. exact UPC
|
||||
- reinforce project terminology such as raw_name, observed_name, canonical_name
|
||||
|
||||
7. Sample Entry:
|
||||
#+begin_comment
|
||||
Review 7/22: Resolve observed_product MIXED PEPPER to canonical_name [__]?
|
||||
2 matched items:
|
||||
[1] 2026-03-12 | 7.49 | MIXED PEPPER 6-PACK | MIXED PEPPER | [upc] | costco | [img_url]
|
||||
[2] [YYYY-mm-dd] | [price] | [raw_name] | [observed_name] | [upc] | [retailer] | [img_url]
|
||||
2 canonical suggestions found:
|
||||
[1] BELL PEPPERS, PRODUCE
|
||||
[2] PEPPER, SPICES
|
||||
- reinforce project terminology such as raw_name, observed_name, canonical_name
|
||||
#+end_comment
|
||||
8. When link is selected, users should be able to select the number of the item in the list, e.g.:
|
||||
#+begin_comment
|
||||
Select the canonical_name to associate [n] items with:
|
||||
[1] GRB GRADU PCH PUF1. | gcan_01b0d623aa02
|
||||
[2] BTB CHICKEN | gcan_0201f0feb749
|
||||
[3] LIME | gcan_02074d9e7359
|
||||
#+end_comment
|
||||
9. Add confirmation to link selection with instructions, "[n] [observed_name] and future observed_name matches will be associated with [canonical_name], is this ok?
|
||||
actions: [Y]es [n]o [b]ack [s]kip [q]uit
|
||||
** evidence
|
||||
- commit:
|
||||
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python review_products.py --help`; verified review prompt now shows queue position, matched item history, image URLs when present, and deterministic canonical suggestions
|
||||
- commit: `7b8141c`, `de74218`
|
||||
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python -m unittest tests.test_review_workflow tests.test_purchases`; `./venv/bin/python review_products.py --help`; verified compact review header, numbered matched-item display, informational no-suggestion state, numbered canonical selection, and confirmation flow
|
||||
- date: 2026-03-17
|
||||
|
||||
** notes
|
||||
- The big win here was clarifying terminology in the prompt itself: the reviewer is resolving one observed product group to a canonical_name, not linking raw rows to each other.
|
||||
- Showing the full matched-item list by descending date made the review context much more legible than the old compact summary fields.
|
||||
- Deterministic suggestions help, but they are intentionally conservative and shallow; this improves reviewer speed without pretending to solve product matching automatically.
|
||||
- The key improvement was shifting the prompt from system metadata to reviewer intent: one observed_product, its matched retailer rows, and one canonical_name decision.
|
||||
- Numbered canonical selection plus confirmation worked better than free-text id entry and should reduce accidental links.
|
||||
- Deterministic suggestions remain intentionally conservative; they speed up common cases, but unresolved items still depend on human review by design.
|
||||
|
||||
|
||||
- resolve observed product group (group id)
|
||||
to canonical name:
|
||||
* [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits)
|
||||
|
||||
** acceptance criteria
|
||||
|
||||
@@ -177,10 +177,11 @@ def build_canonical_suggestions(related_rows, catalog_rows, limit=3):
|
||||
|
||||
def build_display_lines(queue_row, related_rows):
|
||||
lines = []
|
||||
for row in sort_related_items(related_rows):
|
||||
for index, row in enumerate(sort_related_items(related_rows), start=1):
|
||||
lines.append(
|
||||
" - {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | "
|
||||
" [{index}] {purchase_date} | {line_total} | {raw_item_name} | {normalized_item_name} | "
|
||||
"{upc} | {retailer}".format(
|
||||
index=index,
|
||||
purchase_date=row.get("purchase_date", ""),
|
||||
line_total=row.get("line_total", ""),
|
||||
raw_item_name=row.get("raw_item_name", ""),
|
||||
@@ -190,47 +191,81 @@ def build_display_lines(queue_row, related_rows):
|
||||
)
|
||||
)
|
||||
if row.get("image_url"):
|
||||
lines.append(f" image: {row['image_url']}")
|
||||
lines.append(f" {row['image_url']}")
|
||||
if not lines:
|
||||
lines.append(" - no matched item rows found")
|
||||
lines.append(" [1] no matched item rows found")
|
||||
return lines
|
||||
|
||||
|
||||
def observed_name(queue_row, related_rows):
|
||||
if queue_row.get("normalized_names"):
|
||||
return queue_row["normalized_names"].split(" | ")[0]
|
||||
for row in related_rows:
|
||||
if row.get("normalized_item_name"):
|
||||
return row["normalized_item_name"]
|
||||
return queue_row.get("observed_product_id", "")
|
||||
|
||||
|
||||
def choose_existing_canonical(display_rows, observed_label, matched_count):
|
||||
click.secho(
|
||||
f"Select the canonical_name to associate {matched_count} items with:",
|
||||
fg=INFO_COLOR,
|
||||
)
|
||||
for index, row in enumerate(display_rows, start=1):
|
||||
click.echo(f" [{index}] {row['canonical_name']} | {row['canonical_product_id']}")
|
||||
choice = click.prompt(
|
||||
click.style("selection", fg=PROMPT_COLOR),
|
||||
type=click.IntRange(1, len(display_rows)),
|
||||
)
|
||||
chosen_row = display_rows[choice - 1]
|
||||
click.echo(
|
||||
f'{matched_count} "{observed_label}" items and future matches will be associated '
|
||||
f'with "{chosen_row["canonical_name"]}".'
|
||||
)
|
||||
click.secho(
|
||||
"actions: [y]es [n]o [b]ack [s]kip [q]uit",
|
||||
fg=PROMPT_COLOR,
|
||||
)
|
||||
confirm = click.prompt(
|
||||
click.style("confirm", fg=PROMPT_COLOR),
|
||||
type=click.Choice(["y", "n", "b", "s", "q"]),
|
||||
)
|
||||
if confirm == "y":
|
||||
return chosen_row["canonical_product_id"], ""
|
||||
if confirm == "s":
|
||||
return "", "skip"
|
||||
if confirm == "q":
|
||||
return "", "quit"
|
||||
return "", "back"
|
||||
|
||||
|
||||
def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_total):
|
||||
suggestions = build_canonical_suggestions(related_rows, catalog_rows)
|
||||
observed_label = observed_name(queue_row, related_rows)
|
||||
matched_count = len(related_rows)
|
||||
click.echo("")
|
||||
click.secho(
|
||||
f"Review observed product ({queue_index}/{queue_total})",
|
||||
f"Review {queue_index}/{queue_total}: Resolve observed_product {observed_label} "
|
||||
"to canonical_name [__]?",
|
||||
fg=INFO_COLOR,
|
||||
)
|
||||
click.echo(
|
||||
"Resolve this observed product group to an existing canonical_name, "
|
||||
"a new canonical_name, exclude it, or skip it."
|
||||
)
|
||||
click.echo(f"observed_product_id: {queue_row['observed_product_id']}")
|
||||
click.echo(f"retailer: {queue_row['retailer']}")
|
||||
click.echo(f"observed_name(s): {queue_row['normalized_names']}")
|
||||
click.echo(f"upc(s): {queue_row['upc_values']}")
|
||||
click.echo(f"seen_count: {queue_row['seen_count']}")
|
||||
click.secho("matched items:", fg=INFO_COLOR)
|
||||
click.echo(f"{matched_count} matched items:")
|
||||
for line in build_display_lines(queue_row, related_rows):
|
||||
click.echo(line)
|
||||
if suggestions:
|
||||
click.secho("suggested canonical_name values:", fg=INFO_COLOR)
|
||||
for suggestion in suggestions:
|
||||
click.echo(
|
||||
f" - {suggestion['canonical_product_id']} | {suggestion['canonical_name']} "
|
||||
f"({suggestion['reason']})"
|
||||
)
|
||||
click.echo(f"{len(suggestions)} canonical suggestions found:")
|
||||
for index, suggestion in enumerate(suggestions, start=1):
|
||||
click.echo(f" [{index}] {suggestion['canonical_name']}")
|
||||
else:
|
||||
click.secho("no deterministic canonical suggestions found", fg=WARNING_COLOR)
|
||||
click.echo("no canonical_name suggestions found")
|
||||
click.secho(
|
||||
"actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit",
|
||||
"[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:",
|
||||
fg=PROMPT_COLOR,
|
||||
)
|
||||
action = click.prompt(
|
||||
click.style("action", fg=PROMPT_COLOR),
|
||||
"",
|
||||
type=click.Choice(["l", "n", "x", "s", "q"]),
|
||||
prompt_suffix=" ",
|
||||
)
|
||||
if action == "q":
|
||||
return None, None
|
||||
@@ -266,20 +301,27 @@ def prompt_resolution(queue_row, related_rows, catalog_rows, queue_index, queue_
|
||||
}
|
||||
for row in catalog_rows[:10]
|
||||
]
|
||||
click.secho("existing canonical_name values:", fg=INFO_COLOR)
|
||||
for row in display_rows:
|
||||
click.echo(
|
||||
f" - {row['canonical_product_id']} | {row['canonical_name']} ({row['reason']})"
|
||||
while True:
|
||||
canonical_product_id, outcome = choose_existing_canonical(
|
||||
display_rows,
|
||||
observed_label,
|
||||
matched_count,
|
||||
)
|
||||
canonical_product_id = click.prompt(
|
||||
click.style("canonical product id", fg=PROMPT_COLOR),
|
||||
type=str,
|
||||
)
|
||||
notes = click.prompt(
|
||||
click.style("link notes", fg=PROMPT_COLOR),
|
||||
default="",
|
||||
show_default=False,
|
||||
)
|
||||
if outcome == "skip":
|
||||
return {
|
||||
"observed_product_id": queue_row["observed_product_id"],
|
||||
"canonical_product_id": "",
|
||||
"resolution_action": "skip",
|
||||
"status": "pending",
|
||||
"resolution_notes": queue_row.get("resolution_notes", ""),
|
||||
"reviewed_at": str(date.today()),
|
||||
}, None
|
||||
if outcome == "quit":
|
||||
return None, None
|
||||
if outcome == "back":
|
||||
continue
|
||||
break
|
||||
notes = click.prompt(click.style("link notes", fg=PROMPT_COLOR), default="", show_default=False)
|
||||
return {
|
||||
"observed_product_id": queue_row["observed_product_id"],
|
||||
"canonical_product_id": canonical_product_id,
|
||||
|
||||
@@ -158,16 +158,186 @@ class ReviewWorkflowTests(unittest.TestCase):
|
||||
)
|
||||
|
||||
self.assertEqual(0, result.exit_code)
|
||||
self.assertIn("Review observed product (1/1)", result.output)
|
||||
self.assertIn("Resolve this observed product group", result.output)
|
||||
self.assertIn("actions: [l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit", result.output)
|
||||
first_item = result.output.index("2026-03-14 | 7.49")
|
||||
second_item = result.output.index("2026-03-12 | 6.99")
|
||||
self.assertIn("Review 1/1: Resolve observed_product MIXED PEPPER to canonical_name [__]?", result.output)
|
||||
self.assertIn("2 matched items:", result.output)
|
||||
self.assertIn("[l]ink existing [n]ew canonical e[x]clude [s]kip [q]uit:", result.output)
|
||||
first_item = result.output.index("[1] 2026-03-14 | 7.49")
|
||||
second_item = result.output.index("[2] 2026-03-12 | 6.99")
|
||||
self.assertLess(first_item, second_item)
|
||||
self.assertIn("image: https://example.test/mixed-pepper.jpg", result.output)
|
||||
self.assertIn("gcan_mix | MIXED PEPPER (exact normalized name)", result.output)
|
||||
self.assertIn("https://example.test/mixed-pepper.jpg", result.output)
|
||||
self.assertIn("1 canonical suggestions found:", result.output)
|
||||
self.assertIn("[1] MIXED PEPPER", result.output)
|
||||
self.assertIn("\x1b[", result.output)
|
||||
|
||||
def test_review_products_no_suggestions_is_informational(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
queue_csv = Path(tmpdir) / "review_queue.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
|
||||
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(
|
||||
handle,
|
||||
fieldnames=[
|
||||
"purchase_date",
|
||||
"retailer",
|
||||
"order_id",
|
||||
"line_no",
|
||||
"observed_product_id",
|
||||
"canonical_product_id",
|
||||
"raw_item_name",
|
||||
"normalized_item_name",
|
||||
"image_url",
|
||||
"upc",
|
||||
"line_total",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
writer.writerow(
|
||||
{
|
||||
"purchase_date": "2026-03-14",
|
||||
"retailer": "giant",
|
||||
"order_id": "g1",
|
||||
"line_no": "1",
|
||||
"observed_product_id": "gobs_ice",
|
||||
"canonical_product_id": "",
|
||||
"raw_item_name": "SB BAGGED ICE 20LB",
|
||||
"normalized_item_name": "BAGGED ICE",
|
||||
"image_url": "",
|
||||
"upc": "",
|
||||
"line_total": "3.50",
|
||||
}
|
||||
)
|
||||
|
||||
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
|
||||
writer.writeheader()
|
||||
|
||||
result = CliRunner().invoke(
|
||||
review_products.main,
|
||||
[
|
||||
"--purchases-csv",
|
||||
str(purchases_csv),
|
||||
"--queue-csv",
|
||||
str(queue_csv),
|
||||
"--resolutions-csv",
|
||||
str(resolutions_csv),
|
||||
"--catalog-csv",
|
||||
str(catalog_csv),
|
||||
],
|
||||
input="q\n",
|
||||
color=True,
|
||||
)
|
||||
|
||||
self.assertEqual(0, result.exit_code)
|
||||
self.assertIn("no canonical_name suggestions found", result.output)
|
||||
|
||||
def test_link_existing_uses_numbered_selection_and_confirmation(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
queue_csv = Path(tmpdir) / "review_queue.csv"
|
||||
resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
|
||||
catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
|
||||
|
||||
with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(
|
||||
handle,
|
||||
fieldnames=[
|
||||
"purchase_date",
|
||||
"retailer",
|
||||
"order_id",
|
||||
"line_no",
|
||||
"observed_product_id",
|
||||
"canonical_product_id",
|
||||
"raw_item_name",
|
||||
"normalized_item_name",
|
||||
"image_url",
|
||||
"upc",
|
||||
"line_total",
|
||||
],
|
||||
)
|
||||
writer.writeheader()
|
||||
writer.writerows(
|
||||
[
|
||||
{
|
||||
"purchase_date": "2026-03-14",
|
||||
"retailer": "costco",
|
||||
"order_id": "c2",
|
||||
"line_no": "2",
|
||||
"observed_product_id": "gobs_mix",
|
||||
"canonical_product_id": "",
|
||||
"raw_item_name": "MIXED PEPPER 6-PACK",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
"upc": "",
|
||||
"line_total": "7.49",
|
||||
},
|
||||
{
|
||||
"purchase_date": "2026-03-12",
|
||||
"retailer": "costco",
|
||||
"order_id": "c1",
|
||||
"line_no": "1",
|
||||
"observed_product_id": "gobs_mix",
|
||||
"canonical_product_id": "",
|
||||
"raw_item_name": "MIXED PEPPER 6-PACK",
|
||||
"normalized_item_name": "MIXED PEPPER",
|
||||
"image_url": "",
|
||||
"upc": "",
|
||||
"line_total": "6.99",
|
||||
},
|
||||
]
|
||||
)
|
||||
|
||||
with catalog_csv.open("w", newline="", encoding="utf-8") as handle:
|
||||
writer = csv.DictWriter(handle, fieldnames=review_products.build_purchases.CATALOG_FIELDS)
|
||||
writer.writeheader()
|
||||
writer.writerow(
|
||||
{
|
||||
"canonical_product_id": "gcan_mix",
|
||||
"canonical_name": "MIXED PEPPER",
|
||||
"category": "",
|
||||
"product_type": "",
|
||||
"brand": "",
|
||||
"variant": "",
|
||||
"size_value": "",
|
||||
"size_unit": "",
|
||||
"pack_qty": "",
|
||||
"measure_type": "",
|
||||
"notes": "",
|
||||
"created_at": "",
|
||||
"updated_at": "",
|
||||
}
|
||||
)
|
||||
|
||||
result = CliRunner().invoke(
|
||||
review_products.main,
|
||||
[
|
||||
"--purchases-csv",
|
||||
str(purchases_csv),
|
||||
"--queue-csv",
|
||||
str(queue_csv),
|
||||
"--resolutions-csv",
|
||||
str(resolutions_csv),
|
||||
"--catalog-csv",
|
||||
str(catalog_csv),
|
||||
"--limit",
|
||||
"1",
|
||||
],
|
||||
input="l\n1\ny\nlinked by test\n",
|
||||
color=True,
|
||||
)
|
||||
|
||||
self.assertEqual(0, result.exit_code)
|
||||
self.assertIn("Select the canonical_name to associate 2 items with:", result.output)
|
||||
self.assertIn('[1] MIXED PEPPER | gcan_mix', result.output)
|
||||
self.assertIn('2 "MIXED PEPPER" items and future matches will be associated with "MIXED PEPPER".', result.output)
|
||||
self.assertIn("actions: [y]es [n]o [b]ack [s]kip [q]uit", result.output)
|
||||
with resolutions_csv.open(newline="", encoding="utf-8") as handle:
|
||||
rows = list(csv.DictReader(handle))
|
||||
self.assertEqual("gcan_mix", rows[0]["canonical_product_id"])
|
||||
self.assertEqual("link", rows[0]["resolution_action"])
|
||||
|
||||
def test_review_products_creates_canonical_and_resolution(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
purchases_csv = Path(tmpdir) / "purchases.csv"
|
||||
|
||||
Reference in New Issue
Block a user