Add terminal review resolution workflow

This commit is contained in:
ben
2026-03-16 20:45:37 -04:00
parent 34eedff9c5
commit c7dad5489e
5 changed files with 597 additions and 9 deletions

View File

@@ -7,7 +7,7 @@ import build_canonical_layer
import build_observed_products
import validate_cross_retailer_flow
from enrich_giant import format_decimal, to_decimal
from layer_helpers import read_csv_rows, write_csv_rows
from layer_helpers import read_csv_rows, stable_id, write_csv_rows
PURCHASE_FIELDS = [
@@ -18,6 +18,8 @@ PURCHASE_FIELDS = [
"observed_item_key",
"observed_product_id",
"canonical_product_id",
"review_status",
"resolution_action",
"raw_item_name",
"normalized_item_name",
"retailer_item_id",
@@ -60,6 +62,31 @@ EXAMPLE_FIELDS = [
"notes",
]
# Column order for combined_output/canonical_catalog.csv — the durable
# human-curated canonical item catalog (seeded from auto-linked rows).
CATALOG_FIELDS = [
    "canonical_product_id",
    "canonical_name",
    "category",
    "product_type",
    "brand",
    "variant",
    "size_value",
    "size_unit",
    "pack_qty",
    "measure_type",
    "notes",
    "created_at",
    "updated_at",
]

# Column order for combined_output/review_resolutions.csv — one row per
# reviewed observed product, keyed by observed_product_id.
RESOLUTION_FIELDS = [
    "observed_product_id",
    "canonical_product_id",
    "resolution_action",
    "status",
    "resolution_notes",
    "reviewed_at",
]
def decimal_or_zero(value):
    """Parse *value* into a Decimal, falling back to zero when parsing fails.

    Any falsy parse result (None, or a zero Decimal) yields Decimal("0").
    """
    parsed = to_decimal(value)
    return parsed if parsed else Decimal("0")
@@ -138,7 +165,50 @@ def order_lookup(rows, retailer):
}
def build_link_lookup(enriched_rows):
def read_optional_csv_rows(path):
    """Read CSV rows from *path*, returning an empty list when the file is absent.

    Used for artifacts (resolutions, catalog) that may not exist on first run.
    """
    csv_path = Path(path)
    return read_csv_rows(csv_path) if csv_path.exists() else []
def load_resolution_lookup(resolution_rows):
    """Index resolution rows by observed_product_id.

    Rows with a missing/blank observed_product_id are dropped; when the same
    id appears more than once the later row wins.
    """
    indexed = {}
    for entry in resolution_rows:
        observed_id = entry.get("observed_product_id")
        if observed_id:
            indexed[observed_id] = entry
    return indexed
def merge_catalog_rows(existing_rows, auto_rows):
    """Merge auto-generated catalog rows with the existing on-disk catalog.

    Rows are keyed by canonical_product_id (blank ids are dropped); existing
    rows are applied after auto rows so hand-curated entries win on conflict.
    Returns rows sorted by canonical_product_id for stable CSV output.
    """
    by_id = {
        row.get("canonical_product_id", ""): row
        for row in auto_rows + existing_rows
        if row.get("canonical_product_id", "")
    }
    return sorted(by_id.values(), key=lambda entry: entry["canonical_product_id"])
def catalog_row_from_canonical(row):
    """Project a canonical-layer row onto the catalog CSV schema.

    Missing fields default to empty strings; extra fields are dropped.
    """
    field_names = (
        "canonical_product_id",
        "canonical_name",
        "category",
        "product_type",
        "brand",
        "variant",
        "size_value",
        "size_unit",
        "pack_qty",
        "measure_type",
        "notes",
        "created_at",
        "updated_at",
    )
    return {name: row.get(name, "") for name in field_names}
def build_link_state(enriched_rows):
observed_rows = build_observed_products.build_observed_products(enriched_rows)
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
@@ -155,12 +225,34 @@ def build_link_lookup(enriched_rows):
canonical_id_by_observed = {
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
}
return observed_id_by_key, canonical_id_by_observed
return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed
def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders, costco_orders):
def build_purchase_rows(
giant_enriched_rows,
costco_enriched_rows,
giant_orders,
costco_orders,
resolution_rows,
):
all_enriched_rows = giant_enriched_rows + costco_enriched_rows
observed_id_by_key, canonical_id_by_observed = build_link_lookup(all_enriched_rows)
(
observed_rows,
canonical_rows,
link_rows,
observed_id_by_key,
canonical_id_by_observed,
) = build_link_state(all_enriched_rows)
resolution_lookup = load_resolution_lookup(resolution_rows)
for observed_product_id, resolution in resolution_lookup.items():
action = resolution.get("resolution_action", "")
status = resolution.get("status", "")
if status != "approved":
continue
if action in {"link", "create"} and resolution.get("canonical_product_id"):
canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
elif action == "exclude":
canonical_id_by_observed[observed_product_id] = ""
orders_by_id = {}
orders_by_id.update(order_lookup(giant_orders, "giant"))
orders_by_id.update(order_lookup(costco_orders, "costco"))
@@ -174,6 +266,7 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
observed_product_id = observed_id_by_key.get(observed_key, "")
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
metrics = derive_metrics(row)
resolution = resolution_lookup.get(observed_product_id, {})
purchase_rows.append(
{
"purchase_date": row["order_date"],
@@ -183,6 +276,8 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
"observed_item_key": row["observed_item_key"],
"observed_product_id": observed_product_id,
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
"review_status": resolution.get("status", ""),
"resolution_action": resolution.get("resolution_action", ""),
"raw_item_name": row["item_name"],
"normalized_item_name": row["item_name_norm"],
"retailer_item_id": row["retailer_item_id"],
@@ -206,7 +301,33 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
**metrics,
}
)
return purchase_rows
return purchase_rows, observed_rows, canonical_rows, link_rows
def apply_manual_resolutions_to_links(link_rows, resolution_rows):
    """Overlay approved manual resolutions onto the auto-generated link rows.

    - "exclude" removes the observed product's link entirely.
    - "link"/"create" (with a canonical id) replaces the link with a manual
      entry marked link_method="manual_<action>" at high confidence.
    Non-approved resolutions and blank observed ids are ignored.
    Returns link rows sorted by observed_product_id.
    """
    merged = {row["observed_product_id"]: dict(row) for row in link_rows}
    for resolution in resolution_rows:
        if resolution.get("status") != "approved":
            continue
        observed_id = resolution.get("observed_product_id", "")
        if not observed_id:
            continue
        action = resolution.get("resolution_action", "")
        if action == "exclude":
            merged.pop(observed_id, None)
        elif action in ("link", "create") and resolution.get("canonical_product_id"):
            merged[observed_id] = {
                "observed_product_id": observed_id,
                "canonical_product_id": resolution["canonical_product_id"],
                "link_method": f"manual_{action}",
                "link_confidence": "high",
                "review_status": resolution.get("status", ""),
                "reviewed_by": "",
                "reviewed_at": resolution.get("reviewed_at", ""),
                "link_notes": resolution.get("resolution_notes", ""),
            }
    return sorted(merged.values(), key=lambda entry: entry["observed_product_id"])
def build_comparison_examples(purchase_rows):
@@ -245,6 +366,9 @@ def build_comparison_examples(purchase_rows):
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
def main(
@@ -252,20 +376,34 @@ def main(
costco_items_enriched_csv,
giant_orders_csv,
costco_orders_csv,
resolutions_csv,
catalog_csv,
links_csv,
output_csv,
examples_csv,
):
purchase_rows = build_purchase_rows(
resolution_rows = read_optional_csv_rows(resolutions_csv)
purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
read_csv_rows(giant_items_enriched_csv),
read_csv_rows(costco_items_enriched_csv),
read_csv_rows(giant_orders_csv),
read_csv_rows(costco_orders_csv),
resolution_rows,
)
existing_catalog_rows = read_optional_csv_rows(catalog_csv)
merged_catalog_rows = merge_catalog_rows(
existing_catalog_rows,
[catalog_row_from_canonical(row) for row in canonical_rows],
)
link_rows = apply_manual_resolutions_to_links(link_rows, resolution_rows)
example_rows = build_comparison_examples(purchase_rows)
write_csv_rows(catalog_csv, merged_catalog_rows, CATALOG_FIELDS)
write_csv_rows(links_csv, link_rows, build_canonical_layer.LINK_FIELDS)
write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
click.echo(
f"wrote {len(purchase_rows)} purchase rows to {output_csv} "
f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
f"and {len(example_rows)} comparison examples to {examples_csv}"
)

73
pm/review-workflow.org Normal file
View File

@@ -0,0 +1,73 @@
* review and item-resolution workflow
This document defines the durable review workflow for unresolved observed
products.
** persistent files
- `combined_output/purchases.csv`
Flat normalized purchase log. This is the review input because it retains:
- raw item name
- normalized item name
- observed product id
- canonical product id when resolved
- retailer/order/date/price context
- `combined_output/review_queue.csv`
Current unresolved observed products grouped for review.
- `combined_output/review_resolutions.csv`
Durable mapping decisions from observed products to canonical products.
- `combined_output/canonical_catalog.csv`
Durable canonical item catalog used by manual review and later purchase-log
rebuilds.
There is no separate alias file in v1. `review_resolutions.csv` is the mapping
layer from observed products to canonical product ids.
** workflow
1. Run `build_purchases.py`
This refreshes the purchase log and seeds/updates the canonical catalog from
current auto-linked canonical rows.
2. Run `review_products.py`
This rebuilds `review_queue.csv` from unresolved purchase rows and prompts in
the terminal for one observed product at a time.
3. Choose one of:
- link to existing canonical
- create new canonical
- exclude
- skip
4. `review_products.py` buffers decisions during the session and, when the session ends (including when the reviewer quits partway through), writes the accumulated decisions to:
- `review_resolutions.csv`
- `canonical_catalog.csv` when a new canonical item is created
5. Rerun `build_purchases.py`
This reapplies approved resolutions so the final normalized purchase log now
carries the reviewed `canonical_product_id`.
** what the human edits
The primary interface is terminal prompts in `review_products.py`.
The human provides:
- existing canonical id when linking
- canonical name/category/product type when creating a new canonical item
- optional resolution notes
The generated CSVs remain editable by hand if needed, but the intended workflow
is terminal-first.
** durability
- Resolutions are keyed by `observed_product_id`, not by one-off text
substitution.
- Canonical products are keyed by stable `canonical_product_id`.
- Future runs reuse approved mappings through `review_resolutions.csv`.
** retention of audit fields
The final `purchases.csv` retains:
- `raw_item_name`
- `normalized_item_name`
- `canonical_product_id`
This preserves the raw receipt description, the deterministic parser output, and
the human-approved canonical identity in one flat purchase log.

223
review_products.py Normal file
View File

@@ -0,0 +1,223 @@
from collections import defaultdict
from datetime import date
from pathlib import Path
import click
import build_purchases
from layer_helpers import compact_join, stable_id, write_csv_rows
# Column order for combined_output/review_queue.csv — one row per unresolved
# observed product, aggregating its purchase-row evidence for terminal review.
QUEUE_FIELDS = [
    "review_id",
    "retailer",
    "observed_product_id",
    "canonical_product_id",
    "reason_code",
    "priority",
    "raw_item_names",
    "normalized_names",
    "upc_values",
    "example_prices",
    "seen_count",
    "status",
    "resolution_action",
    "resolution_notes",
    "created_at",
    "updated_at",
]
def build_review_queue(purchase_rows, resolution_rows):
    """Build one review-queue row per observed product still lacking a canonical link.

    Purchase rows are grouped by observed_product_id; groups that are already
    approved in the resolution file, or whose rows all carry a
    canonical_product_id, are skipped. Evidence fields (names, UPCs, prices)
    are de-duplicated, sorted, and joined with a limit of 8 values.
    """
    resolutions = build_purchases.load_resolution_lookup(resolution_rows)
    grouped = defaultdict(list)
    for purchase in purchase_rows:
        observed_id = purchase.get("observed_product_id", "")
        if observed_id:
            grouped[observed_id].append(purchase)
    today = str(date.today())
    queue = []
    for observed_id in sorted(grouped):
        group = grouped[observed_id]
        resolution = resolutions.get(observed_id, {})
        if resolution.get("status") == "approved":
            continue
        # Skip groups where every purchase row already resolved to a canonical id.
        if all(purchase.get("canonical_product_id") for purchase in group):
            continue

        def distinct(field):
            # Sorted, de-duplicated non-empty values of one evidence field.
            return sorted({purchase[field] for purchase in group if purchase[field]})

        queue.append(
            {
                "review_id": stable_id("rvw", observed_id),
                "retailer": " | ".join(sorted({purchase["retailer"] for purchase in group})),
                "observed_product_id": observed_id,
                "canonical_product_id": resolution.get("canonical_product_id", ""),
                "reason_code": "missing_canonical_link",
                "priority": "high",
                "raw_item_names": compact_join(distinct("raw_item_name"), limit=8),
                "normalized_names": compact_join(distinct("normalized_item_name"), limit=8),
                "upc_values": compact_join(distinct("upc"), limit=8),
                "example_prices": compact_join(distinct("line_total"), limit=8),
                "seen_count": str(len(group)),
                "status": resolution.get("status", "pending"),
                "resolution_action": resolution.get("resolution_action", ""),
                "resolution_notes": resolution.get("resolution_notes", ""),
                "created_at": resolution.get("reviewed_at", today),
                "updated_at": today,
            }
        )
    return queue
def save_resolution_rows(path, rows):
    """Write resolution rows to *path* in the RESOLUTION_FIELDS column order."""
    write_csv_rows(path, rows, build_purchases.RESOLUTION_FIELDS)
def save_catalog_rows(path, rows):
    """Write canonical catalog rows to *path* in the CATALOG_FIELDS column order."""
    write_csv_rows(path, rows, build_purchases.CATALOG_FIELDS)
def prompt_resolution(queue_row, catalog_rows):
    """Interactively resolve one review-queue row in the terminal.

    Returns (resolution_row, catalog_row):
      - quit  -> (None, None)
      - skip  -> (pending "skip" resolution, None)
      - exclude/link -> (approved resolution, None)
      - new canonical -> (approved "create" resolution, new catalog row)
    """
    observed_id = queue_row["observed_product_id"]
    summary = (
        ("observed_product_id", observed_id),
        ("retailer", queue_row["retailer"]),
        ("raw names", queue_row["raw_item_names"]),
        ("normalized names", queue_row["normalized_names"]),
        ("upcs", queue_row["upc_values"]),
        ("example prices", queue_row["example_prices"]),
        ("seen count", queue_row["seen_count"]),
    )
    click.echo("")
    for label, value in summary:
        click.echo(f"{label}: {value}")
    click.echo("actions: [l]ink existing [n]ew canonical [x]exclude [s]kip [q]uit")
    action = click.prompt("action", type=click.Choice(["l", "n", "x", "s", "q"]))
    if action == "q":
        return None, None
    today = str(date.today())

    def resolution(action_name, canonical_id, status, notes):
        # Shared shape for every resolution row this prompt can produce.
        return {
            "observed_product_id": observed_id,
            "canonical_product_id": canonical_id,
            "resolution_action": action_name,
            "status": status,
            "resolution_notes": notes,
            "reviewed_at": today,
        }

    if action == "s":
        # Skip keeps the item pending and carries forward any prior notes.
        return resolution("skip", "", "pending", queue_row.get("resolution_notes", "")), None
    if action == "x":
        notes = click.prompt("exclude notes", default="", show_default=False)
        return resolution("exclude", "", "approved", notes), None
    if action == "l":
        click.echo("existing canonicals:")
        for entry in catalog_rows[:10]:
            click.echo(f" {entry['canonical_product_id']} {entry['canonical_name']}")
        canonical_id = click.prompt("canonical product id", type=str)
        notes = click.prompt("link notes", default="", show_default=False)
        return resolution("link", canonical_id, "approved", notes), None
    # Remaining choice is "n": create a brand-new canonical catalog entry.
    canonical_name = click.prompt("canonical name", type=str)
    category = click.prompt("category", default="", show_default=False)
    product_type = click.prompt("product type", default="", show_default=False)
    notes = click.prompt("notes", default="", show_default=False)
    canonical_id = stable_id("gcan", f"manual|{canonical_name}|{category}|{product_type}")
    catalog_row = {
        "canonical_product_id": canonical_id,
        "canonical_name": canonical_name,
        "category": category,
        "product_type": product_type,
        "brand": "",
        "variant": "",
        "size_value": "",
        "size_unit": "",
        "pack_qty": "",
        "measure_type": "",
        "notes": notes,
        "created_at": today,
        "updated_at": today,
    }
    return resolution("create", canonical_id, "approved", notes), catalog_row
@click.command()
@click.option("--purchases-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--queue-csv", default="combined_output/review_queue.csv", show_default=True)
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--limit", default=0, show_default=True, type=int)
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_only):
    """Rebuild the review queue, then interactively resolve queued items.

    Always rewrites queue_csv from the current purchase log. Unless
    --refresh-only is set, prompts for each queued row (stopping after
    --limit reviews when limit is nonzero, or when the reviewer quits) and
    then writes the accumulated resolutions and catalog rows once at the
    end of the session — partial progress is saved even on quit.
    """
    purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv)
    resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
    catalog_rows = build_purchases.read_optional_csv_rows(catalog_csv)
    queue_rows = build_review_queue(purchase_rows, resolution_rows)
    write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
    click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")
    if refresh_only:
        return
    # Seed with existing resolutions so a re-review overwrites the prior row in place.
    resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
    catalog_by_id = {row["canonical_product_id"]: row for row in catalog_rows if row.get("canonical_product_id")}
    reviewed = 0
    for queue_row in queue_rows:
        if limit and reviewed >= limit:
            break
        result = prompt_resolution(queue_row, catalog_rows)
        if result == (None, None):
            # Reviewer quit; break out but still fall through to the saves below.
            break
        resolution_row, canonical_row = result
        resolution_lookup[resolution_row["observed_product_id"]] = resolution_row
        # A "create" action yields a catalog row; only add it if the id is new.
        if canonical_row and canonical_row["canonical_product_id"] not in catalog_by_id:
            catalog_by_id[canonical_row["canonical_product_id"]] = canonical_row
            catalog_rows.append(canonical_row)
        reviewed += 1
    save_resolution_rows(resolutions_csv, sorted(resolution_lookup.values(), key=lambda row: row["observed_product_id"]))
    save_catalog_rows(catalog_csv, sorted(catalog_by_id.values(), key=lambda row: row["canonical_product_id"]))
    click.echo(
        f"saved {len(resolution_lookup)} resolution rows to {resolutions_csv} "
        f"and {len(catalog_by_id)} catalog rows to {catalog_csv}"
    )


if __name__ == "__main__":
    main()

View File

@@ -99,11 +99,12 @@ class PurchaseLogTests(unittest.TestCase):
}
]
rows = build_purchases.build_purchase_rows(
rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
[giant_row],
[costco_row],
giant_orders,
costco_orders,
[],
)
self.assertEqual(2, len(rows))
@@ -195,6 +196,9 @@ class PurchaseLogTests(unittest.TestCase):
costco_items_enriched_csv=str(costco_items),
giant_orders_csv=str(giant_orders),
costco_orders_csv=str(costco_orders),
resolutions_csv=str(Path(tmpdir) / "review_resolutions.csv"),
catalog_csv=str(Path(tmpdir) / "canonical_catalog.csv"),
links_csv=str(Path(tmpdir) / "product_links.csv"),
output_csv=str(purchases_csv),
examples_csv=str(examples_csv),
)
@@ -208,6 +212,56 @@ class PurchaseLogTests(unittest.TestCase):
self.assertEqual(2, len(purchase_rows))
self.assertEqual(1, len(example_rows))
def test_build_purchase_rows_applies_manual_resolution(self):
    """An approved 'create' resolution overrides the canonical link and
    surfaces review_status/resolution_action on the purchase row."""
    # Start from a fully-blank enriched row so only the fields under test matter.
    fieldnames = enrich_costco.OUTPUT_FIELDS
    giant_row = {field: "" for field in fieldnames}
    giant_row.update(
        {
            "retailer": "giant",
            "order_id": "g1",
            "line_no": "1",
            "observed_item_key": "giant:g1:1",
            "order_date": "2026-03-01",
            "item_name": "SB BAGGED ICE 20LB",
            "item_name_norm": "BAGGED ICE",
            "retailer_item_id": "100",
            "upc": "",
            "qty": "1",
            "unit": "EA",
            "line_total": "3.50",
            "unit_price": "3.50",
            "measure_type": "each",
            "raw_order_path": "giant_output/raw/g1.json",
            "is_discount_line": "false",
            "is_coupon_line": "false",
            "is_fee": "false",
        }
    )
    # Derive the real observed_product_id so the resolution row targets it.
    observed_rows, _canonical_rows, _link_rows, _observed_id_by_key, _canonical_by_observed = (
        build_purchases.build_link_state([giant_row])
    )
    observed_product_id = observed_rows[0]["observed_product_id"]
    rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
        [giant_row],
        [],
        [{"order_id": "g1", "store_name": "Giant", "store_number": "42", "store_city": "Springfield", "store_state": "VA"}],
        [],
        [
            {
                "observed_product_id": observed_product_id,
                "canonical_product_id": "gcan_manual_ice",
                "resolution_action": "create",
                "status": "approved",
                "resolution_notes": "manual ice merge",
                "reviewed_at": "2026-03-16",
            }
        ],
    )
    # The manual canonical id wins over the auto-link, and the review audit
    # fields propagate onto the purchase row.
    self.assertEqual("gcan_manual_ice", rows[0]["canonical_product_id"])
    self.assertEqual("approved", rows[0]["review_status"])
    self.assertEqual("create", rows[0]["resolution_action"])
if __name__ == "__main__":
unittest.main()

View File

@@ -0,0 +1,100 @@
import csv
import tempfile
import unittest
from pathlib import Path
from unittest import mock
import review_products
class ReviewWorkflowTests(unittest.TestCase):
    """Tests for the terminal review workflow in review_products."""

    def test_build_review_queue_groups_unresolved_purchases(self):
        """Two purchases of the same unresolved observed product collapse
        into a single queue row that aggregates their raw names."""
        queue_rows = review_products.build_review_queue(
            [
                {
                    "observed_product_id": "gobs_1",
                    "canonical_product_id": "",
                    "retailer": "giant",
                    "raw_item_name": "SB BAGGED ICE 20LB",
                    "normalized_item_name": "BAGGED ICE",
                    "upc": "",
                    "line_total": "3.50",
                },
                {
                    "observed_product_id": "gobs_1",
                    "canonical_product_id": "",
                    "retailer": "giant",
                    "raw_item_name": "SB BAG ICE CUBED 10LB",
                    "normalized_item_name": "BAG ICE",
                    "upc": "",
                    "line_total": "2.50",
                },
            ],
            [],  # no prior resolutions
        )
        self.assertEqual(1, len(queue_rows))
        self.assertEqual("gobs_1", queue_rows[0]["observed_product_id"])
        self.assertIn("SB BAGGED ICE 20LB", queue_rows[0]["raw_item_names"])

    def test_review_products_creates_canonical_and_resolution(self):
        """Driving the CLI with scripted prompts through a 'new canonical'
        session writes an approved create-resolution and a catalog row."""
        with tempfile.TemporaryDirectory() as tmpdir:
            purchases_csv = Path(tmpdir) / "purchases.csv"
            queue_csv = Path(tmpdir) / "review_queue.csv"
            resolutions_csv = Path(tmpdir) / "review_resolutions.csv"
            catalog_csv = Path(tmpdir) / "canonical_catalog.csv"
            # Seed a purchase log with one unresolved observed product.
            with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
                writer = csv.DictWriter(
                    handle,
                    fieldnames=[
                        "observed_product_id",
                        "canonical_product_id",
                        "retailer",
                        "raw_item_name",
                        "normalized_item_name",
                        "upc",
                        "line_total",
                    ],
                )
                writer.writeheader()
                writer.writerow(
                    {
                        "observed_product_id": "gobs_ice",
                        "canonical_product_id": "",
                        "retailer": "giant",
                        "raw_item_name": "SB BAGGED ICE 20LB",
                        "normalized_item_name": "BAGGED ICE",
                        "upc": "",
                        "line_total": "3.50",
                    }
                )
            # Scripted answers: action "n" (new canonical), then name/category/
            # product type/notes; trailing "q" guards against any extra prompt.
            with mock.patch.object(
                review_products.click,
                "prompt",
                side_effect=["n", "ICE", "frozen", "ice", "manual merge", "q"],
            ):
                # Call the underlying callback directly to bypass click's CLI parsing.
                review_products.main.callback(
                    purchases_csv=str(purchases_csv),
                    queue_csv=str(queue_csv),
                    resolutions_csv=str(resolutions_csv),
                    catalog_csv=str(catalog_csv),
                    limit=1,
                    refresh_only=False,
                )
            self.assertTrue(queue_csv.exists())
            self.assertTrue(resolutions_csv.exists())
            self.assertTrue(catalog_csv.exists())
            with resolutions_csv.open(newline="", encoding="utf-8") as handle:
                resolution_rows = list(csv.DictReader(handle))
            with catalog_csv.open(newline="", encoding="utf-8") as handle:
                catalog_rows = list(csv.DictReader(handle))
            self.assertEqual("create", resolution_rows[0]["resolution_action"])
            self.assertEqual("approved", resolution_rows[0]["status"])
            self.assertEqual("ICE", catalog_rows[0]["canonical_name"])


if __name__ == "__main__":
    unittest.main()