Add terminal review resolution workflow
This commit is contained in:
@@ -7,7 +7,7 @@ import build_canonical_layer
|
|||||||
import build_observed_products
|
import build_observed_products
|
||||||
import validate_cross_retailer_flow
|
import validate_cross_retailer_flow
|
||||||
from enrich_giant import format_decimal, to_decimal
|
from enrich_giant import format_decimal, to_decimal
|
||||||
from layer_helpers import read_csv_rows, write_csv_rows
|
from layer_helpers import read_csv_rows, stable_id, write_csv_rows
|
||||||
|
|
||||||
|
|
||||||
PURCHASE_FIELDS = [
|
PURCHASE_FIELDS = [
|
||||||
@@ -18,6 +18,8 @@ PURCHASE_FIELDS = [
|
|||||||
"observed_item_key",
|
"observed_item_key",
|
||||||
"observed_product_id",
|
"observed_product_id",
|
||||||
"canonical_product_id",
|
"canonical_product_id",
|
||||||
|
"review_status",
|
||||||
|
"resolution_action",
|
||||||
"raw_item_name",
|
"raw_item_name",
|
||||||
"normalized_item_name",
|
"normalized_item_name",
|
||||||
"retailer_item_id",
|
"retailer_item_id",
|
||||||
@@ -60,6 +62,31 @@ EXAMPLE_FIELDS = [
|
|||||||
"notes",
|
"notes",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
# Column order used when writing the canonical catalog CSV.
CATALOG_FIELDS = [
    # identity
    "canonical_product_id",
    "canonical_name",
    # classification
    "category",
    "product_type",
    "brand",
    "variant",
    # size / measure
    "size_value",
    "size_unit",
    "pack_qty",
    "measure_type",
    # bookkeeping
    "notes",
    "created_at",
    "updated_at",
]
|
||||||
|
|
||||||
|
# Column order used when writing the review resolutions CSV.
RESOLUTION_FIELDS = [
    # which observed product the decision applies to, and its target canonical
    "observed_product_id",
    "canonical_product_id",
    # the decision itself
    "resolution_action",
    "status",
    "resolution_notes",
    "reviewed_at",
]
|
||||||
|
|
||||||
|
|
||||||
def decimal_or_zero(value):
|
def decimal_or_zero(value):
|
||||||
return to_decimal(value) or Decimal("0")
|
return to_decimal(value) or Decimal("0")
|
||||||
@@ -138,7 +165,50 @@ def order_lookup(rows, retailer):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
def build_link_lookup(enriched_rows):
|
def read_optional_csv_rows(path):
    """Return the rows of the CSV at *path*, or an empty list if it does not exist.

    *path* may be a string or a ``Path``; it is normalized to ``Path`` before
    being handed to ``read_csv_rows``.
    """
    csv_path = Path(path)
    return read_csv_rows(csv_path) if csv_path.exists() else []
|
||||||
|
|
||||||
|
|
||||||
|
def load_resolution_lookup(resolution_rows):
    """Index resolution rows by their ``observed_product_id``.

    Rows with a missing or empty ``observed_product_id`` are ignored. When the
    same id appears more than once, the last row wins.
    """
    return {
        entry["observed_product_id"]: entry
        for entry in resolution_rows
        if entry.get("observed_product_id")
    }
|
||||||
|
|
||||||
|
|
||||||
|
def merge_catalog_rows(existing_rows, auto_rows):
    """Combine auto-generated and existing catalog rows into one sorted list.

    Rows are keyed by ``canonical_product_id``; rows without an id are dropped.
    Existing rows are applied after the auto rows, so an existing row replaces
    an auto row that has the same id. The result is sorted by id.
    """
    by_id = {}
    for entry in auto_rows + existing_rows:
        key = entry.get("canonical_product_id", "")
        if not key:
            continue
        by_id[key] = entry

    ordered = list(by_id.values())
    ordered.sort(key=lambda entry: entry["canonical_product_id"])
    return ordered
|
||||||
|
|
||||||
|
|
||||||
|
def catalog_row_from_canonical(row):
    """Project a canonical-layer row onto the catalog columns.

    Only the catalog columns are kept; any column missing from *row* is filled
    with an empty string.
    """
    catalog_columns = (
        "canonical_product_id",
        "canonical_name",
        "category",
        "product_type",
        "brand",
        "variant",
        "size_value",
        "size_unit",
        "pack_qty",
        "measure_type",
        "notes",
        "created_at",
        "updated_at",
    )
    return {column: row.get(column, "") for column in catalog_columns}
|
||||||
|
|
||||||
|
|
||||||
|
def build_link_state(enriched_rows):
|
||||||
observed_rows = build_observed_products.build_observed_products(enriched_rows)
|
observed_rows = build_observed_products.build_observed_products(enriched_rows)
|
||||||
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
|
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
|
||||||
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
|
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
|
||||||
@@ -155,12 +225,34 @@ def build_link_lookup(enriched_rows):
|
|||||||
canonical_id_by_observed = {
|
canonical_id_by_observed = {
|
||||||
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
|
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
|
||||||
}
|
}
|
||||||
return observed_id_by_key, canonical_id_by_observed
|
return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed
|
||||||
|
|
||||||
|
|
||||||
def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders, costco_orders):
|
def build_purchase_rows(
|
||||||
|
giant_enriched_rows,
|
||||||
|
costco_enriched_rows,
|
||||||
|
giant_orders,
|
||||||
|
costco_orders,
|
||||||
|
resolution_rows,
|
||||||
|
):
|
||||||
all_enriched_rows = giant_enriched_rows + costco_enriched_rows
|
all_enriched_rows = giant_enriched_rows + costco_enriched_rows
|
||||||
observed_id_by_key, canonical_id_by_observed = build_link_lookup(all_enriched_rows)
|
(
|
||||||
|
observed_rows,
|
||||||
|
canonical_rows,
|
||||||
|
link_rows,
|
||||||
|
observed_id_by_key,
|
||||||
|
canonical_id_by_observed,
|
||||||
|
) = build_link_state(all_enriched_rows)
|
||||||
|
resolution_lookup = load_resolution_lookup(resolution_rows)
|
||||||
|
for observed_product_id, resolution in resolution_lookup.items():
|
||||||
|
action = resolution.get("resolution_action", "")
|
||||||
|
status = resolution.get("status", "")
|
||||||
|
if status != "approved":
|
||||||
|
continue
|
||||||
|
if action in {"link", "create"} and resolution.get("canonical_product_id"):
|
||||||
|
canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
|
||||||
|
elif action == "exclude":
|
||||||
|
canonical_id_by_observed[observed_product_id] = ""
|
||||||
orders_by_id = {}
|
orders_by_id = {}
|
||||||
orders_by_id.update(order_lookup(giant_orders, "giant"))
|
orders_by_id.update(order_lookup(giant_orders, "giant"))
|
||||||
orders_by_id.update(order_lookup(costco_orders, "costco"))
|
orders_by_id.update(order_lookup(costco_orders, "costco"))
|
||||||
@@ -174,6 +266,7 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
|
|||||||
observed_product_id = observed_id_by_key.get(observed_key, "")
|
observed_product_id = observed_id_by_key.get(observed_key, "")
|
||||||
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
|
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
|
||||||
metrics = derive_metrics(row)
|
metrics = derive_metrics(row)
|
||||||
|
resolution = resolution_lookup.get(observed_product_id, {})
|
||||||
purchase_rows.append(
|
purchase_rows.append(
|
||||||
{
|
{
|
||||||
"purchase_date": row["order_date"],
|
"purchase_date": row["order_date"],
|
||||||
@@ -183,6 +276,8 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
|
|||||||
"observed_item_key": row["observed_item_key"],
|
"observed_item_key": row["observed_item_key"],
|
||||||
"observed_product_id": observed_product_id,
|
"observed_product_id": observed_product_id,
|
||||||
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
|
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
|
||||||
|
"review_status": resolution.get("status", ""),
|
||||||
|
"resolution_action": resolution.get("resolution_action", ""),
|
||||||
"raw_item_name": row["item_name"],
|
"raw_item_name": row["item_name"],
|
||||||
"normalized_item_name": row["item_name_norm"],
|
"normalized_item_name": row["item_name_norm"],
|
||||||
"retailer_item_id": row["retailer_item_id"],
|
"retailer_item_id": row["retailer_item_id"],
|
||||||
@@ -206,7 +301,33 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
|
|||||||
**metrics,
|
**metrics,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
return purchase_rows
|
return purchase_rows, observed_rows, canonical_rows, link_rows
|
||||||
|
|
||||||
|
|
||||||
|
def apply_manual_resolutions_to_links(link_rows, resolution_rows):
    """Overlay approved manual resolutions on the auto-generated link rows.

    Only resolutions whose ``status`` is ``"approved"`` and that name an
    observed product are applied, in order:

    - ``exclude`` removes the link for that observed product;
    - ``link`` / ``create`` with a canonical id replace the link with a
      manual one.

    The input link rows are copied, not mutated. Returns the links sorted by
    ``observed_product_id``.
    """
    links = {}
    for link in link_rows:
        links[link["observed_product_id"]] = dict(link)

    for decision in resolution_rows:
        if decision.get("status") != "approved":
            continue
        observed_id = decision.get("observed_product_id", "")
        verb = decision.get("resolution_action", "")
        if not observed_id:
            continue

        if verb == "exclude":
            links.pop(observed_id, None)
        elif verb in {"link", "create"} and decision.get("canonical_product_id"):
            links[observed_id] = {
                "observed_product_id": observed_id,
                "canonical_product_id": decision["canonical_product_id"],
                "link_method": f"manual_{verb}",
                "link_confidence": "high",
                "review_status": decision.get("status", ""),
                "reviewed_by": "",
                "reviewed_at": decision.get("reviewed_at", ""),
                "link_notes": decision.get("resolution_notes", ""),
            }

    return sorted(links.values(), key=lambda link: link["observed_product_id"])
|
||||||
|
|
||||||
|
|
||||||
def build_comparison_examples(purchase_rows):
|
def build_comparison_examples(purchase_rows):
|
||||||
@@ -245,6 +366,9 @@ def build_comparison_examples(purchase_rows):
|
|||||||
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
|
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
|
||||||
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
|
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
|
||||||
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
|
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
|
||||||
|
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
|
||||||
|
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
|
||||||
|
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
|
||||||
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
|
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
|
||||||
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
|
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
|
||||||
def main(
|
def main(
|
||||||
@@ -252,20 +376,34 @@ def main(
|
|||||||
costco_items_enriched_csv,
|
costco_items_enriched_csv,
|
||||||
giant_orders_csv,
|
giant_orders_csv,
|
||||||
costco_orders_csv,
|
costco_orders_csv,
|
||||||
|
resolutions_csv,
|
||||||
|
catalog_csv,
|
||||||
|
links_csv,
|
||||||
output_csv,
|
output_csv,
|
||||||
examples_csv,
|
examples_csv,
|
||||||
):
|
):
|
||||||
purchase_rows = build_purchase_rows(
|
resolution_rows = read_optional_csv_rows(resolutions_csv)
|
||||||
|
purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
|
||||||
read_csv_rows(giant_items_enriched_csv),
|
read_csv_rows(giant_items_enriched_csv),
|
||||||
read_csv_rows(costco_items_enriched_csv),
|
read_csv_rows(costco_items_enriched_csv),
|
||||||
read_csv_rows(giant_orders_csv),
|
read_csv_rows(giant_orders_csv),
|
||||||
read_csv_rows(costco_orders_csv),
|
read_csv_rows(costco_orders_csv),
|
||||||
|
resolution_rows,
|
||||||
)
|
)
|
||||||
|
existing_catalog_rows = read_optional_csv_rows(catalog_csv)
|
||||||
|
merged_catalog_rows = merge_catalog_rows(
|
||||||
|
existing_catalog_rows,
|
||||||
|
[catalog_row_from_canonical(row) for row in canonical_rows],
|
||||||
|
)
|
||||||
|
link_rows = apply_manual_resolutions_to_links(link_rows, resolution_rows)
|
||||||
example_rows = build_comparison_examples(purchase_rows)
|
example_rows = build_comparison_examples(purchase_rows)
|
||||||
|
write_csv_rows(catalog_csv, merged_catalog_rows, CATALOG_FIELDS)
|
||||||
|
write_csv_rows(links_csv, link_rows, build_canonical_layer.LINK_FIELDS)
|
||||||
write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
|
write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
|
||||||
write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
|
write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
|
||||||
click.echo(
|
click.echo(
|
||||||
f"wrote {len(purchase_rows)} purchase rows to {output_csv} "
|
f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
|
||||||
|
f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
|
||||||
f"and {len(example_rows)} comparison examples to {examples_csv}"
|
f"and {len(example_rows)} comparison examples to {examples_csv}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
73
pm/review-workflow.org
Normal file
73
pm/review-workflow.org
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
* review and item-resolution workflow
|
||||||
|
|
||||||
|
This document defines the durable review workflow for unresolved observed
|
||||||
|
products.
|
||||||
|
|
||||||
|
** persistent files
|
||||||
|
|
||||||
|
- `combined_output/purchases.csv`
|
||||||
|
Flat normalized purchase log. This is the review input because it retains:
|
||||||
|
- raw item name
|
||||||
|
- normalized item name
|
||||||
|
- observed product id
|
||||||
|
- canonical product id when resolved
|
||||||
|
- retailer/order/date/price context
|
||||||
|
- `combined_output/review_queue.csv`
|
||||||
|
Current unresolved observed products grouped for review.
|
||||||
|
- `combined_output/review_resolutions.csv`
|
||||||
|
Durable mapping decisions from observed products to canonical products.
|
||||||
|
- `combined_output/canonical_catalog.csv`
|
||||||
|
Durable canonical item catalog used by manual review and later purchase-log
|
||||||
|
rebuilds.
|
||||||
|
|
||||||
|
There is no separate alias file in v1. `review_resolutions.csv` is the mapping
|
||||||
|
layer from observed products to canonical product ids.
|
||||||
|
|
||||||
|
** workflow
|
||||||
|
|
||||||
|
1. Run `build_purchases.py`
|
||||||
|
This refreshes the purchase log and seeds/updates the canonical catalog from
|
||||||
|
current auto-linked canonical rows.
|
||||||
|
2. Run `review_products.py`
|
||||||
|
This rebuilds `review_queue.csv` from unresolved purchase rows and prompts in
|
||||||
|
the terminal for one observed product at a time.
|
||||||
|
3. Choose one of:
|
||||||
|
- link to existing canonical
|
||||||
|
- create new canonical
|
||||||
|
- exclude
|
||||||
|
- skip
|
||||||
|
4. `review_products.py` writes decisions immediately to:
|
||||||
|
- `review_resolutions.csv`
|
||||||
|
- `canonical_catalog.csv` when a new canonical item is created
|
||||||
|
5. Rerun `build_purchases.py`
|
||||||
|
This reapplies approved resolutions so the final normalized purchase log now
|
||||||
|
carries the reviewed `canonical_product_id`.
|
||||||
|
|
||||||
|
** what the human edits
|
||||||
|
|
||||||
|
The primary interface is terminal prompts in `review_products.py`.
|
||||||
|
|
||||||
|
The human provides:
|
||||||
|
- existing canonical id when linking
|
||||||
|
- canonical name/category/product type when creating a new canonical item
|
||||||
|
- optional resolution notes
|
||||||
|
|
||||||
|
The generated CSVs remain editable by hand if needed, but the intended workflow
|
||||||
|
is terminal-first.
|
||||||
|
|
||||||
|
** durability
|
||||||
|
|
||||||
|
- Resolutions are keyed by `observed_product_id`, not by one-off text
|
||||||
|
substitution.
|
||||||
|
- Canonical products are keyed by stable `canonical_product_id`.
|
||||||
|
- Future runs reuse approved mappings through `review_resolutions.csv`.
|
||||||
|
|
||||||
|
** retention of audit fields
|
||||||
|
|
||||||
|
The final `purchases.csv` retains:
|
||||||
|
- `raw_item_name`
|
||||||
|
- `normalized_item_name`
|
||||||
|
- `canonical_product_id`
|
||||||
|
|
||||||
|
This preserves the raw receipt description, the deterministic parser output, and
|
||||||
|
the human-approved canonical identity in one flat purchase log.
|
||||||
223
review_products.py
Normal file
223
review_products.py
Normal file
@@ -0,0 +1,223 @@
|
|||||||
|
from collections import defaultdict
|
||||||
|
from datetime import date
|
||||||
|
from pathlib import Path
|
||||||
|
|
||||||
|
import click
|
||||||
|
|
||||||
|
import build_purchases
|
||||||
|
from layer_helpers import compact_join, stable_id, write_csv_rows
|
||||||
|
|
||||||
|
|
||||||
|
# Column order used when writing the review queue CSV.
QUEUE_FIELDS = [
    # identity
    "review_id",
    "retailer",
    "observed_product_id",
    "canonical_product_id",
    # why the row is queued
    "reason_code",
    "priority",
    # evidence shown to the reviewer
    "raw_item_names",
    "normalized_names",
    "upc_values",
    "example_prices",
    "seen_count",
    # current decision state
    "status",
    "resolution_action",
    "resolution_notes",
    # bookkeeping
    "created_at",
    "updated_at",
]
|
||||||
|
|
||||||
|
|
||||||
|
def build_review_queue(purchase_rows, resolution_rows):
    """Build one review-queue row per observed product that still needs review.

    Purchase rows are grouped by ``observed_product_id`` (rows without one are
    ignored). A group is queued unless its resolution is already ``approved``
    or every purchase row in the group already has a ``canonical_product_id``.

    Args:
        purchase_rows: purchase-log rows (dicts) carrying at least
            ``retailer``, ``raw_item_name``, ``normalized_item_name``, ``upc``
            and ``line_total``.
        resolution_rows: previously saved resolution rows; any non-approved
            resolution is carried into the queue row.

    Returns:
        A list of queue-row dicts, ordered by ``observed_product_id``.
    """
    resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)

    by_observed = defaultdict(list)
    for row in purchase_rows:
        observed_product_id = row.get("observed_product_id", "")
        if observed_product_id:
            by_observed[observed_product_id].append(row)

    def distinct_values(rows, field):
        # Sorted, de-duplicated, non-empty values of one column, joined for display.
        return compact_join(sorted({row[field] for row in rows if row[field]}), limit=8)

    today_text = str(date.today())
    queue_rows = []
    for observed_product_id, rows in sorted(by_observed.items()):
        current_resolution = resolution_lookup.get(observed_product_id, {})
        if current_resolution.get("status") == "approved":
            continue
        # Nothing to review when every purchase row already has a canonical id.
        if all(row.get("canonical_product_id") for row in rows):
            continue

        queue_rows.append(
            {
                "review_id": stable_id("rvw", observed_product_id),
                "retailer": " | ".join(sorted({row["retailer"] for row in rows})),
                "observed_product_id": observed_product_id,
                "canonical_product_id": current_resolution.get("canonical_product_id", ""),
                "reason_code": "missing_canonical_link",
                "priority": "high",
                "raw_item_names": distinct_values(rows, "raw_item_name"),
                "normalized_names": distinct_values(rows, "normalized_item_name"),
                "upc_values": distinct_values(rows, "upc"),
                "example_prices": distinct_values(rows, "line_total"),
                "seen_count": str(len(rows)),
                # `or` rather than a .get() default: a resolution row read back
                # from CSV has the key present but possibly blank.
                "status": current_resolution.get("status") or "pending",
                "resolution_action": current_resolution.get("resolution_action", ""),
                "resolution_notes": current_resolution.get("resolution_notes", ""),
                "created_at": current_resolution.get("reviewed_at") or today_text,
                "updated_at": today_text,
            }
        )
    return queue_rows
|
||||||
|
|
||||||
|
|
||||||
|
def save_resolution_rows(path, rows):
    """Write *rows* to *path* using the resolution column order."""
    fieldnames = build_purchases.RESOLUTION_FIELDS
    write_csv_rows(path, rows, fieldnames)
|
||||||
|
|
||||||
|
|
||||||
|
def save_catalog_rows(path, rows):
    """Write *rows* to *path* using the canonical catalog column order."""
    fieldnames = build_purchases.CATALOG_FIELDS
    write_csv_rows(path, rows, fieldnames)
|
||||||
|
|
||||||
|
|
||||||
|
def prompt_resolution(queue_row, catalog_rows):
    """Show one queue row in the terminal and collect the reviewer's decision.

    Args:
        queue_row: a review-queue row as built by ``build_review_queue``.
        catalog_rows: current canonical catalog rows, used to list and
            validate link targets.

    Returns:
        ``(resolution_row, canonical_row)``. ``(None, None)`` means the
        reviewer chose to quit. ``canonical_row`` is a new catalog row only
        for the "new canonical" action and ``None`` otherwise.
    """
    observed_product_id = queue_row["observed_product_id"]
    today_text = str(date.today())

    def resolution(action_name, status, canonical_product_id, notes):
        # Every branch produces the same resolution-row shape.
        return {
            "observed_product_id": observed_product_id,
            "canonical_product_id": canonical_product_id,
            "resolution_action": action_name,
            "status": status,
            "resolution_notes": notes,
            "reviewed_at": today_text,
        }

    def skipped():
        return resolution("skip", "pending", "", queue_row.get("resolution_notes", ""))

    click.echo("")
    for label, field in (
        ("observed_product_id", "observed_product_id"),
        ("retailer", "retailer"),
        ("raw names", "raw_item_names"),
        ("normalized names", "normalized_names"),
        ("upcs", "upc_values"),
        ("example prices", "example_prices"),
        ("seen count", "seen_count"),
    ):
        click.echo(f"{label}: {queue_row[field]}")
    click.echo("actions: [l]ink existing [n]ew canonical [x]exclude [s]kip [q]uit")
    action = click.prompt("action", type=click.Choice(["l", "n", "x", "s", "q"]))

    if action == "q":
        return None, None
    if action == "s":
        return skipped(), None
    if action == "x":
        notes = click.prompt("exclude notes", default="", show_default=False)
        return resolution("exclude", "approved", "", notes), None
    if action == "l":
        known_ids = {row.get("canonical_product_id", "") for row in catalog_rows}
        known_ids.discard("")
        click.echo("existing canonicals:")
        for row in catalog_rows[:10]:
            click.echo(f" {row['canonical_product_id']} {row['canonical_name']}")
        hidden = len(catalog_rows) - 10
        if hidden > 0:
            click.echo(f" ... and {hidden} more in the catalog")

        # Re-prompt until the id exists in the catalog so a typo cannot become
        # an approved link to a canonical that does not exist. With an empty
        # catalog there is nothing to validate against, so any id is accepted.
        while True:
            canonical_product_id = click.prompt(
                "canonical product id (blank to skip)", default="", show_default=False
            ).strip()
            if not canonical_product_id:
                return skipped(), None
            if not known_ids or canonical_product_id in known_ids:
                break
            click.echo(f"unknown canonical product id: {canonical_product_id}")
        notes = click.prompt("link notes", default="", show_default=False)
        return resolution("link", "approved", canonical_product_id, notes), None

    # Remaining choice is "n": create a new canonical product.
    canonical_name = click.prompt("canonical name", type=str)
    category = click.prompt("category", default="", show_default=False)
    product_type = click.prompt("product type", default="", show_default=False)
    notes = click.prompt("notes", default="", show_default=False)
    canonical_product_id = stable_id("gcan", f"manual|{canonical_name}|{category}|{product_type}")
    canonical_row = {
        "canonical_product_id": canonical_product_id,
        "canonical_name": canonical_name,
        "category": category,
        "product_type": product_type,
        "brand": "",
        "variant": "",
        "size_value": "",
        "size_unit": "",
        "pack_qty": "",
        "measure_type": "",
        "notes": notes,
        "created_at": today_text,
        "updated_at": today_text,
    }
    return resolution("create", "approved", canonical_product_id, notes), canonical_row
|
||||||
|
|
||||||
|
|
||||||
|
@click.command()
@click.option("--purchases-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--queue-csv", default="combined_output/review_queue.csv", show_default=True)
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--limit", default=0, show_default=True, type=int)
@click.option("--refresh-only", is_flag=True, help="Only rebuild review_queue.csv without prompting.")
def main(purchases_csv, queue_csv, resolutions_csv, catalog_csv, limit, refresh_only):
    """Rebuild the review queue and prompt for a decision on each queued product.

    The queue CSV is always rewritten. Unless --refresh-only is given, each
    queue row is then reviewed interactively (up to --limit rows; 0 means no
    limit), and the resolutions and catalog CSVs are rewritten after every
    decision so an interrupted session keeps the work already done.
    """
    purchase_rows = build_purchases.read_optional_csv_rows(purchases_csv)
    resolution_rows = build_purchases.read_optional_csv_rows(resolutions_csv)
    catalog_rows = build_purchases.read_optional_csv_rows(catalog_csv)

    queue_rows = build_review_queue(purchase_rows, resolution_rows)
    write_csv_rows(queue_csv, queue_rows, QUEUE_FIELDS)
    click.echo(f"wrote {len(queue_rows)} rows to {queue_csv}")

    if refresh_only:
        return

    resolution_lookup = build_purchases.load_resolution_lookup(resolution_rows)
    catalog_by_id = {row["canonical_product_id"]: row for row in catalog_rows if row.get("canonical_product_id")}

    def persist():
        # Write both durable files from the current in-memory state.
        save_resolution_rows(
            resolutions_csv,
            sorted(resolution_lookup.values(), key=lambda row: row["observed_product_id"]),
        )
        save_catalog_rows(
            catalog_csv,
            sorted(catalog_by_id.values(), key=lambda row: row["canonical_product_id"]),
        )

    reviewed = 0
    for queue_row in queue_rows:
        if limit and reviewed >= limit:
            break
        resolution_row, canonical_row = prompt_resolution(queue_row, catalog_rows)
        if resolution_row is None and canonical_row is None:
            # Reviewer chose to quit.
            break
        resolution_lookup[resolution_row["observed_product_id"]] = resolution_row
        if canonical_row and canonical_row["canonical_product_id"] not in catalog_by_id:
            catalog_by_id[canonical_row["canonical_product_id"]] = canonical_row
            # Keep the list in sync so later prompts can list the new canonical.
            catalog_rows.append(canonical_row)
        reviewed += 1
        # Save right away: an abort at a later prompt must not lose this decision.
        persist()

    if not reviewed:
        # Still (re)write both files when nothing was reviewed.
        persist()

    click.echo(
        f"saved {len(resolution_lookup)} resolution rows to {resolutions_csv} "
        f"and {len(catalog_by_id)} catalog rows to {catalog_csv}"
    )
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
main()
|
||||||
@@ -99,11 +99,12 @@ class PurchaseLogTests(unittest.TestCase):
|
|||||||
}
|
}
|
||||||
]
|
]
|
||||||
|
|
||||||
rows = build_purchases.build_purchase_rows(
|
rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
|
||||||
[giant_row],
|
[giant_row],
|
||||||
[costco_row],
|
[costco_row],
|
||||||
giant_orders,
|
giant_orders,
|
||||||
costco_orders,
|
costco_orders,
|
||||||
|
[],
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(2, len(rows))
|
self.assertEqual(2, len(rows))
|
||||||
@@ -195,6 +196,9 @@ class PurchaseLogTests(unittest.TestCase):
|
|||||||
costco_items_enriched_csv=str(costco_items),
|
costco_items_enriched_csv=str(costco_items),
|
||||||
giant_orders_csv=str(giant_orders),
|
giant_orders_csv=str(giant_orders),
|
||||||
costco_orders_csv=str(costco_orders),
|
costco_orders_csv=str(costco_orders),
|
||||||
|
resolutions_csv=str(Path(tmpdir) / "review_resolutions.csv"),
|
||||||
|
catalog_csv=str(Path(tmpdir) / "canonical_catalog.csv"),
|
||||||
|
links_csv=str(Path(tmpdir) / "product_links.csv"),
|
||||||
output_csv=str(purchases_csv),
|
output_csv=str(purchases_csv),
|
||||||
examples_csv=str(examples_csv),
|
examples_csv=str(examples_csv),
|
||||||
)
|
)
|
||||||
@@ -208,6 +212,56 @@ class PurchaseLogTests(unittest.TestCase):
|
|||||||
self.assertEqual(2, len(purchase_rows))
|
self.assertEqual(2, len(purchase_rows))
|
||||||
self.assertEqual(1, len(example_rows))
|
self.assertEqual(1, len(example_rows))
|
||||||
|
|
||||||
|
def test_build_purchase_rows_applies_manual_resolution(self):
    """An approved manual resolution sets the canonical id and review columns."""
    giant_row = dict.fromkeys(enrich_costco.OUTPUT_FIELDS, "")
    giant_row.update(
        retailer="giant",
        order_id="g1",
        line_no="1",
        observed_item_key="giant:g1:1",
        order_date="2026-03-01",
        item_name="SB BAGGED ICE 20LB",
        item_name_norm="BAGGED ICE",
        retailer_item_id="100",
        upc="",
        qty="1",
        unit="EA",
        line_total="3.50",
        unit_price="3.50",
        measure_type="each",
        raw_order_path="giant_output/raw/g1.json",
        is_discount_line="false",
        is_coupon_line="false",
        is_fee="false",
    )

    # Look up the id the pipeline assigns so the resolution can target it.
    (
        observed_rows,
        _canonical_rows,
        _link_rows,
        _observed_id_by_key,
        _canonical_by_observed,
    ) = build_purchases.build_link_state([giant_row])
    observed_product_id = observed_rows[0]["observed_product_id"]

    giant_orders = [
        {
            "order_id": "g1",
            "store_name": "Giant",
            "store_number": "42",
            "store_city": "Springfield",
            "store_state": "VA",
        }
    ]
    manual_resolution = {
        "observed_product_id": observed_product_id,
        "canonical_product_id": "gcan_manual_ice",
        "resolution_action": "create",
        "status": "approved",
        "resolution_notes": "manual ice merge",
        "reviewed_at": "2026-03-16",
    }

    rows, _observed, _canon, _links = build_purchases.build_purchase_rows(
        [giant_row],
        [],
        giant_orders,
        [],
        [manual_resolution],
    )

    first = rows[0]
    self.assertEqual("gcan_manual_ice", first["canonical_product_id"])
    self.assertEqual("approved", first["review_status"])
    self.assertEqual("create", first["resolution_action"])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
100
tests/test_review_workflow.py
Normal file
100
tests/test_review_workflow.py
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
import csv
|
||||||
|
import tempfile
|
||||||
|
import unittest
|
||||||
|
from pathlib import Path
|
||||||
|
from unittest import mock
|
||||||
|
|
||||||
|
import review_products
|
||||||
|
|
||||||
|
|
||||||
|
class ReviewWorkflowTests(unittest.TestCase):
    """Tests for queue building and the interactive review command."""

    def test_build_review_queue_groups_unresolved_purchases(self):
        """Two unresolved purchases of one observed product give one queue row."""

        def ice_purchase(raw_name, normalized_name, line_total):
            return {
                "observed_product_id": "gobs_1",
                "canonical_product_id": "",
                "retailer": "giant",
                "raw_item_name": raw_name,
                "normalized_item_name": normalized_name,
                "upc": "",
                "line_total": line_total,
            }

        purchases = [
            ice_purchase("SB BAGGED ICE 20LB", "BAGGED ICE", "3.50"),
            ice_purchase("SB BAG ICE CUBED 10LB", "BAG ICE", "2.50"),
        ]

        queue_rows = review_products.build_review_queue(purchases, [])

        self.assertEqual(1, len(queue_rows))
        only_row = queue_rows[0]
        self.assertEqual("gobs_1", only_row["observed_product_id"])
        self.assertIn("SB BAGGED ICE 20LB", only_row["raw_item_names"])

    def test_review_products_creates_canonical_and_resolution(self):
        """Choosing "new canonical" writes both a resolution and a catalog row."""

        def read_rows(csv_path):
            with csv_path.open(newline="", encoding="utf-8") as handle:
                return list(csv.DictReader(handle))

        purchase = {
            "observed_product_id": "gobs_ice",
            "canonical_product_id": "",
            "retailer": "giant",
            "raw_item_name": "SB BAGGED ICE 20LB",
            "normalized_item_name": "BAGGED ICE",
            "upc": "",
            "line_total": "3.50",
        }

        with tempfile.TemporaryDirectory() as tmpdir:
            workdir = Path(tmpdir)
            purchases_csv = workdir / "purchases.csv"
            queue_csv = workdir / "review_queue.csv"
            resolutions_csv = workdir / "review_resolutions.csv"
            catalog_csv = workdir / "canonical_catalog.csv"

            with purchases_csv.open("w", newline="", encoding="utf-8") as handle:
                writer = csv.DictWriter(handle, fieldnames=list(purchase))
                writer.writeheader()
                writer.writerow(purchase)

            # Answers in prompt order: action, name, category, type, notes, then quit.
            answers = ["n", "ICE", "frozen", "ice", "manual merge", "q"]
            with mock.patch.object(review_products.click, "prompt", side_effect=answers):
                review_products.main.callback(
                    purchases_csv=str(purchases_csv),
                    queue_csv=str(queue_csv),
                    resolutions_csv=str(resolutions_csv),
                    catalog_csv=str(catalog_csv),
                    limit=1,
                    refresh_only=False,
                )

            for written in (queue_csv, resolutions_csv, catalog_csv):
                self.assertTrue(written.exists())
            resolution_rows = read_rows(resolutions_csv)
            catalog_rows = read_rows(catalog_csv)

            self.assertEqual("create", resolution_rows[0]["resolution_action"])
            self.assertEqual("approved", resolution_rows[0]["status"])
            self.assertEqual("ICE", catalog_rows[0]["canonical_name"])
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
unittest.main()
|
||||||
Reference in New Issue
Block a user