build_purchases rev1

added git notes
added t1.12 scope to simplify review process
2026-03-17 12:21:44 -04:00 · 2026-03-17 12:21:24 -04:00 · 2026-03-17 12:20:48 -04:00
3 changed files with 30 additions and 52 deletions
--- a/build_purchases.py
+++ b/build_purchases.py
@@ -7,11 +7,7 @@ import build_canonical_layer
 import build_observed_products
 import validate_cross_retailer_flow
 from enrich_giant import format_decimal, to_decimal
-<<<<<<< HEAD
 from layer_helpers import read_csv_rows, stable_id, write_csv_rows
-=======
-from layer_helpers import read_csv_rows, write_csv_rows
->>>>>>> be1bf63 (Build pivot-ready purchase log)


 PURCHASE_FIELDS = [
@@ -22,11 +18,8 @@ PURCHASE_FIELDS = [
    "observed_item_key",
    "observed_product_id",
    "canonical_product_id",
-<<<<<<< HEAD
    "review_status",
    "resolution_action",
-=======
->>>>>>> be1bf63 (Build pivot-ready purchase log)
    "raw_item_name",
    "normalized_item_name",
    "retailer_item_id",
@@ -69,7 +62,6 @@ EXAMPLE_FIELDS = [
    "notes",
 ]

-<<<<<<< HEAD
 CATALOG_FIELDS = [
    "canonical_product_id",
    "canonical_name",
@@ -95,8 +87,6 @@ RESOLUTION_FIELDS = [
    "reviewed_at",
 ]

-=======
->>>>>>> be1bf63 (Build pivot-ready purchase log)

 def decimal_or_zero(value):
    return to_decimal(value) or Decimal("0")
@@ -175,7 +165,6 @@ def order_lookup(rows, retailer):
    }


-<<<<<<< HEAD
 def read_optional_csv_rows(path):
    path = Path(path)
    if not path.exists():
@@ -220,9 +209,6 @@ def catalog_row_from_canonical(row):


 def build_link_state(enriched_rows):
-=======
-def build_link_lookup(enriched_rows):
->>>>>>> be1bf63 (Build pivot-ready purchase log)
    observed_rows = build_observed_products.build_observed_products(enriched_rows)
    canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
    giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
@@ -239,7 +225,6 @@ def build_link_lookup(enriched_rows):
    canonical_id_by_observed = {
        row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
    }
-<<<<<<< HEAD
    return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed


@@ -268,14 +253,6 @@ def build_purchase_rows(
            canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
        elif action == "exclude":
            canonical_id_by_observed[observed_product_id] = ""
-=======
-    return observed_id_by_key, canonical_id_by_observed
-
-
-def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders, costco_orders):
-    all_enriched_rows = giant_enriched_rows + costco_enriched_rows
-    observed_id_by_key, canonical_id_by_observed = build_link_lookup(all_enriched_rows)
->>>>>>> be1bf63 (Build pivot-ready purchase log)
    orders_by_id = {}
    orders_by_id.update(order_lookup(giant_orders, "giant"))
    orders_by_id.update(order_lookup(costco_orders, "costco"))
@@ -289,10 +266,7 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
        observed_product_id = observed_id_by_key.get(observed_key, "")
        order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
        metrics = derive_metrics(row)
-<<<<<<< HEAD
        resolution = resolution_lookup.get(observed_product_id, {})
-=======
->>>>>>> be1bf63 (Build pivot-ready purchase log)
        purchase_rows.append(
            {
                "purchase_date": row["order_date"],
@@ -302,11 +276,8 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
                "observed_item_key": row["observed_item_key"],
                "observed_product_id": observed_product_id,
                "canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
-<<<<<<< HEAD
                "review_status": resolution.get("status", ""),
                "resolution_action": resolution.get("resolution_action", ""),
-=======
->>>>>>> be1bf63 (Build pivot-ready purchase log)
                "raw_item_name": row["item_name"],
                "normalized_item_name": row["item_name_norm"],
                "retailer_item_id": row["retailer_item_id"],
@@ -330,7 +301,6 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
                **metrics,
            }
        )
-<<<<<<< HEAD
    return purchase_rows, observed_rows, canonical_rows, link_rows


@@ -358,9 +328,6 @@ def apply_manual_resolutions_to_links(link_rows, resolution_rows):
                "link_notes": resolution.get("resolution_notes", ""),
            }
    return sorted(link_by_observed.values(), key=lambda row: row["observed_product_id"])
-=======
-    return purchase_rows
->>>>>>> be1bf63 (Build pivot-ready purchase log)


 def build_comparison_examples(purchase_rows):
@@ -399,12 +366,9 @@ def build_comparison_examples(purchase_rows):
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
-<<<<<<< HEAD
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
-=======
->>>>>>> be1bf63 (Build pivot-ready purchase log)
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
 def main(
@@ -412,7 +376,6 @@ def main(
    costco_items_enriched_csv,
    giant_orders_csv,
    costco_orders_csv,
-<<<<<<< HEAD
    resolutions_csv,
    catalog_csv,
    links_csv,
@@ -421,17 +384,10 @@ def main(
 ):
    resolution_rows = read_optional_csv_rows(resolutions_csv)
    purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
-=======
-    output_csv,
-    examples_csv,
-):
-    purchase_rows = build_purchase_rows(
->>>>>>> be1bf63 (Build pivot-ready purchase log)
        read_csv_rows(giant_items_enriched_csv),
        read_csv_rows(costco_items_enriched_csv),
        read_csv_rows(giant_orders_csv),
        read_csv_rows(costco_orders_csv),
-<<<<<<< HEAD
        resolution_rows,
    )
    existing_catalog_rows = read_optional_csv_rows(catalog_csv)
@@ -448,14 +404,6 @@ def main(
    click.echo(
        f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
        f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
-=======
-    )
-    example_rows = build_comparison_examples(purchase_rows)
-    write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
-    write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
-    click.echo(
-        f"wrote {len(purchase_rows)} purchase rows to {output_csv} "
->>>>>>> be1bf63 (Build pivot-ready purchase log)
        f"and {len(example_rows)} comparison examples to {examples_csv}"
    )

--- a/pm/scrape-giant.org
+++ b/pm/scrape-giant.org
@@ -65,6 +65,12 @@ A A : cherry pick commit to current branch
      the final commit was not shown by hash, just the branch cx
       since (local) cx was caught up with that branch

+** reverting a branch
+b l : switch to local branch (cx)
+l l : open local reflog
+put point on the commit; highlighted remote gitea/cx
+X   : reset branch; prompts you, selected cx
+       
 * giant requests
 ** item:
 get:
--- a/pm/tasks.org
+++ b/pm/tasks.org
@@ -367,6 +367,30 @@
 - commit: `c7dad54` on branch `cx`
 - tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python build_purchases.py`; `./venv/bin/python review_products.py --refresh-only`; verified `combined_output/review_queue.csv`, `combined_output/review_resolutions.csv` workflow, and `combined_output/canonical_catalog.csv`
 - date: 2026-03-16
+* [ ] t1.12: simplify review process display
+Clearly show current state separate from proposed future state.
+** acceptance criteria
+1. Display position in review queue, e.g., (1/22)
+2. Add short help text header to the review item explaining that the action resolves the current observed product group
+3. color-code outputs based on info, prompt/menu, warning
+   1. color action menu/requests for input differently from display text; do not color individual options separately
+4. update action menu `[x]exclude` to `e[x]clude`
+5. on each review item, display a list of all matched items to be linked, sorted by descending date:
+   1. YYYY-mm-dd, price, raw item name, normalized item name, upc, retailer
+   2. image URL, if exists
+6. on each review item, suggest (but do not auto-apply) up to 3 likely existing canonicals using deterministic rules, e.g.:
+   1. exact normalized name match
+   2. prefix/contains match on canonical name
+   3. exact UPC
+- reinforce project terminology such as raw_name, observed_name, canonical_name
+  
+** evidence
+- commit:
+- tests:
+- date:
+
+** notes
+
 * [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits)

 ** acceptance criteria
Author	SHA1	Message	Date
ben	e494386e64	build_purchases rev1	2026-03-17 12:21:44 -04:00
ben	7527fe37eb	added git notes	2026-03-17 12:21:24 -04:00
ben	a1fafa3885	added t1.12 scope to simplify review process	2026-03-17 12:20:48 -04:00