Compare commits

...

3 Commits

Author SHA1 Message Date
ben
e494386e64 build_purchases rev1 2026-03-17 12:21:44 -04:00
ben
7527fe37eb added git notes 2026-03-17 12:21:24 -04:00
ben
a1fafa3885 added t1.12 scope to simplify review process 2026-03-17 12:20:48 -04:00
3 changed files with 30 additions and 52 deletions

View File

@@ -7,11 +7,7 @@ import build_canonical_layer
import build_observed_products
import validate_cross_retailer_flow
from enrich_giant import format_decimal, to_decimal
<<<<<<< HEAD
from layer_helpers import read_csv_rows, stable_id, write_csv_rows
=======
from layer_helpers import read_csv_rows, write_csv_rows
>>>>>>> be1bf63 (Build pivot-ready purchase log)
PURCHASE_FIELDS = [
@@ -22,11 +18,8 @@ PURCHASE_FIELDS = [
"observed_item_key",
"observed_product_id",
"canonical_product_id",
<<<<<<< HEAD
"review_status",
"resolution_action",
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
"raw_item_name",
"normalized_item_name",
"retailer_item_id",
@@ -69,7 +62,6 @@ EXAMPLE_FIELDS = [
"notes",
]
<<<<<<< HEAD
CATALOG_FIELDS = [
"canonical_product_id",
"canonical_name",
@@ -95,8 +87,6 @@ RESOLUTION_FIELDS = [
"reviewed_at",
]
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
def decimal_or_zero(value):
return to_decimal(value) or Decimal("0")
@@ -175,7 +165,6 @@ def order_lookup(rows, retailer):
}
<<<<<<< HEAD
def read_optional_csv_rows(path):
path = Path(path)
if not path.exists():
@@ -220,9 +209,6 @@ def catalog_row_from_canonical(row):
def build_link_state(enriched_rows):
=======
def build_link_lookup(enriched_rows):
>>>>>>> be1bf63 (Build pivot-ready purchase log)
observed_rows = build_observed_products.build_observed_products(enriched_rows)
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
@@ -239,7 +225,6 @@ def build_link_lookup(enriched_rows):
canonical_id_by_observed = {
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
}
<<<<<<< HEAD
return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed
@@ -268,14 +253,6 @@ def build_purchase_rows(
canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
elif action == "exclude":
canonical_id_by_observed[observed_product_id] = ""
=======
return observed_id_by_key, canonical_id_by_observed
def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders, costco_orders):
all_enriched_rows = giant_enriched_rows + costco_enriched_rows
observed_id_by_key, canonical_id_by_observed = build_link_lookup(all_enriched_rows)
>>>>>>> be1bf63 (Build pivot-ready purchase log)
orders_by_id = {}
orders_by_id.update(order_lookup(giant_orders, "giant"))
orders_by_id.update(order_lookup(costco_orders, "costco"))
@@ -289,10 +266,7 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
observed_product_id = observed_id_by_key.get(observed_key, "")
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
metrics = derive_metrics(row)
<<<<<<< HEAD
resolution = resolution_lookup.get(observed_product_id, {})
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
purchase_rows.append(
{
"purchase_date": row["order_date"],
@@ -302,11 +276,8 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
"observed_item_key": row["observed_item_key"],
"observed_product_id": observed_product_id,
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
<<<<<<< HEAD
"review_status": resolution.get("status", ""),
"resolution_action": resolution.get("resolution_action", ""),
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
"raw_item_name": row["item_name"],
"normalized_item_name": row["item_name_norm"],
"retailer_item_id": row["retailer_item_id"],
@@ -330,7 +301,6 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
**metrics,
}
)
<<<<<<< HEAD
return purchase_rows, observed_rows, canonical_rows, link_rows
@@ -358,9 +328,6 @@ def apply_manual_resolutions_to_links(link_rows, resolution_rows):
"link_notes": resolution.get("resolution_notes", ""),
}
return sorted(link_by_observed.values(), key=lambda row: row["observed_product_id"])
=======
return purchase_rows
>>>>>>> be1bf63 (Build pivot-ready purchase log)
def build_comparison_examples(purchase_rows):
@@ -399,12 +366,9 @@ def build_comparison_examples(purchase_rows):
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
<<<<<<< HEAD
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
def main(
@@ -412,7 +376,6 @@ def main(
costco_items_enriched_csv,
giant_orders_csv,
costco_orders_csv,
<<<<<<< HEAD
resolutions_csv,
catalog_csv,
links_csv,
@@ -421,17 +384,10 @@ def main(
):
resolution_rows = read_optional_csv_rows(resolutions_csv)
purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
=======
output_csv,
examples_csv,
):
purchase_rows = build_purchase_rows(
>>>>>>> be1bf63 (Build pivot-ready purchase log)
read_csv_rows(giant_items_enriched_csv),
read_csv_rows(costco_items_enriched_csv),
read_csv_rows(giant_orders_csv),
read_csv_rows(costco_orders_csv),
<<<<<<< HEAD
resolution_rows,
)
existing_catalog_rows = read_optional_csv_rows(catalog_csv)
@@ -448,14 +404,6 @@ def main(
click.echo(
f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
=======
)
example_rows = build_comparison_examples(purchase_rows)
write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
click.echo(
f"wrote {len(purchase_rows)} purchase rows to {output_csv} "
>>>>>>> be1bf63 (Build pivot-ready purchase log)
f"and {len(example_rows)} comparison examples to {examples_csv}"
)

View File

@@ -65,6 +65,12 @@ A A : cherry pick commit to current branch
the final commit was not shown by hash, just the branch cx
since (local) cx was caught up with that branch
** reverting a branch
b l : switch to local branch (cx)
l l : open local reflog
put point on the commit; highlighted remote gitea/cx
X : reset branch; prompts you, selected cx
* giant requests
** item:
get:

View File

@@ -367,6 +367,30 @@
- commit: `c7dad54` on branch `cx`
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python build_purchases.py`; `./venv/bin/python review_products.py --refresh-only`; verified `combined_output/review_queue.csv`, `combined_output/review_resolutions.csv` workflow, and `combined_output/canonical_catalog.csv`
- date: 2026-03-16
* [ ] t1.12: simplify review process display
Clearly show current state separate from proposed future state.
** acceptance criteria
1. Display position in review queue, e.g., (1/22)
2. Add short help text header to the review item explaining that the action resolves the current observed product group
3. color-code outputs based on info, prompt/menu, warning
1. color action menu/requests for input differently from display text; do not color individual options separately
4. update action menu `[x]exclude` to `e[x]clude`
5. on each review item, display a list of all matched items to be linked, sorted by descending date:
1. YYYY-mm-dd, price, raw item name, normalized item name, upc, retailer
2. image URL, if exists
6. on each review item, suggest (but do not auto-apply) up to 3 likely existing canonicals using deterministic rules, e.g.:
1. exact normalized name match
2. prefix/contains match on canonical name
3. exact UPC
- reinforce project terminology such as raw_name, observed_name, canonical_name
** evidence
- commit:
- tests:
- date:
** notes
* [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits)
** acceptance criteria