Compare commits

..

3 Commits

Author SHA1 Message Date
ben
e494386e64 build_purchases rev1 2026-03-17 12:21:44 -04:00
ben
7527fe37eb added git notes 2026-03-17 12:21:24 -04:00
ben
a1fafa3885 added t1.12 scope to simplify review process 2026-03-17 12:20:48 -04:00
3 changed files with 30 additions and 52 deletions

View File

@@ -7,11 +7,7 @@ import build_canonical_layer
import build_observed_products import build_observed_products
import validate_cross_retailer_flow import validate_cross_retailer_flow
from enrich_giant import format_decimal, to_decimal from enrich_giant import format_decimal, to_decimal
<<<<<<< HEAD
from layer_helpers import read_csv_rows, stable_id, write_csv_rows from layer_helpers import read_csv_rows, stable_id, write_csv_rows
=======
from layer_helpers import read_csv_rows, write_csv_rows
>>>>>>> be1bf63 (Build pivot-ready purchase log)
PURCHASE_FIELDS = [ PURCHASE_FIELDS = [
@@ -22,11 +18,8 @@ PURCHASE_FIELDS = [
"observed_item_key", "observed_item_key",
"observed_product_id", "observed_product_id",
"canonical_product_id", "canonical_product_id",
<<<<<<< HEAD
"review_status", "review_status",
"resolution_action", "resolution_action",
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
"raw_item_name", "raw_item_name",
"normalized_item_name", "normalized_item_name",
"retailer_item_id", "retailer_item_id",
@@ -69,7 +62,6 @@ EXAMPLE_FIELDS = [
"notes", "notes",
] ]
<<<<<<< HEAD
CATALOG_FIELDS = [ CATALOG_FIELDS = [
"canonical_product_id", "canonical_product_id",
"canonical_name", "canonical_name",
@@ -95,8 +87,6 @@ RESOLUTION_FIELDS = [
"reviewed_at", "reviewed_at",
] ]
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
def decimal_or_zero(value): def decimal_or_zero(value):
return to_decimal(value) or Decimal("0") return to_decimal(value) or Decimal("0")
@@ -175,7 +165,6 @@ def order_lookup(rows, retailer):
} }
<<<<<<< HEAD
def read_optional_csv_rows(path): def read_optional_csv_rows(path):
path = Path(path) path = Path(path)
if not path.exists(): if not path.exists():
@@ -220,9 +209,6 @@ def catalog_row_from_canonical(row):
def build_link_state(enriched_rows): def build_link_state(enriched_rows):
=======
def build_link_lookup(enriched_rows):
>>>>>>> be1bf63 (Build pivot-ready purchase log)
observed_rows = build_observed_products.build_observed_products(enriched_rows) observed_rows = build_observed_products.build_observed_products(enriched_rows)
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows) canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows) giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
@@ -239,7 +225,6 @@ def build_link_lookup(enriched_rows):
canonical_id_by_observed = { canonical_id_by_observed = {
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
} }
<<<<<<< HEAD
return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed
@@ -268,14 +253,6 @@ def build_purchase_rows(
canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"] canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
elif action == "exclude": elif action == "exclude":
canonical_id_by_observed[observed_product_id] = "" canonical_id_by_observed[observed_product_id] = ""
=======
return observed_id_by_key, canonical_id_by_observed
def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders, costco_orders):
all_enriched_rows = giant_enriched_rows + costco_enriched_rows
observed_id_by_key, canonical_id_by_observed = build_link_lookup(all_enriched_rows)
>>>>>>> be1bf63 (Build pivot-ready purchase log)
orders_by_id = {} orders_by_id = {}
orders_by_id.update(order_lookup(giant_orders, "giant")) orders_by_id.update(order_lookup(giant_orders, "giant"))
orders_by_id.update(order_lookup(costco_orders, "costco")) orders_by_id.update(order_lookup(costco_orders, "costco"))
@@ -289,10 +266,7 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
observed_product_id = observed_id_by_key.get(observed_key, "") observed_product_id = observed_id_by_key.get(observed_key, "")
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {}) order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
metrics = derive_metrics(row) metrics = derive_metrics(row)
<<<<<<< HEAD
resolution = resolution_lookup.get(observed_product_id, {}) resolution = resolution_lookup.get(observed_product_id, {})
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
purchase_rows.append( purchase_rows.append(
{ {
"purchase_date": row["order_date"], "purchase_date": row["order_date"],
@@ -302,11 +276,8 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
"observed_item_key": row["observed_item_key"], "observed_item_key": row["observed_item_key"],
"observed_product_id": observed_product_id, "observed_product_id": observed_product_id,
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""), "canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
<<<<<<< HEAD
"review_status": resolution.get("status", ""), "review_status": resolution.get("status", ""),
"resolution_action": resolution.get("resolution_action", ""), "resolution_action": resolution.get("resolution_action", ""),
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
"raw_item_name": row["item_name"], "raw_item_name": row["item_name"],
"normalized_item_name": row["item_name_norm"], "normalized_item_name": row["item_name_norm"],
"retailer_item_id": row["retailer_item_id"], "retailer_item_id": row["retailer_item_id"],
@@ -330,7 +301,6 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
**metrics, **metrics,
} }
) )
<<<<<<< HEAD
return purchase_rows, observed_rows, canonical_rows, link_rows return purchase_rows, observed_rows, canonical_rows, link_rows
@@ -358,9 +328,6 @@ def apply_manual_resolutions_to_links(link_rows, resolution_rows):
"link_notes": resolution.get("resolution_notes", ""), "link_notes": resolution.get("resolution_notes", ""),
} }
return sorted(link_by_observed.values(), key=lambda row: row["observed_product_id"]) return sorted(link_by_observed.values(), key=lambda row: row["observed_product_id"])
=======
return purchase_rows
>>>>>>> be1bf63 (Build pivot-ready purchase log)
def build_comparison_examples(purchase_rows): def build_comparison_examples(purchase_rows):
@@ -399,12 +366,9 @@ def build_comparison_examples(purchase_rows):
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True) @click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True) @click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True) @click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
<<<<<<< HEAD
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True) @click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True) @click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True) @click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
=======
>>>>>>> be1bf63 (Build pivot-ready purchase log)
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True) @click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True) @click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
def main( def main(
@@ -412,7 +376,6 @@ def main(
costco_items_enriched_csv, costco_items_enriched_csv,
giant_orders_csv, giant_orders_csv,
costco_orders_csv, costco_orders_csv,
<<<<<<< HEAD
resolutions_csv, resolutions_csv,
catalog_csv, catalog_csv,
links_csv, links_csv,
@@ -421,17 +384,10 @@ def main(
): ):
resolution_rows = read_optional_csv_rows(resolutions_csv) resolution_rows = read_optional_csv_rows(resolutions_csv)
purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows( purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
=======
output_csv,
examples_csv,
):
purchase_rows = build_purchase_rows(
>>>>>>> be1bf63 (Build pivot-ready purchase log)
read_csv_rows(giant_items_enriched_csv), read_csv_rows(giant_items_enriched_csv),
read_csv_rows(costco_items_enriched_csv), read_csv_rows(costco_items_enriched_csv),
read_csv_rows(giant_orders_csv), read_csv_rows(giant_orders_csv),
read_csv_rows(costco_orders_csv), read_csv_rows(costco_orders_csv),
<<<<<<< HEAD
resolution_rows, resolution_rows,
) )
existing_catalog_rows = read_optional_csv_rows(catalog_csv) existing_catalog_rows = read_optional_csv_rows(catalog_csv)
@@ -448,14 +404,6 @@ def main(
click.echo( click.echo(
f"wrote {len(purchase_rows)} purchase rows to {output_csv}, " f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, " f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
=======
)
example_rows = build_comparison_examples(purchase_rows)
write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
click.echo(
f"wrote {len(purchase_rows)} purchase rows to {output_csv} "
>>>>>>> be1bf63 (Build pivot-ready purchase log)
f"and {len(example_rows)} comparison examples to {examples_csv}" f"and {len(example_rows)} comparison examples to {examples_csv}"
) )

View File

@@ -65,6 +65,12 @@ A A : cherry pick commit to current branch
the final commit was not shown by hash, just the branch cx the final commit was not shown by hash, just the branch cx
since (local) cx was caught up with that branch since (local) cx was caught up with that branch
** reverting a branch
b l : switch to local branch (cx)
l l : open local reflog
put point on the commit; highlighted remote gitea/cx
X : reset branch; prompts you, selected cx
* giant requests * giant requests
** item: ** item:
get: get:

View File

@@ -367,6 +367,30 @@
- commit: `c7dad54` on branch `cx` - commit: `c7dad54` on branch `cx`
- tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python build_purchases.py`; `./venv/bin/python review_products.py --refresh-only`; verified `combined_output/review_queue.csv`, `combined_output/review_resolutions.csv` workflow, and `combined_output/canonical_catalog.csv` - tests: `./venv/bin/python -m unittest discover -s tests`; `./venv/bin/python build_purchases.py`; `./venv/bin/python review_products.py --refresh-only`; verified `combined_output/review_queue.csv`, `combined_output/review_resolutions.csv` workflow, and `combined_output/canonical_catalog.csv`
- date: 2026-03-16 - date: 2026-03-16
* [ ] t1.12: simplify review process display
Clearly show current state separate from proposed future state.
** acceptance criteria
1. Display position in review queue, e.g., (1/22)
2. Add short help text header to the review item explaining that the action resolves the current observed product group
3. color-code outputs based on info, prompt/menu, warning
1. color action menu/requests for input differently from display text; do not color individual options separately
4. update action menu `[x]exclude` to `e[x]clude`
5. on each review item, display a list of all matched items to be linked, sorted by descending date:
1. YYYY-mm-dd, price, raw item name, normalized item name, upc, retailer
2. image URL, if exists
6. on each review item, suggest (but do not auto-apply) up to 3 likely existing canonicals using deterministic rules, e.g.:
1. exact normalized name match
2. prefix/contains match on canonical name
3. exact UPC
- reinforce project terminology such as raw_name, observed_name, canonical_name
** evidence
- commit:
- tests:
- date:
** notes
* [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits) * [ ] t1.10: add optional llm-assisted suggestion workflow for unresolved products (2-4 commits)
** acceptance criteria ** acceptance criteria