build_purchases rev1
This commit is contained in:
@@ -7,11 +7,7 @@ import build_canonical_layer
|
|||||||
import build_observed_products
|
import build_observed_products
|
||||||
import validate_cross_retailer_flow
|
import validate_cross_retailer_flow
|
||||||
from enrich_giant import format_decimal, to_decimal
|
from enrich_giant import format_decimal, to_decimal
|
||||||
<<<<<<< HEAD
|
|
||||||
from layer_helpers import read_csv_rows, stable_id, write_csv_rows
|
from layer_helpers import read_csv_rows, stable_id, write_csv_rows
|
||||||
=======
|
|
||||||
from layer_helpers import read_csv_rows, write_csv_rows
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
|
|
||||||
|
|
||||||
PURCHASE_FIELDS = [
|
PURCHASE_FIELDS = [
|
||||||
@@ -22,11 +18,8 @@ PURCHASE_FIELDS = [
|
|||||||
"observed_item_key",
|
"observed_item_key",
|
||||||
"observed_product_id",
|
"observed_product_id",
|
||||||
"canonical_product_id",
|
"canonical_product_id",
|
||||||
<<<<<<< HEAD
|
|
||||||
"review_status",
|
"review_status",
|
||||||
"resolution_action",
|
"resolution_action",
|
||||||
=======
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
"raw_item_name",
|
"raw_item_name",
|
||||||
"normalized_item_name",
|
"normalized_item_name",
|
||||||
"retailer_item_id",
|
"retailer_item_id",
|
||||||
@@ -69,7 +62,6 @@ EXAMPLE_FIELDS = [
|
|||||||
"notes",
|
"notes",
|
||||||
]
|
]
|
||||||
|
|
||||||
<<<<<<< HEAD
|
|
||||||
CATALOG_FIELDS = [
|
CATALOG_FIELDS = [
|
||||||
"canonical_product_id",
|
"canonical_product_id",
|
||||||
"canonical_name",
|
"canonical_name",
|
||||||
@@ -95,8 +87,6 @@ RESOLUTION_FIELDS = [
|
|||||||
"reviewed_at",
|
"reviewed_at",
|
||||||
]
|
]
|
||||||
|
|
||||||
=======
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
|
|
||||||
def decimal_or_zero(value):
|
def decimal_or_zero(value):
|
||||||
return to_decimal(value) or Decimal("0")
|
return to_decimal(value) or Decimal("0")
|
||||||
@@ -175,7 +165,6 @@ def order_lookup(rows, retailer):
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
<<<<<<< HEAD
|
|
||||||
def read_optional_csv_rows(path):
|
def read_optional_csv_rows(path):
|
||||||
path = Path(path)
|
path = Path(path)
|
||||||
if not path.exists():
|
if not path.exists():
|
||||||
@@ -220,9 +209,6 @@ def catalog_row_from_canonical(row):
|
|||||||
|
|
||||||
|
|
||||||
def build_link_state(enriched_rows):
|
def build_link_state(enriched_rows):
|
||||||
=======
|
|
||||||
def build_link_lookup(enriched_rows):
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
observed_rows = build_observed_products.build_observed_products(enriched_rows)
|
observed_rows = build_observed_products.build_observed_products(enriched_rows)
|
||||||
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
|
canonical_rows, link_rows = build_canonical_layer.build_canonical_layer(observed_rows)
|
||||||
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
|
giant_row, costco_row = validate_cross_retailer_flow.find_proof_pair(observed_rows)
|
||||||
@@ -239,7 +225,6 @@ def build_link_lookup(enriched_rows):
|
|||||||
canonical_id_by_observed = {
|
canonical_id_by_observed = {
|
||||||
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
|
row["observed_product_id"]: row["canonical_product_id"] for row in link_rows
|
||||||
}
|
}
|
||||||
<<<<<<< HEAD
|
|
||||||
return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed
|
return observed_rows, canonical_rows, link_rows, observed_id_by_key, canonical_id_by_observed
|
||||||
|
|
||||||
|
|
||||||
@@ -268,14 +253,6 @@ def build_purchase_rows(
|
|||||||
canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
|
canonical_id_by_observed[observed_product_id] = resolution["canonical_product_id"]
|
||||||
elif action == "exclude":
|
elif action == "exclude":
|
||||||
canonical_id_by_observed[observed_product_id] = ""
|
canonical_id_by_observed[observed_product_id] = ""
|
||||||
=======
|
|
||||||
return observed_id_by_key, canonical_id_by_observed
|
|
||||||
|
|
||||||
|
|
||||||
def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders, costco_orders):
|
|
||||||
all_enriched_rows = giant_enriched_rows + costco_enriched_rows
|
|
||||||
observed_id_by_key, canonical_id_by_observed = build_link_lookup(all_enriched_rows)
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
orders_by_id = {}
|
orders_by_id = {}
|
||||||
orders_by_id.update(order_lookup(giant_orders, "giant"))
|
orders_by_id.update(order_lookup(giant_orders, "giant"))
|
||||||
orders_by_id.update(order_lookup(costco_orders, "costco"))
|
orders_by_id.update(order_lookup(costco_orders, "costco"))
|
||||||
@@ -289,10 +266,7 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
|
|||||||
observed_product_id = observed_id_by_key.get(observed_key, "")
|
observed_product_id = observed_id_by_key.get(observed_key, "")
|
||||||
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
|
order_row = orders_by_id.get((row["retailer"], row["order_id"]), {})
|
||||||
metrics = derive_metrics(row)
|
metrics = derive_metrics(row)
|
||||||
<<<<<<< HEAD
|
|
||||||
resolution = resolution_lookup.get(observed_product_id, {})
|
resolution = resolution_lookup.get(observed_product_id, {})
|
||||||
=======
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
purchase_rows.append(
|
purchase_rows.append(
|
||||||
{
|
{
|
||||||
"purchase_date": row["order_date"],
|
"purchase_date": row["order_date"],
|
||||||
@@ -302,11 +276,8 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
|
|||||||
"observed_item_key": row["observed_item_key"],
|
"observed_item_key": row["observed_item_key"],
|
||||||
"observed_product_id": observed_product_id,
|
"observed_product_id": observed_product_id,
|
||||||
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
|
"canonical_product_id": canonical_id_by_observed.get(observed_product_id, ""),
|
||||||
<<<<<<< HEAD
|
|
||||||
"review_status": resolution.get("status", ""),
|
"review_status": resolution.get("status", ""),
|
||||||
"resolution_action": resolution.get("resolution_action", ""),
|
"resolution_action": resolution.get("resolution_action", ""),
|
||||||
=======
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
"raw_item_name": row["item_name"],
|
"raw_item_name": row["item_name"],
|
||||||
"normalized_item_name": row["item_name_norm"],
|
"normalized_item_name": row["item_name_norm"],
|
||||||
"retailer_item_id": row["retailer_item_id"],
|
"retailer_item_id": row["retailer_item_id"],
|
||||||
@@ -330,7 +301,6 @@ def build_purchase_rows(giant_enriched_rows, costco_enriched_rows, giant_orders,
|
|||||||
**metrics,
|
**metrics,
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
<<<<<<< HEAD
|
|
||||||
return purchase_rows, observed_rows, canonical_rows, link_rows
|
return purchase_rows, observed_rows, canonical_rows, link_rows
|
||||||
|
|
||||||
|
|
||||||
@@ -358,9 +328,6 @@ def apply_manual_resolutions_to_links(link_rows, resolution_rows):
|
|||||||
"link_notes": resolution.get("resolution_notes", ""),
|
"link_notes": resolution.get("resolution_notes", ""),
|
||||||
}
|
}
|
||||||
return sorted(link_by_observed.values(), key=lambda row: row["observed_product_id"])
|
return sorted(link_by_observed.values(), key=lambda row: row["observed_product_id"])
|
||||||
=======
|
|
||||||
return purchase_rows
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
|
|
||||||
|
|
||||||
def build_comparison_examples(purchase_rows):
|
def build_comparison_examples(purchase_rows):
|
||||||
@@ -399,12 +366,9 @@ def build_comparison_examples(purchase_rows):
|
|||||||
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
|
@click.option("--costco-items-enriched-csv", default="costco_output/items_enriched.csv", show_default=True)
|
||||||
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
|
@click.option("--giant-orders-csv", default="giant_output/orders.csv", show_default=True)
|
||||||
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
|
@click.option("--costco-orders-csv", default="costco_output/orders.csv", show_default=True)
|
||||||
<<<<<<< HEAD
|
|
||||||
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
|
@click.option("--resolutions-csv", default="combined_output/review_resolutions.csv", show_default=True)
|
||||||
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
|
@click.option("--catalog-csv", default="combined_output/canonical_catalog.csv", show_default=True)
|
||||||
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
|
@click.option("--links-csv", default="combined_output/product_links.csv", show_default=True)
|
||||||
=======
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
|
@click.option("--output-csv", default="combined_output/purchases.csv", show_default=True)
|
||||||
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
|
@click.option("--examples-csv", default="combined_output/comparison_examples.csv", show_default=True)
|
||||||
def main(
|
def main(
|
||||||
@@ -412,7 +376,6 @@ def main(
|
|||||||
costco_items_enriched_csv,
|
costco_items_enriched_csv,
|
||||||
giant_orders_csv,
|
giant_orders_csv,
|
||||||
costco_orders_csv,
|
costco_orders_csv,
|
||||||
<<<<<<< HEAD
|
|
||||||
resolutions_csv,
|
resolutions_csv,
|
||||||
catalog_csv,
|
catalog_csv,
|
||||||
links_csv,
|
links_csv,
|
||||||
@@ -421,17 +384,10 @@ def main(
|
|||||||
):
|
):
|
||||||
resolution_rows = read_optional_csv_rows(resolutions_csv)
|
resolution_rows = read_optional_csv_rows(resolutions_csv)
|
||||||
purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
|
purchase_rows, _observed_rows, canonical_rows, link_rows = build_purchase_rows(
|
||||||
=======
|
|
||||||
output_csv,
|
|
||||||
examples_csv,
|
|
||||||
):
|
|
||||||
purchase_rows = build_purchase_rows(
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
read_csv_rows(giant_items_enriched_csv),
|
read_csv_rows(giant_items_enriched_csv),
|
||||||
read_csv_rows(costco_items_enriched_csv),
|
read_csv_rows(costco_items_enriched_csv),
|
||||||
read_csv_rows(giant_orders_csv),
|
read_csv_rows(giant_orders_csv),
|
||||||
read_csv_rows(costco_orders_csv),
|
read_csv_rows(costco_orders_csv),
|
||||||
<<<<<<< HEAD
|
|
||||||
resolution_rows,
|
resolution_rows,
|
||||||
)
|
)
|
||||||
existing_catalog_rows = read_optional_csv_rows(catalog_csv)
|
existing_catalog_rows = read_optional_csv_rows(catalog_csv)
|
||||||
@@ -448,14 +404,6 @@ def main(
|
|||||||
click.echo(
|
click.echo(
|
||||||
f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
|
f"wrote {len(purchase_rows)} purchase rows to {output_csv}, "
|
||||||
f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
|
f"{len(merged_catalog_rows)} catalog rows to {catalog_csv}, "
|
||||||
=======
|
|
||||||
)
|
|
||||||
example_rows = build_comparison_examples(purchase_rows)
|
|
||||||
write_csv_rows(output_csv, purchase_rows, PURCHASE_FIELDS)
|
|
||||||
write_csv_rows(examples_csv, example_rows, EXAMPLE_FIELDS)
|
|
||||||
click.echo(
|
|
||||||
f"wrote {len(purchase_rows)} purchase rows to {output_csv} "
|
|
||||||
>>>>>>> be1bf63 (Build pivot-ready purchase log)
|
|
||||||
f"and {len(example_rows)} comparison examples to {examples_csv}"
|
f"and {len(example_rows)} comparison examples to {examples_csv}"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user