Document visit-level purchase analysis
This commit is contained in:
13
README.md
13
README.md
@@ -129,6 +129,19 @@ Combined:
|
||||
- `data/review/pipeline_status.json`
|
||||
- `data/catalog.csv`
|
||||
|
||||
`data/review/purchases.csv` is the main analysis artifact. It is designed to support both:
|
||||
- item-level price analysis
|
||||
- visit-level analysis such as spend by visit, items per visit, category spend by visit, and retailer/store breakdown
|
||||
|
||||
The visit fields are carried directly in `purchases.csv`, so you can pivot on them without extra joins:
|
||||
- `order_id`
|
||||
- `purchase_date`
|
||||
- `retailer`
|
||||
- `store_name`
|
||||
- `store_number`
|
||||
- `store_city`
|
||||
- `store_state`
|
||||
|
||||
## Review Workflow
|
||||
|
||||
Run `review_products.py` to clean up unresolved or weakly unified items:
|
||||
|
||||
@@ -167,6 +167,11 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
self.assertEqual("1", rows[0]["normalized_quantity"])
|
||||
self.assertEqual("lb", rows[0]["normalized_quantity_unit"])
|
||||
self.assertEqual("lb", rows[0]["effective_price_unit"])
|
||||
self.assertEqual("g1", rows[0]["order_id"])
|
||||
self.assertEqual("Giant", rows[0]["store_name"])
|
||||
self.assertEqual("42", rows[0]["store_number"])
|
||||
self.assertEqual("Springfield", rows[0]["store_city"])
|
||||
self.assertEqual("VA", rows[0]["store_state"])
|
||||
|
||||
def test_main_writes_purchase_and_example_csvs(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
@@ -624,6 +629,94 @@ class PurchaseLogTests(unittest.TestCase):
|
||||
self.assertEqual("", rows[0]["effective_price"])
|
||||
self.assertEqual("", rows[0]["effective_price_unit"])
|
||||
|
||||
def test_purchase_rows_support_visit_level_grouping_without_extra_joins(self):
    """Check that purchase rows alone are enough to group and total a visit.

    Two item lines from the same Giant order ("g1") are passed to
    ``build_purchases.build_purchase_rows`` together with one order record
    that carries the store fields; the remaining positional arguments are
    empty lists. Every resulting row must yield the same
    (retailer, order, date, store) tuple, and the rows' ``net_line_total``
    values must add up to 3.50 + 2.00.
    """
    blank_row = dict.fromkeys(enrich_costco.OUTPUT_FIELDS, "")

    def giant_line(line_no, row_id, item_id, item_name, line_total):
        # Start from an all-empty row, then fill in only what this test needs.
        line = dict(blank_row)
        line.update(
            {
                "retailer": "giant",
                "order_id": "g1",
                "line_no": line_no,
                "normalized_row_id": row_id,
                "normalized_item_id": item_id,
                "order_date": "2026-03-01",
                "item_name": item_name,
                "item_name_norm": item_name,
                "qty": "1",
                "unit": "EA",
                "normalized_quantity": "1",
                "normalized_quantity_unit": "each",
                "line_total": line_total,
                "measure_type": "each",
                "raw_order_path": "data/giant-web/raw/g1.json",
                "is_discount_line": "false",
                "is_coupon_line": "false",
                "is_fee": "false",
            }
        )
        return line

    first_line = giant_line("1", "giant:g1:1", "gnorm:first", "FIRST ITEM", "3.50")
    second_line = giant_line("2", "giant:g1:2", "gnorm:second", "SECOND ITEM", "2.00")

    # The order record is the only place the store fields are supplied.
    store_record = {
        "order_id": "g1",
        "store_name": "Giant",
        "store_number": "42",
        "store_city": "Springfield",
        "store_state": "VA",
    }

    purchase_rows, _links = build_purchases.build_purchase_rows(
        [first_line, second_line],
        [],
        [store_record],
        [],
        [],
        [],
        [],
    )

    visit_columns = (
        "retailer",
        "order_id",
        "purchase_date",
        "store_name",
        "store_number",
        "store_city",
        "store_state",
    )
    # Both lines belong to one visit, so the set must collapse to one key.
    distinct_visits = {
        tuple(purchase[column] for column in visit_columns)
        for purchase in purchase_rows
    }
    spend = sum(float(purchase["net_line_total"]) for purchase in purchase_rows)

    self.assertEqual(1, len(distinct_visits))
    self.assertEqual(5.5, spend)
|
||||
|
||||
|
||||
# Run the unittest runner when this module is executed directly as a script.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
|
||||
Reference in New Issue
Block a user