Document visit-level purchase analysis
This commit is contained in:
13
README.md
13
README.md
@@ -129,6 +129,19 @@ Combined:
|
|||||||
- `data/review/pipeline_status.json`
|
- `data/review/pipeline_status.json`
|
||||||
- `data/catalog.csv`
|
- `data/catalog.csv`
|
||||||
|
|
||||||
|
`data/review/purchases.csv` is the main analysis artifact. It is designed to support both:
|
||||||
|
- item-level price analysis
|
||||||
|
- visit-level analysis such as spend by visit, items per visit, category spend by visit, and retailer/store breakdown
|
||||||
|
|
||||||
|
The visit fields are carried directly in `purchases.csv`, so you can pivot on them without extra joins:
|
||||||
|
- `order_id`
|
||||||
|
- `purchase_date`
|
||||||
|
- `retailer`
|
||||||
|
- `store_name`
|
||||||
|
- `store_number`
|
||||||
|
- `store_city`
|
||||||
|
- `store_state`
|
||||||
|
|
||||||
## Review Workflow
|
## Review Workflow
|
||||||
|
|
||||||
Run `review_products.py` to clean up unresolved or weakly unified items:
|
Run `review_products.py` to clean up unresolved or weakly unified items:
|
||||||
|
|||||||
@@ -167,6 +167,11 @@ class PurchaseLogTests(unittest.TestCase):
|
|||||||
self.assertEqual("1", rows[0]["normalized_quantity"])
|
self.assertEqual("1", rows[0]["normalized_quantity"])
|
||||||
self.assertEqual("lb", rows[0]["normalized_quantity_unit"])
|
self.assertEqual("lb", rows[0]["normalized_quantity_unit"])
|
||||||
self.assertEqual("lb", rows[0]["effective_price_unit"])
|
self.assertEqual("lb", rows[0]["effective_price_unit"])
|
||||||
|
self.assertEqual("g1", rows[0]["order_id"])
|
||||||
|
self.assertEqual("Giant", rows[0]["store_name"])
|
||||||
|
self.assertEqual("42", rows[0]["store_number"])
|
||||||
|
self.assertEqual("Springfield", rows[0]["store_city"])
|
||||||
|
self.assertEqual("VA", rows[0]["store_state"])
|
||||||
|
|
||||||
def test_main_writes_purchase_and_example_csvs(self):
|
def test_main_writes_purchase_and_example_csvs(self):
|
||||||
with tempfile.TemporaryDirectory() as tmpdir:
|
with tempfile.TemporaryDirectory() as tmpdir:
|
||||||
@@ -624,6 +629,94 @@ class PurchaseLogTests(unittest.TestCase):
|
|||||||
self.assertEqual("", rows[0]["effective_price"])
|
self.assertEqual("", rows[0]["effective_price"])
|
||||||
self.assertEqual("", rows[0]["effective_price_unit"])
|
self.assertEqual("", rows[0]["effective_price_unit"])
|
||||||
|
|
||||||
|
def test_purchase_rows_support_visit_level_grouping_without_extra_joins(self):
    """Purchase rows carry enough visit fields to group and total one visit.

    Two item lines from the same Giant order ("g1") are passed to
    ``build_purchases.build_purchase_rows`` together with a single order
    record holding the store fields.  The test then reads the visit
    columns straight off the returned rows and checks that they collapse
    to exactly one distinct visit whose ``net_line_total`` values sum
    to 5.5.
    """
    columns = enrich_costco.OUTPUT_FIELDS

    def giant_line(line_no, slug, label, total):
        # Start from a row with every output column blank, then fill in
        # only the fields this scenario sets.
        line = dict.fromkeys(columns, "")
        line.update(
            {
                "retailer": "giant",
                "order_id": "g1",
                "line_no": line_no,
                "normalized_row_id": f"giant:g1:{line_no}",
                "normalized_item_id": f"gnorm:{slug}",
                "order_date": "2026-03-01",
                "item_name": label,
                "item_name_norm": label,
                "qty": "1",
                "unit": "EA",
                "normalized_quantity": "1",
                "normalized_quantity_unit": "each",
                "line_total": total,
                "measure_type": "each",
                "raw_order_path": "data/giant-web/raw/g1.json",
                "is_discount_line": "false",
                "is_coupon_line": "false",
                "is_fee": "false",
            }
        )
        return line

    item_lines = [
        giant_line("1", "first", "FIRST ITEM", "3.50"),
        giant_line("2", "second", "SECOND ITEM", "2.00"),
    ]
    order_record = {
        "order_id": "g1",
        "store_name": "Giant",
        "store_number": "42",
        "store_city": "Springfield",
        "store_state": "VA",
    }

    # Only the first and third positional inputs are populated; every
    # other input is passed as an empty list.
    purchase_rows, _ = build_purchases.build_purchase_rows(
        item_lines,
        [],
        [order_record],
        [],
        [],
        [],
        [],
    )

    # The columns that together identify a single store visit.
    visit_fields = (
        "retailer",
        "order_id",
        "purchase_date",
        "store_name",
        "store_number",
        "store_city",
        "store_state",
    )
    distinct_visits = set()
    for purchase in purchase_rows:
        distinct_visits.add(tuple(purchase[name] for name in visit_fields))

    line_amounts = [float(purchase["net_line_total"]) for purchase in purchase_rows]
    spend = sum(line_amounts)

    self.assertEqual(1, len(distinct_visits))
    self.assertEqual(5.5, spend)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|||||||
Reference in New Issue
Block a user