Build Giant item enricher

This commit is contained in:
ben
2026-03-16 00:28:28 -04:00
parent 42dbae1d2e
commit 14f2cc2bac
3 changed files with 620 additions and 4 deletions

190
tests/test_enrich_giant.py Normal file
View File

@@ -0,0 +1,190 @@
import csv
import json
import tempfile
import unittest
from pathlib import Path
import enrich_giant
class EnrichGiantTests(unittest.TestCase):
    """Tests for the parsing helpers and CSV pipeline in ``enrich_giant``.

    Covers ``parse_size_and_pack``, ``parse_item``, ``build_items_enriched``
    and ``write_csv`` using small hand-written order fixtures.
    """

    @staticmethod
    def _raw_item(
        pod_id,
        ship_qty,
        picked_weight,
        unit_price,
        name,
        lb_each_code,
        grocery_amount,
        upc,
        image=None,
    ):
        """Return one raw order line-item dict for use as a fixture.

        Savings/coupon fields are fixed at zero and the category is fixed at
        "1"/"Grocery"; the "image" key is only present when ``image`` is given.
        """
        payload = {
            "podId": pod_id,
            "shipQy": ship_qty,
            "totalPickedWeight": picked_weight,
            "unitPrice": unit_price,
            "itemName": name,
            "lbEachCd": lb_each_code,
            "groceryAmount": grocery_amount,
            "primUpcCd": upc,
            "mvpSavings": 0,
            "rewardSavings": 0,
            "couponSavings": 0,
            "couponPrice": 0,
            "categoryId": "1",
            "categoryDesc": "Grocery",
        }
        if image is not None:
            payload["image"] = image
        return payload

    def test_parse_size_and_pack_handles_pack_and_weight_tokens(self):
        """"6PK 7.5Z" in a name is expected to give size 7.5 oz, pack qty 6."""
        parsed = enrich_giant.parse_size_and_pack("COKE CHERRY 6PK 7.5Z")
        size_value, size_unit, pack_qty = parsed

        self.assertEqual("7.5", size_value)
        self.assertEqual("oz", size_unit)
        self.assertEqual("6", pack_qty)

    def test_parse_item_marks_store_brand_fee_and_weight_prices(self):
        """Check a by-the-pound "+SB" item, then a bag-charge fee line."""
        apple = self._raw_item(
            pod_id=1,
            ship_qty=1,
            picked_weight=2,
            unit_price=3.98,
            name="+SB GALA APPLE 5 LB",
            lb_each_code="LB",
            grocery_amount=3.98,
            upc="111",
            image={"large": "https://example.test/apple.jpg"},
        )
        row = enrich_giant.parse_item(
            order_id="abc123",
            order_date="2026-03-01",
            raw_path=Path("raw/abc123.json"),
            line_no=1,
            item=apple,
        )

        # Checked in this order so the first failing column is reported first.
        expected_apple = (
            ("brand_guess", "SB"),
            ("item_name_norm", "GALA APPLE"),
            ("size_value", "5"),
            ("size_unit", "lb"),
            ("measure_type", "weight"),
            ("is_store_brand", "true"),
            ("price_per_lb", "1.99"),
            ("price_per_oz", "0.1244"),
            ("image_url", "https://example.test/apple.jpg"),
        )
        for column, wanted in expected_apple:
            self.assertEqual(wanted, row[column])

        bag_charge = self._raw_item(
            pod_id=2,
            ship_qty=1,
            picked_weight=0,
            unit_price=0.05,
            name="GL BAG CHARGE",
            lb_each_code="EA",
            grocery_amount=0.05,
            upc="",
        )
        fee_row = enrich_giant.parse_item(
            order_id="abc123",
            order_date="2026-03-01",
            raw_path=Path("raw/abc123.json"),
            line_no=2,
            item=bag_charge,
        )

        self.assertEqual("true", fee_row["is_fee"])
        self.assertEqual("GL BAG CHARGE", fee_row["item_name_norm"])

    def test_parse_item_derives_packaged_weight_prices_from_size_tokens(self):
        """An "EA" item with pack/size tokens still gets per-oz/per-lb prices."""
        pepsi = self._raw_item(
            pod_id=1,
            ship_qty=2,
            picked_weight=0,
            unit_price=3.0,
            name="PEPSI 6PK 7.5Z",
            lb_each_code="EA",
            grocery_amount=6.0,
            upc="111",
        )
        row = enrich_giant.parse_item(
            order_id="abc123",
            order_date="2026-03-01",
            raw_path=Path("raw/abc123.json"),
            line_no=1,
            item=pepsi,
        )

        expected = (
            ("measure_type", "weight"),
            ("pack_qty", "6"),
            ("size_value", "7.5"),
            ("price_per_oz", "0.0667"),
            ("price_per_lb", "1.0667"),
        )
        for column, wanted in expected:
            self.assertEqual(wanted, row[column])

    def test_build_items_enriched_reads_raw_order_files_and_writes_csv(self):
        """Build rows from a raw directory and round-trip them through CSV."""
        rotini = self._raw_item(
            pod_id=20,
            ship_qty=1,
            picked_weight=0,
            unit_price=2.99,
            name="SB ROTINI 16Z",
            lb_each_code="EA",
            grocery_amount=2.99,
            upc="222",
            image={"small": "https://example.test/rotini.jpg"},
        )
        pepsi = self._raw_item(
            pod_id=10,
            ship_qty=2,
            picked_weight=0,
            unit_price=1.5,
            name="PEPSI 6PK 7.5Z",
            lb_each_code="EA",
            grocery_amount=3.0,
            upc="111",
        )
        # order-2 is written before order-1 on purpose: the assertion below
        # expects the rows back as order-1, order-2.
        order_files = (
            (
                "order-2.json",
                {"orderId": "order-2", "orderDate": "2026-03-02", "items": [rotini]},
            ),
            (
                "order-1.json",
                {"orderId": "order-1", "orderDate": "2026-03-01", "items": [pepsi]},
            ),
        )

        with tempfile.TemporaryDirectory() as tmpdir:
            workspace = Path(tmpdir)
            raw_dir = workspace / "raw"
            raw_dir.mkdir()

            # A non-order file sits alongside the orders; only two rows are
            # expected in the output below.
            (raw_dir / "history.json").write_text("{}", encoding="utf-8")
            for filename, order in order_files:
                (raw_dir / filename).write_text(json.dumps(order), encoding="utf-8")

            rows = enrich_giant.build_items_enriched(raw_dir)
            output_csv = workspace / "items_enriched.csv"
            enrich_giant.write_csv(output_csv, rows)

            self.assertEqual(["order-1", "order-2"], [row["order_id"] for row in rows])
            first, second = rows[0], rows[1]
            self.assertEqual("PEPSI", first["item_name_norm"])
            self.assertEqual("6", first["pack_qty"])
            self.assertEqual("7.5", first["size_value"])
            self.assertEqual("true", second["is_store_brand"])

            with output_csv.open(newline="", encoding="utf-8") as handle:
                written_rows = list(csv.DictReader(handle))

            self.assertEqual(2, len(written_rows))
            self.assertEqual(enrich_giant.OUTPUT_FIELDS, list(written_rows[0].keys()))
if __name__ == "__main__":
    # Allow running this file directly instead of through a test runner.
    unittest.main()