import csv
import tempfile
import unittest
from pathlib import Path

import scraper

|
class ScraperTests(unittest.TestCase):
    """Unit tests for scraper's order flattening and CSV dedup helpers."""

    def test_flatten_orders_extracts_order_and_item_rows(self):
        """One history record plus one detail payload yields one order row and one item row."""
        order_history = {
            "records": [
                {"orderId": "abc123", "serviceType": "PICKUP"},
            ]
        }
        order_details = [
            {
                "orderId": "abc123",
                "orderDate": "2026-03-01",
                "deliveryDate": "2026-03-02",
                "orderTotal": "12.34",
                "paymentMethod": "VISA",
                "totalItemCount": 1,
                "totalSavings": "1.00",
                "yourSavingsTotal": "1.00",
                "couponsDiscountsTotal": "0.50",
                "refundOrder": False,
                "ebtOrder": False,
                "pup": {
                    "storeName": "Giant",
                    "aholdStoreNumber": "42",
                    "storeAddress1": "123 Main",
                    "storeCity": "Springfield",
                    "storeState": "VA",
                    "storeZipcode": "22150",
                },
                "items": [
                    {
                        "podId": "pod-1",
                        "itemName": "Bananas",
                        "primUpcCd": "111",
                        "categoryId": "produce",
                        "categoryDesc": "Produce",
                        "shipQy": "2",
                        "lbEachCd": "EA",
                        "unitPrice": "0.59",
                        "groceryAmount": "1.18",
                        "totalPickedWeight": "",
                        "mvpSavings": "0.10",
                        "rewardSavings": "0.00",
                        "couponSavings": "0.00",
                        "couponPrice": "",
                    }
                ],
            }
        ]

        order_rows, item_rows = scraper.flatten_orders(
            order_history,
            order_details,
            history_path=Path("data/giant-web/raw/history.json"),
            raw_dir=Path("data/giant-web/raw"),
        )

        # Order-level row: identity, provenance paths, and pass-through fields.
        self.assertEqual(1, len(order_rows))
        first_order = order_rows[0]
        self.assertEqual("abc123", first_order["order_id"])
        self.assertEqual("giant", first_order["retailer"])
        self.assertEqual("PICKUP", first_order["service_type"])
        self.assertEqual("data/giant-web/raw/history.json", first_order["raw_history_path"])
        self.assertEqual("data/giant-web/raw/abc123.json", first_order["raw_order_path"])

        # Item-level row: line numbering, names, and discount flag are stringified.
        self.assertEqual(1, len(item_rows))
        first_item = item_rows[0]
        self.assertEqual("1", first_item["line_no"])
        self.assertEqual("Bananas", first_item["item_name"])
        self.assertEqual("giant", first_item["retailer"])
        self.assertEqual("data/giant-web/raw/abc123.json", first_item["raw_order_path"])
        self.assertEqual("false", first_item["is_discount_line"])

    def test_append_dedup_replaces_duplicate_rows_and_preserves_new_values(self):
        """A second append with an overlapping key keeps the newer row's values."""
        with tempfile.TemporaryDirectory() as tmpdir:
            csv_path = Path(tmpdir) / "orders.csv"

            # Seed the file with two rows.
            scraper.append_dedup(
                csv_path,
                [
                    {"order_id": "1", "order_total": "10.00"},
                    {"order_id": "2", "order_total": "20.00"},
                ],
                subset=["order_id"],
                fieldnames=["order_id", "order_total"],
            )

            # Append again: order 2 is updated in place, order 3 is new.
            result = scraper.append_dedup(
                csv_path,
                [
                    {"order_id": "2", "order_total": "21.50"},
                    {"order_id": "3", "order_total": "30.00"},
                ],
                subset=["order_id"],
                fieldnames=["order_id", "order_total"],
            )

            expected = [
                {"order_id": "1", "order_total": "10.00"},
                {"order_id": "2", "order_total": "21.50"},
                {"order_id": "3", "order_total": "30.00"},
            ]
            self.assertEqual(expected, result)

            # The on-disk CSV must match the returned merged rows exactly.
            with csv_path.open(newline="", encoding="utf-8") as handle:
                persisted = list(csv.DictReader(handle))
            self.assertEqual(result, persisted)

    def test_read_existing_order_ids_returns_known_ids(self):
        """IDs already present in the CSV come back as a set of strings."""
        with tempfile.TemporaryDirectory() as tmpdir:
            csv_path = Path(tmpdir) / "orders.csv"
            csv_path.write_text("order_id,order_total\n1,10.00\n2,20.00\n", encoding="utf-8")

            self.assertEqual({"1", "2"}, scraper.read_existing_order_ids(csv_path))

# Allow running this test module directly (e.g. `python test_scraper.py`)
# in addition to discovery via `python -m unittest` / pytest.
if __name__ == "__main__":
    unittest.main()