Compare commits
2 Commits
23d0c7e5cd
...
861955557a
| Author | SHA1 | Date | |
|---|---|---|---|
| | 861955557a | | |
| | 6e1cde2c83 | | |
@@ -213,9 +213,11 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
|
||||
def iter_costco_rows(raw_dir):
|
||||
for path in discover_json_files(raw_dir):
|
||||
if path.name == "summary.json":
|
||||
if path.name in {"summary.json", "summary_requests.json"}:
|
||||
continue
|
||||
payload = json.loads(path.read_text(encoding="utf-8"))
|
||||
if not isinstance(payload, dict):
|
||||
continue
|
||||
receipts = payload.get("data", {}).get("receiptsWithCounts", {}).get("receipts", [])
|
||||
for receipt in receipts:
|
||||
order_id = receipt["transactionBarcode"]
|
||||
|
||||
@@ -212,3 +212,14 @@ request:
|
||||
- pull all orders by default
|
||||
- add online orders
|
||||
- copy header data from browser using selenium
|
||||
|
||||
* how to run
|
||||
|
||||
python scrape_giant.py
|
||||
python enrich_giant.py
|
||||
python scrape_costco.py
|
||||
python enrich_costco.py
|
||||
python build_observed_products.py
|
||||
python build_review_queue.py
|
||||
python build_canonical_layer.py
|
||||
python validate_cross_retailer_flow.py
|
||||
|
||||
Reference in New Issue
Block a user