diff --git a/history.json b/history.json deleted file mode 100644 index b06d9b1..0000000 --- a/history.json +++ /dev/null @@ -1 +0,0 @@ -{"pagination":{"page":1,"pageSize":50,"pageTotal":0,"recordTotal":0},"records":[{"basketId":"69a2e44a16be1142e74ad3cc","orderId":"69a2e44a16be1142e74ad3cc","serviceType":"INSTORE","orderTotal":27.31,"deliveryDate":"2026-02-27","orderDate":"2026-02-27"},{"basketId":"697f42031c28e23df08d95f9","orderId":"697f42031c28e23df08d95f9","serviceType":"INSTORE","orderTotal":28.81,"deliveryDate":"2026-01-31","orderDate":"2026-01-31"},{"basketId":"696f6ac42cdea31ec04f6e07","orderId":"696f6ac42cdea31ec04f6e07","serviceType":"INSTORE","orderTotal":21.23,"deliveryDate":"2026-01-19","orderDate":"2026-01-19"},{"basketId":"696ccb7d20876c60f4f752e6","orderId":"696ccb7d20876c60f4f752e6","serviceType":"INSTORE","orderTotal":65.82,"deliveryDate":"2026-01-17","orderDate":"2026-01-17"},{"basketId":"69495d642fe72d698ad3171b","orderId":"69495d642fe72d698ad3171b","serviceType":"INSTORE","orderTotal":76.96,"deliveryDate":"2025-12-21","orderDate":"2025-12-21"},{"basketId":"69382a09e623053b887765ab","orderId":"69382a09e623053b887765ab","serviceType":"INSTORE","orderTotal":26.48,"deliveryDate":"2025-12-08","orderDate":"2025-12-08"},{"basketId":"692855d81ba32d25afc0c511","orderId":"692855d81ba32d25afc0c511","serviceType":"INSTORE","orderTotal":46.76,"deliveryDate":"2025-11-26","orderDate":"2025-11-26"},{"basketId":"6921b5a4d4205145417bb621","orderId":"6921b5a4d4205145417bb621","serviceType":"INSTORE","orderTotal":44.46,"deliveryDate":"2025-11-21","orderDate":"2025-11-21"},{"basketId":"6914890c2961482388a88d1a","orderId":"6914890c2961482388a88d1a","serviceType":"INSTORE","orderTotal":8.67,"deliveryDate":"2025-11-11","orderDate":"2025-11-11"},{"basketId":"68fcc44cb336eb511922b7ed","orderId":"68fcc44cb336eb511922b7ed","serviceType":"INSTORE","orderTotal":7.82,"deliveryDate":"2025-10-24","orderDate":"2025-10-24"},{"basketId":"68f6319f400fca6119c9a313","orderId":"68f6319f400fca6119c9a313","serviceType":"INSTORE","orderTotal":24.75,"deliveryDate":"2025-10-19","orderDate":"2025-10-19"},{"basketId":"68ea50fde9542e44ec1cdb52","orderId":"68ea50fde9542e44ec1cdb52","serviceType":"INSTORE","orderTotal":5.04,"deliveryDate":"2025-10-10","orderDate":"2025-10-10"},{"basketId":"68e3b96588cb4f6da225f48f","orderId":"68e3b96588cb4f6da225f48f","serviceType":"INSTORE","orderTotal":96.15,"deliveryDate":"2025-10-05","orderDate":"2025-10-05"},{"basketId":"68d7d4b7e5c59b5b3365bf7e","orderId":"68d7d4b7e5c59b5b3365bf7e","serviceType":"INSTORE","orderTotal":3.02,"deliveryDate":"2025-09-26","orderDate":"2025-09-26"},{"basketId":"68c6b40ffffd8a423b89d919","orderId":"68c6b40ffffd8a423b89d919","serviceType":"INSTORE","orderTotal":16.33,"deliveryDate":"2025-09-13","orderDate":"2025-09-13"},{"basketId":"68becde1ccd2af698d3fe5b4","orderId":"68becde1ccd2af698d3fe5b4","serviceType":"INSTORE","orderTotal":74.62,"deliveryDate":"2025-09-07","orderDate":"2025-09-07"},{"basketId":"68a9ae8b96f36b5f5209f280","orderId":"68a9ae8b96f36b5f5209f280","serviceType":"INSTORE","orderTotal":48.44,"deliveryDate":"2025-08-22","orderDate":"2025-08-22"},{"basketId":"689dd4ae529a816cac118c1e","orderId":"689dd4ae529a816cac118c1e","serviceType":"INSTORE","orderTotal":27.92,"deliveryDate":"2025-08-13","orderDate":"2025-08-13"},{"basketId":"688a0bad30d67e273e4cd5e5","orderId":"688a0bad30d67e273e4cd5e5","serviceType":"INSTORE","orderTotal":24.71,"deliveryDate":"2025-07-29","orderDate":"2025-07-29"},{"basketId":"68876f8e79c6017b5046c26d","orderId":"68876f8e79c6017b5046c26d","serviceType":"INSTORE","orderTotal":43.06,"deliveryDate":"2025-07-27","orderDate":"2025-07-27"},{"basketId":"68838591bbab65024dec2f7d","orderId":"68838591bbab65024dec2f7d","serviceType":"INSTORE","orderTotal":56.82,"deliveryDate":"2025-07-24","orderDate":"2025-07-24"},{"basketId":"687f85ce785eda50ec23eb13","orderId":"687f85ce785eda50ec23eb13","serviceType":"INSTORE","orderTotal":29.95,"deliveryDate":"2025-07-21","orderDate":"2025-07-21"},{"basketId":"6873e7ca3f23b2027b9cf880","orderId":"6873e7ca3f23b2027b9cf880","serviceType":"INSTORE","orderTotal":9.07,"deliveryDate":"2025-07-12","orderDate":"2025-07-12"},{"basketId":"687253132f6978797925d210","orderId":"687253132f6978797925d210","serviceType":"INSTORE","orderTotal":35.01,"deliveryDate":"2025-07-11","orderDate":"2025-07-11"},{"basketId":"686bc56771b0fd0349db364c","orderId":"686bc56771b0fd0349db364c","serviceType":"INSTORE","orderTotal":19.60,"deliveryDate":"2025-07-06","orderDate":"2025-07-06"},{"basketId":"686132b3a545464a32dd7b74","orderId":"686132b3a545464a32dd7b74","serviceType":"INSTORE","orderTotal":59.64,"deliveryDate":"2025-06-28","orderDate":"2025-06-28"},{"basketId":"685d3e3308042c4229743f6f","orderId":"685d3e3308042c4229743f6f","serviceType":"INSTORE","orderTotal":25.15,"deliveryDate":"2025-06-25","orderDate":"2025-06-25"},{"basketId":"685a9ecc675d4a20066cc150","orderId":"685a9ecc675d4a20066cc150","serviceType":"INSTORE","orderTotal":31.92,"deliveryDate":"2025-06-23","orderDate":"2025-06-23"},{"basketId":"684c1ad5e14a1918b99d4f87","orderId":"684c1ad5e14a1918b99d4f87","serviceType":"INSTORE","orderTotal":22.59,"deliveryDate":"2025-06-12","orderDate":"2025-06-12"},{"basketId":"683c50c7416f053a312c71ec","orderId":"683c50c7416f053a312c71ec","serviceType":"INSTORE","orderTotal":70.69,"deliveryDate":"2025-05-31","orderDate":"2025-05-31"},{"basketId":"6835bbdb4b7fd514324c6625","orderId":"6835bbdb4b7fd514324c6625","serviceType":"INSTORE","orderTotal":49.42,"deliveryDate":"2025-05-26","orderDate":"2025-05-26"},{"basketId":"6829b9576722dc05e996eb93","orderId":"6829b9576722dc05e996eb93","serviceType":"INSTORE","orderTotal":28.90,"deliveryDate":"2025-05-17","orderDate":"2025-05-17"},{"basketId":"6828ac932d43c8022c6b7795","orderId":"6828ac932d43c8022c6b7795","serviceType":"INSTORE","orderTotal":3.82,"deliveryDate":"2025-05-16","orderDate":"2025-05-16"},{"basketId":"68091ab1cb64de5e1466b2d8","orderId":"68091ab1cb64de5e1466b2d8","serviceType":"INSTORE","orderTotal":36.39,"deliveryDate":"2025-04-22","orderDate":"2025-04-22"},{"basketId":"67f39a0bc173f12e98864767","orderId":"67f39a0bc173f12e98864767","serviceType":"INSTORE","orderTotal":102.09,"deliveryDate":"2025-04-06","orderDate":"2025-04-06"},{"basketId":"67e6826b61fb347f3c895ec3","orderId":"67e6826b61fb347f3c895ec3","serviceType":"INSTORE","orderTotal":19.07,"deliveryDate":"2025-03-27","orderDate":"2025-03-27"},{"basketId":"67cac77d33c90c3c56b0227c","orderId":"67cac77d33c90c3c56b0227c","serviceType":"INSTORE","orderTotal":32.57,"deliveryDate":"2025-03-06","orderDate":"2025-03-06"},{"basketId":"67bc4a871cd6946b8be5d859","orderId":"67bc4a871cd6946b8be5d859","serviceType":"INSTORE","orderTotal":68.26,"deliveryDate":"2025-02-23","orderDate":"2025-02-23"},{"basketId":"67b8577626e22977f5ecda93","orderId":"67b8577626e22977f5ecda93","serviceType":"INSTORE","orderTotal":42.50,"deliveryDate":"2025-02-20","orderDate":"2025-02-20"},{"basketId":"67b344d64e6e9b2d9e04e2f8","orderId":"67b344d64e6e9b2d9e04e2f8","serviceType":"INSTORE","orderTotal":117.15,"deliveryDate":"2025-02-16","orderDate":"2025-02-16"},{"basketId":"67ae06df56a2d07eb3a6c683","orderId":"67ae06df56a2d07eb3a6c683","serviceType":"INSTORE","orderTotal":45.99,"deliveryDate":"2025-02-12","orderDate":"2025-02-12"},{"basketId":"6797784df812645adee14db6","orderId":"6797784df812645adee14db6","serviceType":"INSTORE","orderTotal":26.07,"deliveryDate":"2025-01-26","orderDate":"2025-01-26"},{"basketId":"67977bd0b9238636c5509a76","orderId":"67977bd0b9238636c5509a76","serviceType":"INSTORE","orderTotal":26.58,"deliveryDate":"2025-01-26","orderDate":"2025-01-26"},{"basketId":"677e750eaa2144427592c538","orderId":"677e750eaa2144427592c538","serviceType":"INSTORE","orderTotal":70.87,"deliveryDate":"2025-01-07","orderDate":"2025-01-07"},{"basketId":"677a635849b7946a7745e64f","orderId":"677a635849b7946a7745e64f","serviceType":"INSTORE","orderTotal":29.07,"deliveryDate":"2025-01-04","orderDate":"2025-01-04"},{"basketId":"6776ce9595a5c5016152ec36","orderId":"6776ce9595a5c5016152ec36","serviceType":"INSTORE","orderTotal":29.93,"deliveryDate":"2025-01-01","orderDate":"2025-01-01"},{"basketId":"676aab23b2176e4610407543","orderId":"676aab23b2176e4610407543","serviceType":"INSTORE","orderTotal":35.31,"deliveryDate":"2024-12-23","orderDate":"2024-12-23"},{"basketId":"67619f5a9897e17ff4667587","orderId":"67619f5a9897e17ff4667587","serviceType":"INSTORE","orderTotal":115.99,"deliveryDate":"2024-12-16","orderDate":"2024-12-16"},{"basketId":"675c1afa7e39b315dfe964c9","orderId":"675c1afa7e39b315dfe964c9","serviceType":"INSTORE","orderTotal":40.01,"deliveryDate":"2024-12-12","orderDate":"2024-12-12"},{"basketId":"6754219b43b5730cdd275100","orderId":"6754219b43b5730cdd275100","serviceType":"INSTORE","orderTotal":103.44,"deliveryDate":"2024-12-06","orderDate":"2024-12-06"}]} diff --git a/scrape-click.py b/scrape-click.py new file mode 100644 index 0000000..fdf34dd --- /dev/null +++ b/scrape-click.py @@ -0,0 +1,254 @@ +import json +import time +from pathlib import Path + +import browser_cookie3 +import click +import pandas as pd +from curl_cffi import requests +from dotenv import load_dotenv +import os + + +BASE = "https://giantfood.com" +ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store" + + +def load_config(): + load_dotenv() + return { + "user_id": os.getenv("GIANT_USER_ID", "").strip(), + "loyalty": os.getenv("GIANT_LOYALTY_NUMBER", "").strip(), + } + + +def build_session(): + s = requests.Session() + s.cookies.update(browser_cookie3.firefox(domain_name="giantfood.com")) + s.headers.update({ + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0", + "accept": "application/json, text/plain, */*", + "accept-language": "en-US,en;q=0.9", + "referer": ACCOUNT_PAGE, + }) + return s + + +def safe_get(session, url, **kwargs): + last_response = None + + for attempt in range(3): + try: + r = session.get( + url, + impersonate="firefox", + timeout=30, + **kwargs, + ) + last_response = r + + if r.status_code == 200: + return r + + click.echo(f"retry {attempt + 1}/3 status={r.status_code}") + except Exception as e: + click.echo(f"retry {attempt + 1}/3 error={e}") + + time.sleep(3) + + if last_response is not None: + last_response.raise_for_status() + + raise RuntimeError(f"failed to fetch {url}") + + +def get_history(session, user_id, loyalty): + url = f"{BASE}/api/v6.0/user/{user_id}/order/history" + r = safe_get( + session, + url, + params={ + "filter": "instore", + "loyaltyNumber": loyalty, + }, + ) + return r.json() + + +def get_order_detail(session, user_id, order_id): + url = f"{BASE}/api/v6.0/user/{user_id}/order/history/detail/{order_id}" + r = safe_get( + session, + url, + params={"isInStore": "true"}, + ) + return r.json() + + +def flatten_orders(history, details): + orders = [] + items = [] + + history_lookup = { + r["orderId"]: r + for r in history.get("records", []) + } + + for d in details: + hist = history_lookup.get(d["orderId"], {}) + pup = d.get("pup", {}) + + orders.append({ + "order_id": d["orderId"], + "order_date": d.get("orderDate"), + "delivery_date": d.get("deliveryDate"), + "service_type": hist.get("serviceType"), + "order_total": d.get("orderTotal"), + "payment_method": d.get("paymentMethod"), + "total_item_count": d.get("totalItemCount"), + "total_savings": d.get("totalSavings"), + "your_savings_total": d.get("yourSavingsTotal"), + "coupons_discounts_total": d.get("couponsDiscountsTotal"), + "store_name": pup.get("storeName"), + "store_number": pup.get("aholdStoreNumber"), + "store_address1": pup.get("storeAddress1"), + "store_city": pup.get("storeCity"), + "store_state": pup.get("storeState"), + "store_zipcode": pup.get("storeZipcode"), + "refund_order": d.get("refundOrder"), + "ebt_order": d.get("ebtOrder"), + }) + + for i, item in enumerate(d.get("items", []), start=1): + items.append({ + "order_id": d["orderId"], + "order_date": d.get("orderDate"), + "line_no": i, + "pod_id": item.get("podId"), + "item_name": item.get("itemName"), + "upc": item.get("primUpcCd"), + "category_id": item.get("categoryId"), + "category": item.get("categoryDesc"), + "qty": item.get("shipQy"), + "unit": item.get("lbEachCd"), + "unit_price": item.get("unitPrice"), + "line_total": item.get("groceryAmount"), + "picked_weight": item.get("totalPickedWeight"), + "mvp_savings": item.get("mvpSavings"), + "reward_savings": item.get("rewardSavings"), + "coupon_savings": item.get("couponSavings"), + "coupon_price": item.get("couponPrice"), + }) + + return pd.DataFrame(orders), pd.DataFrame(items) + + +def read_existing_order_ids(orders_csv: Path) -> set[str]: + if not orders_csv.exists(): + return set() + + try: + df = pd.read_csv(orders_csv, dtype={"order_id": str}) + if "order_id" not in df.columns: + return set() + return set(df["order_id"].dropna().astype(str)) + except Exception: + return set() + + +def append_dedup(existing_path: Path, new_df: pd.DataFrame, subset: list[str]) -> pd.DataFrame: + if existing_path.exists(): + old_df = pd.read_csv(existing_path, dtype=str) + combined = pd.concat([old_df, new_df.astype(str)], ignore_index=True) + else: + combined = new_df.astype(str).copy() + + combined = combined.drop_duplicates(subset=subset, keep="last") + combined.to_csv(existing_path, index=False) + return combined + + +@click.command() +@click.option("--user-id", default=None, help="giant user id") +@click.option("--loyalty", default=None, help="giant loyalty number") +@click.option("--outdir", default="giant_output", show_default=True, help="output directory") +@click.option("--sleep-seconds", default=1.5, show_default=True, type=float, help="delay between detail requests") +def main(user_id, loyalty, outdir, sleep_seconds): + cfg = load_config() + + user_id = user_id or cfg["user_id"] or click.prompt("giant user id", type=str) + loyalty = loyalty or cfg["loyalty"] or click.prompt("giant loyalty number", type=str) + + outdir = Path(outdir) + rawdir = outdir / "raw" + rawdir.mkdir(parents=True, exist_ok=True) + + orders_csv = outdir / "orders.csv" + items_csv = outdir / "items.csv" + + click.echo("using cookies from your current firefox profile.") + click.echo(f"open giant here, make sure you're logged in, then return: {ACCOUNT_PAGE}") + click.pause(info="press any key once giant is open and logged in") + + session = build_session() + + click.echo("fetching order history...") + history = get_history(session, user_id, loyalty) + + (rawdir / "history.json").write_text( + json.dumps(history, indent=2), + encoding="utf-8", + ) + + records = history.get("records", []) + click.echo(f"history returned {len(records)} visits") + click.echo("tip: giant appears to expose only the most recent 50 visits, so run this periodically if you want full continuity.") + + history_order_ids = [str(r["orderId"]) for r in records] + existing_order_ids = read_existing_order_ids(orders_csv) + new_order_ids = [oid for oid in history_order_ids if oid not in existing_order_ids] + + click.echo(f"existing orders in csv: {len(existing_order_ids)}") + click.echo(f"new orders to fetch: {len(new_order_ids)}") + + if not new_order_ids: + click.echo("no new orders found. done.") + return + + details = [] + for order_id in new_order_ids: + click.echo(f"fetching {order_id}") + d = get_order_detail(session, user_id, order_id) + details.append(d) + + (rawdir / f"{order_id}.json").write_text( + json.dumps(d, indent=2), + encoding="utf-8", + ) + + time.sleep(sleep_seconds) + + click.echo("flattening new data...") + orders_df, items_df = flatten_orders(history, details) + + orders_all = append_dedup( + orders_csv, + orders_df, + subset=["order_id"], + ) + + items_all = append_dedup( + items_csv, + items_df, + subset=["order_id", "line_no", "item_name", "upc", "line_total"], + ) + + click.echo("done") + click.echo(f"orders csv: {orders_csv}") + click.echo(f"items csv: {items_csv}") + click.echo(f"total orders stored: {len(orders_all)}") + click.echo(f"total item rows stored: {len(items_all)}") + + +if __name__ == "__main__": + main() diff --git a/scrape-giant.org b/scrape-giant.org deleted file mode 100644 index 30b637f..0000000 --- a/scrape-giant.org +++ /dev/null @@ -1,83 +0,0 @@ - - -* python setuyp -venv install playwright, pandas -playwright install -* item: -get: - /api/v6.0/user/369513017/order/history/detail/69a2e44a16be1142e74ad3cc - -headers: - request: -GET /api/v6.0/user/369513017/order/history/detail/69a2e44a16be1142e74ad3cc?isInStore=true HTTP/2 -Host: giantfood.com -User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0 -Accept: application/json, text/plain, */* -Accept-Language: en-US,en;q=0.9 -Accept-Encoding: gzip, deflate, br, zstd -DNT: 1 -Sec-GPC: 1 -Connection: keep-alive -Referer: https://giantfood.com/account/history/invoice/in-store -Cookie: datadome=rDtvd3J2hO5AeghJMSFRRxGc6ifKCQYgMLcqPNr9rWiz2rdcXb032AY6GIZn8tUmYB96BKKbzh3_jSjEzYWLj8hDjl3oGYYAiu4jwdaxpf3vh2v4f7KH7kbqgsMWpkjt; cf_clearance=WEPyQokx9f0qoyS4Svsw4EkZ1TYOxjOwcUHspT3.rXw-1773348940-1.2.1.1-fPvERGxBlFUaBW83sUppbUWpwvFG7mZivag5vBvZb3kxUQv2WSVIV1tON0HV2n8bkVY0U8_BBl62a00Np.oJylYQcGME540gZlYEoL.gMs4WynLqApFe5BOXAEwOm01_6h6b62H90bl4ypRehVb_TXEi4qHaPLVSZhjZK_h.fv6RBqjgYch2j_8XnHe5HXvLziVjl1k2aJskozqy04KOyeHyc3OyIPTZd5On_KAzFIM; dvrctk=MnjKJVShVraEtbrBkkxWxLaZrXnIGNQlwB7QtZVPFeA=; __cflb=0H28vXMLFyydRmDMNgcPHijM6auXkCspCkuh58tVuJ3; __cf_bm=C6QbqiEvbbwdrYBpoJOkcWcedf60vcOfPfTPPbZzKbM-1773348202-1.0.1.1-cSHoYwi8ZjIHTdBItXQP_iXJdRJS6FYjFsGdl1eGHvS5pgfbcT4Lg19P6UStX.bZz1u0OXiS5ykdipPBtwP6OvZr68k4XSmjYpir05jNLhw; _dd_s=rum=0&expire=1773349846445; ppdtk=Uog72CR22mD85C7U4iZHlgOQeRmvHEYp0OdQc+0lEes1c5/LeqGT+ZUlXpSC6FpW; cartId=3820547 -Sec-Fetch-Dest: empty -Sec-Fetch-Mode: cors -Sec-Fetch-Site: same-origin -Priority: u=0 -TE: trailers - - response: -HTTP/2 200 -date: Thu, 12 Mar 2026 20:55:47 GMT -content-type: application/json -server: cloudflare -cf-ray: 9db5b3a5d84aff28-IAD -cf-cache-status: DYNAMIC -content-encoding: gzip -set-cookie: datadome=MXMri0hss6PlQ0_oS7gG2iMdOKnNkbDmGvOxelgN~nCcupgkJQOqjcjcgdprIaI7hSlt_w8E9Ri_RAzPFrGqtUfqAJ_szB_aNZ2FdC26qmI3870Nn4~T0vtx8Gj3dEZR; Max-Age=31536000; Domain=.giantfood.com; Path=/; Secure; SameSite=Lax -strict-transport-security: max-age=31536000; includeSubDomains -vary: Origin, Access-Control-Request-Method, Access-Control-Request-Headers, accept-encoding -accept-ch: Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory -x-datadome: protected -request-context: appId=cid-v1:75750625-0c81-4f08-9f5d-ce4f73198e54 -X-Firefox-Spdy: h2 - -* history: -GET - https://giantfood.com/api/v6.0/user/369513017/order/history?filter=instore&loyaltyNumber=440155630880 - -headers: - request: -GET /api/v6.0/user/369513017/order/history?filter=instore&loyaltyNumber=440155630880 HTTP/2 -Host: giantfood.com -User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0 -Accept: application/json, text/plain, */* -Accept-Language: en-US,en;q=0.9 -Accept-Encoding: gzip, deflate, br, zstd -DNT: 1 -Sec-GPC: 1 -Connection: keep-alive -Referer: https://giantfood.com/account/history/invoice/in-store -Cookie: datadome=OH2XjtCoI6XjE3Qsz_b0F1YULKLatAC0Ea~VMeDGBP0N9Z~CeI3RqEbvkGmNW_VCOU~vRb6p0kqibvF2tLbWnzyAGIdO7jsC41KiYbp7USpJDnefZhIg0e1ypAugvDSw; cf_clearance=WEPyQokx9f0qoyS4Svsw4EkZ1TYOxjOwcUHspT3.rXw-1773348940-1.2.1.1-fPvERGxBlFUaBW83sUppbUWpwvFG7mZivag5vBvZb3kxUQv2WSVIV1tON0HV2n8bkVY0U8_BBl62a00Np.oJylYQcGME540gZlYEoL.gMs4WynLqApFe5BOXAEwOm01_6h6b62H90bl4ypRehVb_TXEi4qHaPLVSZhjZK_h.fv6RBqjgYch2j_8XnHe5HXvLziVjl1k2aJskozqy04KOyeHyc3OyIPTZd5On_KAzFIM; dvrctk=MnjKJVShVraEtbrBkkxWxLaZrXnIGNQlwB7QtZVPFeA=; __cflb=0H28vXMLFyydRmDMNgcPHijM6auXkCspCkuh58tVuJ3; __cf_bm=C6QbqiEvbbwdrYBpoJOkcWcedf60vcOfPfTPPbZzKbM-1773348202-1.0.1.1-cSHoYwi8ZjIHTdBItXQP_iXJdRJS6FYjFsGdl1eGHvS5pgfbcT4Lg19P6UStX.bZz1u0OXiS5ykdipPBtwP6OvZr68k4XSmjYpir05jNLhw; _dd_s=rum=0&expire=1773349842848; ppdtk=Uog72CR22mD85C7U4iZHlgOQeRmvHEYp0OdQc+0lEes1c5/LeqGT+ZUlXpSC6FpW; cartId=3820547 -Sec-Fetch-Dest: empty -Sec-Fetch-Mode: cors -Sec-Fetch-Site: same-origin -Priority: u=0 -TE: trailers - - - response: - HTTP/2 200 -date: Thu, 12 Mar 2026 20:55:43 GMT -content-type: application/json -server: cloudflare -cf-ray: 9db5b38f7eebff28-IAD -cf-cache-status: DYNAMIC -content-encoding: gzip -set-cookie: datadome=rDtvd3J2hO5AeghJMSFRRxGc6ifKCQYgMLcqPNr9rWiz2rdcXb032AY6GIZn8tUmYB96BKKbzh3_jSjEzYWLj8hDjl3oGYYAiu4jwdaxpf3vh2v4f7KH7kbqgsMWpkjt; Max-Age=31536000; Domain=.giantfood.com; Path=/; Secure; SameSite=Lax -strict-transport-security: max-age=31536000; includeSubDomains -vary: Origin, Access-Control-Request-Method, Access-Control-Request-Headers, accept-encoding -accept-ch: Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory -x-datadome: protected -request-context: appId=cid-v1:75750625-0c81-4f08-9f5d-ce4f73198e54 -X-Firefox-Spdy: h2 diff --git a/scraper.py b/scraper.py new file mode 100644 index 0000000..fe7c57f --- /dev/null +++ b/scraper.py @@ -0,0 +1,181 @@ +import json +import time +from pathlib import Path + +import browser_cookie3 +import pandas as pd +from curl_cffi import requests + + +BASE = "https://giantfood.com" +ACCOUNT_PAGE = f"{BASE}/account/history/invoice/in-store" + +USER_ID = "369513017" +LOYALTY = "440155630880" + + +def build_session(): + s = requests.Session() + s.cookies.update(browser_cookie3.firefox(domain_name="giantfood.com")) + s.headers.update({ + "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0", + "accept": "application/json, text/plain, */*", + "accept-language": "en-US,en;q=0.9", + "referer": ACCOUNT_PAGE, + }) + return s + + +def safe_get(session, url, **kwargs): + last_response = None + + for attempt in range(3): + try: + r = session.get( + url, + impersonate="firefox", + timeout=30, + **kwargs, + ) + last_response = r + + if r.status_code == 200: + return r + + print(f"retry {attempt + 1}/3 status={r.status_code}") + except Exception as e: + print(f"retry {attempt + 1}/3 error={e}") + + time.sleep(3) + + if last_response is not None: + last_response.raise_for_status() + + raise RuntimeError(f"failed to fetch {url}") + + +def get_history(session): + url = f"{BASE}/api/v6.0/user/{USER_ID}/order/history" + r = safe_get( + session, + url, + params={ + "filter": "instore", + "loyaltyNumber": LOYALTY, + }, + ) + return r.json() + + +def get_order_detail(session, order_id): + url = f"{BASE}/api/v6.0/user/{USER_ID}/order/history/detail/{order_id}" + r = safe_get( + session, + url, + params={"isInStore": "true"}, + ) + return r.json() + + +def flatten_orders(history, details): + orders = [] + items = [] + + history_lookup = { + r["orderId"]: r + for r in history.get("records", []) + } + + for d in details: + hist = history_lookup.get(d["orderId"], {}) + pup = d.get("pup", {}) + + orders.append({ + "order_id": d["orderId"], + "order_date": d.get("orderDate"), + "delivery_date": d.get("deliveryDate"), + "service_type": hist.get("serviceType"), + "order_total": d.get("orderTotal"), + "payment_method": d.get("paymentMethod"), + "total_item_count": d.get("totalItemCount"), + "total_savings": d.get("totalSavings"), + "your_savings_total": d.get("yourSavingsTotal"), + "coupons_discounts_total": d.get("couponsDiscountsTotal"), + "store_name": pup.get("storeName"), + "store_number": pup.get("aholdStoreNumber"), + "store_address1": pup.get("storeAddress1"), + "store_city": pup.get("storeCity"), + "store_state": pup.get("storeState"), + "store_zipcode": pup.get("storeZipcode"), + "refund_order": d.get("refundOrder"), + "ebt_order": d.get("ebtOrder"), + }) + + for i, item in enumerate(d.get("items", []), start=1): + items.append({ + "order_id": d["orderId"], + "order_date": d.get("orderDate"), + "line_no": i, + "pod_id": item.get("podId"), + "item_name": item.get("itemName"), + "upc": item.get("primUpcCd"), + "category_id": item.get("categoryId"), + "category": item.get("categoryDesc"), + "qty": item.get("shipQy"), + "unit": item.get("lbEachCd"), + "unit_price": item.get("unitPrice"), + "line_total": item.get("groceryAmount"), + "picked_weight": item.get("totalPickedWeight"), + "mvp_savings": item.get("mvpSavings"), + "reward_savings": item.get("rewardSavings"), + "coupon_savings": item.get("couponSavings"), + "coupon_price": item.get("couponPrice"), + }) + + return pd.DataFrame(orders), pd.DataFrame(items) + + +def main(): + outdir = Path("giant_output") + rawdir = outdir / "raw" + rawdir.mkdir(parents=True, exist_ok=True) + + session = build_session() + + print("fetching order history...") + history = get_history(session) + + (rawdir / "history.json").write_text( + json.dumps(history, indent=2), + encoding="utf-8", + ) + + order_ids = [r["orderId"] for r in history.get("records", [])] + print(f"{len(order_ids)} orders found") + + details = [] + for order_id in order_ids: + print(f"fetching {order_id}") + d = get_order_detail(session, order_id) + details.append(d) + + (rawdir / f"{order_id}.json").write_text( + json.dumps(d, indent=2), + encoding="utf-8", + ) + + time.sleep(1.5) + + print("flattening data...") + orders_df, items_df = flatten_orders(history, details) + + orders_df.to_csv(outdir / "orders.csv", index=False) + items_df.to_csv(outdir / "items.csv", index=False) + + print("done") + print(f"{len(orders_df)} orders written to {outdir / 'orders.csv'}") + print(f"{len(items_df)} items written to {outdir / 'items.csv'}") + + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/site/My Account _ Giant.htm b/site/My Account _ Giant.htm deleted file mode 100644 index 8c5b48b..0000000 --- a/site/My Account _ Giant.htm +++ /dev/null @@ -1,273 +0,0 @@ - - -
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -