initial commit

This commit is contained in:
2026-03-12 17:18:22 -04:00
commit f264c54966
8 changed files with 825 additions and 0 deletions

32
.gitignore vendored Normal file
View File

@@ -0,0 +1,32 @@
# --- python bytecode ---
__pycache__/
*.py[cod]
*$py.class
# --- virtual environments ---
.venv/
venv/
env/
# --- environment files ---
.env
.env.*
*.local
# --- emacs ---
*~
\#*\#
.\#*
*.elc
# --- project private data ---
/private/
/pm/
# --- django ---
db.sqlite3
staticfiles/
media/
# --- misc ---
.DS_Store

1
history.json Normal file

File diff suppressed because one or more lines are too long

BIN
requirements.txt Normal file

Binary file not shown.

83
scrape-giant.org Normal file
View File

@@ -0,0 +1,83 @@
* python setup
create a venv, then: pip install playwright pandas requests
playwright install
* item:
get:
/api/v6.0/user/369513017/order/history/detail/69a2e44a16be1142e74ad3cc
headers:
request:
GET /api/v6.0/user/369513017/order/history/detail/69a2e44a16be1142e74ad3cc?isInStore=true HTTP/2
Host: giantfood.com
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0
Accept: application/json, text/plain, */*
Accept-Language: en-US,en;q=0.9
Accept-Encoding: gzip, deflate, br, zstd
DNT: 1
Sec-GPC: 1
Connection: keep-alive
Referer: https://giantfood.com/account/history/invoice/in-store
Cookie: datadome=rDtvd3J2hO5AeghJMSFRRxGc6ifKCQYgMLcqPNr9rWiz2rdcXb032AY6GIZn8tUmYB96BKKbzh3_jSjEzYWLj8hDjl3oGYYAiu4jwdaxpf3vh2v4f7KH7kbqgsMWpkjt; cf_clearance=WEPyQokx9f0qoyS4Svsw4EkZ1TYOxjOwcUHspT3.rXw-1773348940-1.2.1.1-fPvERGxBlFUaBW83sUppbUWpwvFG7mZivag5vBvZb3kxUQv2WSVIV1tON0HV2n8bkVY0U8_BBl62a00Np.oJylYQcGME540gZlYEoL.gMs4WynLqApFe5BOXAEwOm01_6h6b62H90bl4ypRehVb_TXEi4qHaPLVSZhjZK_h.fv6RBqjgYch2j_8XnHe5HXvLziVjl1k2aJskozqy04KOyeHyc3OyIPTZd5On_KAzFIM; dvrctk=MnjKJVShVraEtbrBkkxWxLaZrXnIGNQlwB7QtZVPFeA=; __cflb=0H28vXMLFyydRmDMNgcPHijM6auXkCspCkuh58tVuJ3; __cf_bm=C6QbqiEvbbwdrYBpoJOkcWcedf60vcOfPfTPPbZzKbM-1773348202-1.0.1.1-cSHoYwi8ZjIHTdBItXQP_iXJdRJS6FYjFsGdl1eGHvS5pgfbcT4Lg19P6UStX.bZz1u0OXiS5ykdipPBtwP6OvZr68k4XSmjYpir05jNLhw; _dd_s=rum=0&expire=1773349846445; ppdtk=Uog72CR22mD85C7U4iZHlgOQeRmvHEYp0OdQc+0lEes1c5/LeqGT+ZUlXpSC6FpW; cartId=3820547
Sec-Fetch-Dest: empty
Sec-Fetch-Mode: cors
Sec-Fetch-Site: same-origin
Priority: u=0
TE: trailers
response:
HTTP/2 200
date: Thu, 12 Mar 2026 20:55:47 GMT
content-type: application/json
server: cloudflare
cf-ray: 9db5b3a5d84aff28-IAD
cf-cache-status: DYNAMIC
content-encoding: gzip
set-cookie: datadome=MXMri0hss6PlQ0_oS7gG2iMdOKnNkbDmGvOxelgN~nCcupgkJQOqjcjcgdprIaI7hSlt_w8E9Ri_RAzPFrGqtUfqAJ_szB_aNZ2FdC26qmI3870Nn4~T0vtx8Gj3dEZR; Max-Age=31536000; Domain=.giantfood.com; Path=/; Secure; SameSite=Lax
strict-transport-security: max-age=31536000; includeSubDomains
vary: Origin, Access-Control-Request-Method, Access-Control-Request-Headers, accept-encoding
accept-ch: Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
x-datadome: protected
request-context: appId=cid-v1:75750625-0c81-4f08-9f5d-ce4f73198e54
X-Firefox-Spdy: h2
* history:
GET
https://giantfood.com/api/v6.0/user/369513017/order/history?filter=instore&loyaltyNumber=440155630880
headers:
request:
GET /api/v6.0/user/369513017/order/history?filter=instore&loyaltyNumber=440155630880 HTTP/2
Host: giantfood.com
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0
Accept: application/json, text/plain, */*
Accept-Language: en-US,en;q=0.9
Accept-Encoding: gzip, deflate, br, zstd
DNT: 1
Sec-GPC: 1
Connection: keep-alive
Referer: https://giantfood.com/account/history/invoice/in-store
Cookie: datadome=OH2XjtCoI6XjE3Qsz_b0F1YULKLatAC0Ea~VMeDGBP0N9Z~CeI3RqEbvkGmNW_VCOU~vRb6p0kqibvF2tLbWnzyAGIdO7jsC41KiYbp7USpJDnefZhIg0e1ypAugvDSw; cf_clearance=WEPyQokx9f0qoyS4Svsw4EkZ1TYOxjOwcUHspT3.rXw-1773348940-1.2.1.1-fPvERGxBlFUaBW83sUppbUWpwvFG7mZivag5vBvZb3kxUQv2WSVIV1tON0HV2n8bkVY0U8_BBl62a00Np.oJylYQcGME540gZlYEoL.gMs4WynLqApFe5BOXAEwOm01_6h6b62H90bl4ypRehVb_TXEi4qHaPLVSZhjZK_h.fv6RBqjgYch2j_8XnHe5HXvLziVjl1k2aJskozqy04KOyeHyc3OyIPTZd5On_KAzFIM; dvrctk=MnjKJVShVraEtbrBkkxWxLaZrXnIGNQlwB7QtZVPFeA=; __cflb=0H28vXMLFyydRmDMNgcPHijM6auXkCspCkuh58tVuJ3; __cf_bm=C6QbqiEvbbwdrYBpoJOkcWcedf60vcOfPfTPPbZzKbM-1773348202-1.0.1.1-cSHoYwi8ZjIHTdBItXQP_iXJdRJS6FYjFsGdl1eGHvS5pgfbcT4Lg19P6UStX.bZz1u0OXiS5ykdipPBtwP6OvZr68k4XSmjYpir05jNLhw; _dd_s=rum=0&expire=1773349842848; ppdtk=Uog72CR22mD85C7U4iZHlgOQeRmvHEYp0OdQc+0lEes1c5/LeqGT+ZUlXpSC6FpW; cartId=3820547
Sec-Fetch-Dest: empty
Sec-Fetch-Mode: cors
Sec-Fetch-Site: same-origin
Priority: u=0
TE: trailers
response:
HTTP/2 200
date: Thu, 12 Mar 2026 20:55:43 GMT
content-type: application/json
server: cloudflare
cf-ray: 9db5b38f7eebff28-IAD
cf-cache-status: DYNAMIC
content-encoding: gzip
set-cookie: datadome=rDtvd3J2hO5AeghJMSFRRxGc6ifKCQYgMLcqPNr9rWiz2rdcXb032AY6GIZn8tUmYB96BKKbzh3_jSjEzYWLj8hDjl3oGYYAiu4jwdaxpf3vh2v4f7KH7kbqgsMWpkjt; Max-Age=31536000; Domain=.giantfood.com; Path=/; Secure; SameSite=Lax
strict-transport-security: max-age=31536000; includeSubDomains
vary: Origin, Access-Control-Request-Method, Access-Control-Request-Headers, accept-encoding
accept-ch: Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
x-datadome: protected
request-context: appId=cid-v1:75750625-0c81-4f08-9f5d-ce4f73198e54
X-Firefox-Spdy: h2

273
site/My Account _ Giant.htm Normal file

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

1
site/order1.json Normal file
View File

@@ -0,0 +1 @@
{"orderId":"69a2e44a16be1142e74ad3cc","orderDate":"2026-02-27","deliveryDate":"2026-02-27","orderTotal":27.31,"totalItemCount":20,"totalSavings":5.02,"yourSavingsTotal":5.02,"couponsDiscountsTotal":0.00,"deliveryMethod":"INSTORE","loyaltyNumber":44015563088,"paymentMethod":"CASH","items":[{"podId":0,"shipQy":3,"totalPickedWeight":0.0000,"unitPrice":0.67,"itemName":"+LEMONS","lbEachCd":"EA","groceryAmount":2.01,"primUpcCd":"4053","mvpSavings":0.00,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{}},{"podId":0,"shipQy":2,"totalPickedWeight":0.0000,"unitPrice":2.19,"itemName":"SB SPCY BRWN MUSTARD12Z","lbEachCd":"EA","groceryAmount":4.38,"primUpcCd":"68826707533","mvpSavings":0.40,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{}},{"podId":310343,"shipQy":2,"totalPickedWeight":0.0000,"unitPrice":0.05,"itemName":"GL BAG CHARGE","lbEachCd":"EA","groceryAmount":0.10,"primUpcCd":"81244","mvpSavings":0.00,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{}},{"podId":793,"shipQy":1,"totalPickedWeight":0.0000,"unitPrice":3.49,"itemName":"PF CHEDDAR GLD FISH 6.6","lbEachCd":"EA","groceryAmount":3.49,"primUpcCd":"1410008547","mvpSavings":0.50,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{"small":"https://i5.peapod.com/c/GD/GD6CA.png","medium":"https://i5.peapod.com/c/VH/VH5NX.png","large":"https://i5.peapod.com/c/K4/K4CCI.png","xlarge":"https://i5.peapod.com/c/UX/UXQM7.png"}},{"podId":396304,"shipQy":6,"totalPickedWeight":0.0000,"unitPrice":1.89,"itemName":"SB CHPD SPINACH 
12Z","lbEachCd":"EA","groceryAmount":11.34,"primUpcCd":"68826759629","mvpSavings":2.34,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{"small":"https://i5.peapod.com/c/43/43N4N.png","medium":"https://i5.peapod.com/c/GU/GUJ3V.png","large":"https://i5.peapod.com/c/GD/GDWLE.png","xlarge":"https://i5.peapod.com/c/9O/9OOJY.png"}},{"podId":187420,"shipQy":4,"totalPickedWeight":0.0000,"unitPrice":0.99,"itemName":"SB PINTO BEANS 15.5Z","lbEachCd":"EA","groceryAmount":3.96,"primUpcCd":"68826702790","mvpSavings":0.00,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{"small":"https://i5.peapod.com/c/X9/X9AQ8.png","medium":"https://i5.peapod.com/c/KM/KMSFG.png","large":"https://i5.peapod.com/c/0R/0RM8A.png","xlarge":"https://i5.peapod.com/c/JI/JI0DP.png"}},{"podId":153510,"shipQy":2,"totalPickedWeight":0.0000,"unitPrice":3.39,"itemName":"LA BND FJT TRT 10CT11.3","lbEachCd":"EA","groceryAmount":6.78,"primUpcCd":"2733100035","mvpSavings":1.78,"rewardSavings":0.00,"couponSavings":0.00,"couponPrice":0.00,"categoryId":"1","categoryDesc":"Grocery","image":{"small":"https://i5.peapod.com/c/SO/SO2X8.png","medium":"https://i5.peapod.com/c/QI/QITJ2.png","large":"https://i5.peapod.com/c/J7/J7KRF.png","xlarge":"https://i5.peapod.com/c/MF/MFSYF.png"}}],"pup":{"storeName":"Giant Food","aholdStoreNumber":760,"storeAddress1":"6800 RICHMOND HIGHWAY","storeCity":"ALEXANDRIA","storeState":"VA","storeZipcode":"22306"},"couponsAndDiscounts":[],"orderDetails":[{"itemCd":null,"itemCdDesc":"Subtotal","priceQy":27.04},{"itemCd":null,"itemCdDesc":"Tax","priceQy":0.27}],"weeklySpecials":[{"podId":0,"itemDesc":"SB SPCY BRWN MUSTARD12Z","itemSavings":0.40},{"podId":793,"itemDesc":"PF CHEDDAR GLD FISH 6.6","itemSavings":0.50},{"podId":396304,"itemDesc":"SB CHPD SPINACH 12Z","itemSavings":2.34},{"podId":153510,"itemDesc":"LA BND FJT TRT 
10CT11.3","itemSavings":1.78}],"refundOrder":false,"ebtOrder":false}

162
site/scrape-giant.py Normal file
View File

@@ -0,0 +1,162 @@
import json
import time
from pathlib import Path
import pandas as pd
import requests
from playwright.sync_api import sync_playwright
# Root of the Giant Food site; every API URL in this script is built on it.
BASE = "https://giantfood.com"
# In-store purchase history page: opened for the manual login and sent as the referer.
ACCOUNT_HISTORY_URL = BASE + "/account/history/invoice/in-store"
def build_session_via_playwright(headless=False):
    """Log in through a real browser and return a ``requests.Session`` carrying that login.

    Launches Firefox via Playwright on the in-store order-history page, blocks
    on stdin until the operator confirms they have logged in, then copies the
    browser's cookies and user agent into a fresh ``requests.Session``.

    Args:
        headless: Forwarded to Playwright's ``firefox.launch``. Leave it
            ``False`` so a window is available for the manual login.

    Returns:
        A ``requests.Session`` with ``user-agent``, ``accept`` and ``referer``
        headers set and every browser cookie copied into its cookie jar.
    """
    with sync_playwright() as p:
        browser = p.firefox.launch(headless=headless)
        try:
            page = browser.new_page()
            page.goto(ACCOUNT_HISTORY_URL, wait_until="networkidle")
            print("log in in the browser window, then press enter here...")
            input()
            cookies = page.context.cookies()
            ua = page.evaluate("() => navigator.userAgent")
        finally:
            # Close the browser even when navigation fails or the prompt is
            # interrupted (Ctrl-C / EOF), so no Firefox process is left behind.
            browser.close()

    s = requests.Session()
    s.headers.update({
        "user-agent": ua,
        "accept": "application/json, text/plain, */*",
        "referer": ACCOUNT_HISTORY_URL,
    })
    for c in cookies:
        # requests wants host-only-ish handling; stripping leading dot is usually fine
        domain = c.get("domain", "").lstrip(".") or "giantfood.com"
        s.cookies.set(
            c["name"],
            c["value"],
            domain=domain,
            path=c.get("path", "/"),
        )
    return s
def get_history(session, user_id, loyalty_number, filter_="instore"):
    """Fetch the order-history listing for one user and return the decoded JSON.

    Args:
        session: Object exposing ``get(url, params=..., timeout=...)``, e.g.
            the session built by ``build_session_via_playwright``.
        user_id: Account id interpolated into the API path.
        loyalty_number: Sent as the ``loyaltyNumber`` query parameter.
        filter_: Sent as the ``filter`` query parameter.

    Returns:
        The parsed JSON body of the response.

    Raises:
        Whatever ``raise_for_status()`` raises on a non-success response.
    """
    response = session.get(
        f"{BASE}/api/v6.0/user/{user_id}/order/history",
        params={"filter": filter_, "loyaltyNumber": loyalty_number},
        timeout=30,
    )
    response.raise_for_status()
    return response.json()
def get_order_detail(session, user_id, order_id, is_instore=True):
    """Fetch the detail payload of a single order and return the decoded JSON.

    Args:
        session: Object exposing ``get(url, params=..., timeout=...)``.
        user_id: Account id interpolated into the API path.
        order_id: Order id interpolated into the API path.
        is_instore: Lower-cased string form is sent as the ``isInStore``
            query parameter (``True`` becomes ``"true"``).

    Returns:
        The parsed JSON body of the response.

    Raises:
        Whatever ``raise_for_status()`` raises on a non-success response.
    """
    in_store_flag = str(is_instore).lower()
    endpoint = f"{BASE}/api/v6.0/user/{user_id}/order/history/detail/{order_id}"
    response = session.get(endpoint, params={"isInStore": in_store_flag}, timeout=30)
    response.raise_for_status()
    return response.json()
# (output column, source dict, JSON key) for every per-order column after
# ``order_id``. The source is "detail" (the order-detail payload), "history"
# (the matching history record) or "store" (the detail's ``pup`` object).
_ORDER_FIELDS = (
    ("order_date", "detail", "orderDate"),
    ("delivery_date", "detail", "deliveryDate"),
    ("service_type", "history", "serviceType"),
    ("payment_method", "detail", "paymentMethod"),
    ("order_total", "detail", "orderTotal"),
    ("total_item_count", "detail", "totalItemCount"),
    ("total_savings", "detail", "totalSavings"),
    ("your_savings_total", "detail", "yourSavingsTotal"),
    ("coupons_discounts_total", "detail", "couponsDiscountsTotal"),
    ("store_name", "store", "storeName"),
    ("store_number", "store", "aholdStoreNumber"),
    ("store_address1", "store", "storeAddress1"),
    ("store_city", "store", "storeCity"),
    ("store_state", "store", "storeState"),
    ("store_zipcode", "store", "storeZipcode"),
    ("refund_order", "detail", "refundOrder"),
    ("ebt_order", "detail", "ebtOrder"),
)

# (output column, JSON key) for every per-item column after the
# ``order_id`` / ``order_date`` / ``line_no`` prefix.
_ITEM_FIELDS = (
    ("pod_id", "podId"),
    ("upc", "primUpcCd"),
    ("item_name", "itemName"),
    ("category_id", "categoryId"),
    ("category_desc", "categoryDesc"),
    ("qty", "shipQy"),
    ("unit", "lbEachCd"),
    ("unit_price", "unitPrice"),
    ("grocery_amount", "groceryAmount"),
    ("picked_weight", "totalPickedWeight"),
    ("mvp_savings", "mvpSavings"),
    ("reward_savings", "rewardSavings"),
    ("coupon_savings", "couponSavings"),
    ("coupon_price", "couponPrice"),
)


def flatten_orders(history_json, details_jsons):
    """Flatten order-detail payloads into an orders table and an items table.

    Args:
        history_json: Decoded history listing. Its ``records`` entries are
            indexed by ``orderId`` and supply ``serviceType`` for each order.
        details_jsons: Iterable of decoded order-detail payloads. Each must
            contain ``orderId``; every other field is optional and becomes
            ``None`` when absent.

    Returns:
        ``(orders_df, items_df)``: one row per order and one row per order
        line. ``line_no`` is the 1-based position of the item in the order.
        Both frames always carry the full column set, even with no rows.

    Raises:
        KeyError: If a detail payload or a history record has no ``orderId``.
    """
    # ``or []`` / ``or {}`` instead of ``.get(key, default)``: the default is
    # not used when the key is present with an explicit JSON null.
    history_records = {
        rec["orderId"]: rec
        for rec in history_json.get("records") or []
    }

    orders_rows = []
    items_rows = []
    for detail in details_jsons:
        order_id = detail["orderId"]
        sources = {
            "detail": detail,
            "history": history_records.get(order_id) or {},
            "store": detail.get("pup") or {},
        }
        order_row = {"order_id": order_id}
        order_row.update(
            (column, sources[source].get(key))
            for column, source, key in _ORDER_FIELDS
        )
        orders_rows.append(order_row)

        for line_no, item in enumerate(detail.get("items") or [], start=1):
            item_row = {
                "order_id": order_id,
                "order_date": detail.get("orderDate"),
                "line_no": line_no,
            }
            item_row.update((column, item.get(key)) for column, key in _ITEM_FIELDS)
            items_rows.append(item_row)

    # Explicit columns keep the schema stable when there are no rows at all.
    order_columns = ["order_id"] + [column for column, _, _ in _ORDER_FIELDS]
    item_columns = ["order_id", "order_date", "line_no"] + [
        column for column, _ in _ITEM_FIELDS
    ]
    return (
        pd.DataFrame(orders_rows, columns=order_columns),
        pd.DataFrame(items_rows, columns=item_columns),
    )
def main():
    """Scrape the in-store order history and export it as JSON, CSV and SQLite.

    Steps:
      1. Build a logged-in session through a manual browser login.
      2. Download the history listing to ``giant_output/raw/history.json``.
      3. Download each order's detail to ``giant_output/raw/<orderId>.json``,
         pausing 1.5 s between requests.
      4. Flatten everything into ``orders.csv`` / ``items.csv`` and into the
         ``orders`` / ``items`` tables of ``giant.sqlite`` (tables replaced).

    The account is taken from the ``GIANT_USER_ID`` and
    ``GIANT_LOYALTY_NUMBER`` environment variables; the previous hard-coded
    values remain as fallbacks so existing invocations keep working.
    """
    import os
    import sqlite3

    # Prefer the environment so personal identifiers need not live in source.
    user_id = os.environ.get("GIANT_USER_ID") or "369513017"
    loyalty_number = os.environ.get("GIANT_LOYALTY_NUMBER") or "440155630880"

    outdir = Path("giant_output")
    rawdir = outdir / "raw"
    rawdir.mkdir(parents=True, exist_ok=True)

    session = build_session_via_playwright(headless=False)
    history = get_history(session, user_id, loyalty_number)
    (rawdir / "history.json").write_text(json.dumps(history, indent=2), encoding="utf-8")

    details = []
    # ``or []`` also covers a response where "records" is an explicit null.
    for rec in history.get("records") or []:
        order_id = rec["orderId"]
        print("fetching", order_id)
        detail = get_order_detail(session, user_id, order_id, is_instore=True)
        details.append(detail)
        (rawdir / f"{order_id}.json").write_text(json.dumps(detail, indent=2), encoding="utf-8")
        time.sleep(1.5)  # throttle requests; don't hammer the API

    orders_df, items_df = flatten_orders(history, details)
    orders_df.to_csv(outdir / "orders.csv", index=False)
    items_df.to_csv(outdir / "items.csv", index=False)

    # Optional local database copy of the same two tables.
    conn = sqlite3.connect(outdir / "giant.sqlite")
    try:
        orders_df.to_sql("orders", conn, if_exists="replace", index=False)
        items_df.to_sql("items", conn, if_exists="replace", index=False)
    finally:
        # Release the database file even if one of the writes fails.
        conn.close()

    print(f"wrote {len(orders_df)} orders and {len(items_df)} items to {outdir}")
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()