108 lines
5.0 KiB
Org Mode
108 lines
5.0 KiB
Org Mode
* python setup
|
|
venv install playwright, pandas
|
|
playwright install
|
|
1. scrape - raw giant json
|
|
2. enrich -
|
|
cols:
|
|
item_name_norm
|
|
brand_guess
|
|
size_value
|
|
size_unit
|
|
pack_qty
|
|
variant
|
|
is_store_brand
|
|
is_fee
|
|
measure_type
|
|
price_per_lb
|
|
price_per_oz
|
|
price_per_each
|
|
image_url
|
|
|
|
normalize abbreviations
|
|
extract size like 12z, 10ct, 5lb
|
|
detect fees like bag charges
|
|
infer whether something is sold by each vs weight
|
|
carry forward image url
|
|
|
|
3. build observed-product table from enriched items
|
|
|
|
|
|
* item:
|
|
get:
|
|
https://giantfood.com/api/v6.0/user/369513017/order/history/detail/69a2e44a16be1142e74ad3cc?isInStore=true
|
|
|
|
headers:
|
|
request:
|
|
GET /api/v6.0/user/369513017/order/history/detail/69a2e44a16be1142e74ad3cc?isInStore=true HTTP/2
|
|
Host: giantfood.com
|
|
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0
|
|
Accept: application/json, text/plain, */*
|
|
Accept-Language: en-US,en;q=0.9
|
|
Accept-Encoding: gzip, deflate, br, zstd
|
|
DNT: 1
|
|
Sec-GPC: 1
|
|
Connection: keep-alive
|
|
Referer: https://giantfood.com/account/history/invoice/in-store
|
|
Cookie: datadome=rDtvd3J2hO5AeghJMSFRRxGc6ifKCQYgMLcqPNr9rWiz2rdcXb032AY6GIZn8tUmYB96BKKbzh3_jSjEzYWLj8hDjl3oGYYAiu4jwdaxpf3vh2v4f7KH7kbqgsMWpkjt; cf_clearance=WEPyQokx9f0qoyS4Svsw4EkZ1TYOxjOwcUHspT3.rXw-1773348940-1.2.1.1-fPvERGxBlFUaBW83sUppbUWpwvFG7mZivag5vBvZb3kxUQv2WSVIV1tON0HV2n8bkVY0U8_BBl62a00Np.oJylYQcGME540gZlYEoL.gMs4WynLqApFe5BOXAEwOm01_6h6b62H90bl4ypRehVb_TXEi4qHaPLVSZhjZK_h.fv6RBqjgYch2j_8XnHe5HXvLziVjl1k2aJskozqy04KOyeHyc3OyIPTZd5On_KAzFIM; dvrctk=MnjKJVShVraEtbrBkkxWxLaZrXnIGNQlwB7QtZVPFeA=; __cflb=0H28vXMLFyydRmDMNgcPHijM6auXkCspCkuh58tVuJ3; __cf_bm=C6QbqiEvbbwdrYBpoJOkcWcedf60vcOfPfTPPbZzKbM-1773348202-1.0.1.1-cSHoYwi8ZjIHTdBItXQP_iXJdRJS6FYjFsGdl1eGHvS5pgfbcT4Lg19P6UStX.bZz1u0OXiS5ykdipPBtwP6OvZr68k4XSmjYpir05jNLhw; _dd_s=rum=0&expire=1773349846445; ppdtk=Uog72CR22mD85C7U4iZHlgOQeRmvHEYp0OdQc+0lEes1c5/LeqGT+ZUlXpSC6FpW; cartId=3820547
|
|
Sec-Fetch-Dest: empty
|
|
Sec-Fetch-Mode: cors
|
|
Sec-Fetch-Site: same-origin
|
|
Priority: u=0
|
|
TE: trailers
|
|
|
|
response:
|
|
HTTP/2 200
|
|
date: Thu, 12 Mar 2026 20:55:47 GMT
|
|
content-type: application/json
|
|
server: cloudflare
|
|
cf-ray: 9db5b3a5d84aff28-IAD
|
|
cf-cache-status: DYNAMIC
|
|
content-encoding: gzip
|
|
set-cookie: datadome=MXMri0hss6PlQ0_oS7gG2iMdOKnNkbDmGvOxelgN~nCcupgkJQOqjcjcgdprIaI7hSlt_w8E9Ri_RAzPFrGqtUfqAJ_szB_aNZ2FdC26qmI3870Nn4~T0vtx8Gj3dEZR; Max-Age=31536000; Domain=.giantfood.com; Path=/; Secure; SameSite=Lax
|
|
strict-transport-security: max-age=31536000; includeSubDomains
|
|
vary: Origin, Access-Control-Request-Method, Access-Control-Request-Headers, accept-encoding
|
|
accept-ch: Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
|
x-datadome: protected
|
|
request-context: appId=cid-v1:75750625-0c81-4f08-9f5d-ce4f73198e54
|
|
X-Firefox-Spdy: h2
|
|
|
|
* history:
|
|
GET
|
|
https://giantfood.com/api/v6.0/user/369513017/order/history?filter=instore&loyaltyNumber=440155630880
|
|
|
|
headers:
|
|
request:
|
|
GET /api/v6.0/user/369513017/order/history?filter=instore&loyaltyNumber=440155630880 HTTP/2
|
|
Host: giantfood.com
|
|
User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:148.0) Gecko/20100101 Firefox/148.0
|
|
Accept: application/json, text/plain, */*
|
|
Accept-Language: en-US,en;q=0.9
|
|
Accept-Encoding: gzip, deflate, br, zstd
|
|
DNT: 1
|
|
Sec-GPC: 1
|
|
Connection: keep-alive
|
|
Referer: https://giantfood.com/account/history/invoice/in-store
|
|
Cookie: datadome=OH2XjtCoI6XjE3Qsz_b0F1YULKLatAC0Ea~VMeDGBP0N9Z~CeI3RqEbvkGmNW_VCOU~vRb6p0kqibvF2tLbWnzyAGIdO7jsC41KiYbp7USpJDnefZhIg0e1ypAugvDSw; cf_clearance=WEPyQokx9f0qoyS4Svsw4EkZ1TYOxjOwcUHspT3.rXw-1773348940-1.2.1.1-fPvERGxBlFUaBW83sUppbUWpwvFG7mZivag5vBvZb3kxUQv2WSVIV1tON0HV2n8bkVY0U8_BBl62a00Np.oJylYQcGME540gZlYEoL.gMs4WynLqApFe5BOXAEwOm01_6h6b62H90bl4ypRehVb_TXEi4qHaPLVSZhjZK_h.fv6RBqjgYch2j_8XnHe5HXvLziVjl1k2aJskozqy04KOyeHyc3OyIPTZd5On_KAzFIM; dvrctk=MnjKJVShVraEtbrBkkxWxLaZrXnIGNQlwB7QtZVPFeA=; __cflb=0H28vXMLFyydRmDMNgcPHijM6auXkCspCkuh58tVuJ3; __cf_bm=C6QbqiEvbbwdrYBpoJOkcWcedf60vcOfPfTPPbZzKbM-1773348202-1.0.1.1-cSHoYwi8ZjIHTdBItXQP_iXJdRJS6FYjFsGdl1eGHvS5pgfbcT4Lg19P6UStX.bZz1u0OXiS5ykdipPBtwP6OvZr68k4XSmjYpir05jNLhw; _dd_s=rum=0&expire=1773349842848; ppdtk=Uog72CR22mD85C7U4iZHlgOQeRmvHEYp0OdQc+0lEes1c5/LeqGT+ZUlXpSC6FpW; cartId=3820547
|
|
Sec-Fetch-Dest: empty
|
|
Sec-Fetch-Mode: cors
|
|
Sec-Fetch-Site: same-origin
|
|
Priority: u=0
|
|
TE: trailers
|
|
|
|
|
|
response:
|
|
HTTP/2 200
|
|
date: Thu, 12 Mar 2026 20:55:43 GMT
|
|
content-type: application/json
|
|
server: cloudflare
|
|
cf-ray: 9db5b38f7eebff28-IAD
|
|
cf-cache-status: DYNAMIC
|
|
content-encoding: gzip
|
|
set-cookie: datadome=rDtvd3J2hO5AeghJMSFRRxGc6ifKCQYgMLcqPNr9rWiz2rdcXb032AY6GIZn8tUmYB96BKKbzh3_jSjEzYWLj8hDjl3oGYYAiu4jwdaxpf3vh2v4f7KH7kbqgsMWpkjt; Max-Age=31536000; Domain=.giantfood.com; Path=/; Secure; SameSite=Lax
|
|
strict-transport-security: max-age=31536000; includeSubDomains
|
|
vary: Origin, Access-Control-Request-Method, Access-Control-Request-Headers, accept-encoding
|
|
accept-ch: Sec-CH-UA,Sec-CH-UA-Mobile,Sec-CH-UA-Platform,Sec-CH-UA-Arch,Sec-CH-UA-Full-Version-List,Sec-CH-UA-Model,Sec-CH-Device-Memory
|
|
x-datadome: protected
|
|
request-context: appId=cid-v1:75750625-0c81-4f08-9f5d-ce4f73198e54
|
|
X-Firefox-Spdy: h2
|