diff --git a/browser_session.py b/browser_session.py
index 981fd10..a9f317d 100644
--- a/browser_session.py
+++ b/browser_session.py
@@ -149,25 +149,51 @@ def read_firefox_webapps_entries(profile_dir, origin_filters):
         )
     return entries
 
 
 def query_sqlite(path, query):
+    """Run *query* against a temporary copy of the database at *path*.
+
+    Return the fetched rows as a list, or an empty list when SQLite raises
+    ``sqlite3.OperationalError``. The temporary copy is removed either way.
+    """
     copied_path = copy_sqlite_to_temp(path)
+    connection = None
+    cursor = None
     try:
-        with sqlite3.connect(copied_path) as connection:
-            return list(connection.execute(query))
+        # Close explicitly: sqlite3's connection context manager only commits
+        # or rolls back; it does not close the connection.
+        connection = sqlite3.connect(copied_path)
+        cursor = connection.cursor()
+        cursor.execute(query)
+        return cursor.fetchall()
     except sqlite3.OperationalError:
         return []
     finally:
+        if cursor is not None:
+            cursor.close()
+        if connection is not None:
+            connection.close()
         copied_path.unlink(missing_ok=True)
 
 
 def copy_sqlite_to_temp(path):
+    """Copy the SQLite file at *path* to a new temporary file and return its Path."""
+    # Imported locally: this patch does not show whether the module imports os.
+    import os
+
     source_path = Path(path)
-    with tempfile.NamedTemporaryFile(delete=False, suffix=source_path.suffix) as handle:
-        temp_path = Path(handle.name)
-    shutil.copy2(source_path, temp_path)
+    # mkstemp returns an open OS-level descriptor; close it and copy by name.
+    fd, tmp = tempfile.mkstemp(suffix=source_path.suffix)
+    os.close(fd)
+    temp_path = Path(tmp)
+    try:
+        shutil.copyfile(source_path, temp_path)
+    except OSError:
+        # Do not leave an orphaned temporary file behind when the copy fails.
+        temp_path.unlink(missing_ok=True)
+        raise
     return temp_path
 
 
 def decode_firefox_origin(raw_origin):
     origin = raw_origin.split("^", 1)[0]
diff --git a/scrape_costco.py b/scrape_costco.py
index aaac162..2d04012 100644
--- a/scrape_costco.py
+++ b/scrape_costco.py
@@ -227,6 +227,25 @@ def build_headers(auth_headers):
 
 def build_session():
     retailer_session = load_costco_session()
+    # Debug output: which session credentials were found (presence flags only).
+    click.echo(
+        "session bootstrap: "
+        f"cookies={bool(retailer_session.cookies)}, "
+        f"authorization={'costco-x-authorization' in retailer_session.headers}, "
+        f"client_id={'costco-x-wcs-clientId' in retailer_session.headers}, "
+        f"client_identifier={'client-identifier' in retailer_session.headers}"
+    )
+
+    auth = retailer_session.headers.get("costco-x-authorization", "")
+    if auth:
+        # Report only the scheme check and the length; never echo token material.
+        click.echo(f"auth prefix ok={auth.startswith('Bearer ')} len={len(auth)}")
+
+    click.echo(
+        "header values: "
+        f"client_id={retailer_session.headers.get('costco-x-wcs-clientId', '')} "
+        f"client_identifier={retailer_session.headers.get('client-identifier', '')}"
+    )
     session = requests.Session()
     session.cookies.update(retailer_session.cookies)
     session.headers.update(build_headers(retailer_session.headers))
@@ -247,7 +266,7 @@ def graphql_post(session, query, variables):
             last_response = response
             if response.status_code == 200:
                 return response.json()
-            click.echo(f"retry {attempt + 1}/3 status={response.status_code}")
+            click.echo(f"retry {attempt + 1}/3 status={response.status_code} body={response.text[:500]}")
         except Exception as exc:  # pragma: no cover - network error path
             click.echo(f"retry {attempt + 1}/3 error={exc}")
         time.sleep(3)
diff --git a/tests/test_browser_session.py b/tests/test_browser_session.py
index d14444b..5477d07 100644
--- a/tests/test_browser_session.py
+++ b/tests/test_browser_session.py
@@ -15,13 +15,17 @@ class BrowserSessionTests(unittest.TestCase):
             ls_dir.mkdir(parents=True)
             db_path = ls_dir / "data.sqlite"
 
-            with sqlite3.connect(db_path) as connection:
+            connection = sqlite3.connect(db_path)
+            try:
                 connection.execute("CREATE TABLE data (key TEXT, value TEXT)")
                 connection.execute(
                     "INSERT INTO data (key, value) VALUES (?, ?)",
                     ("session", '{"costco":{"clientIdentifier":"481b1aec-aa3b-454b-b81b-48187e28f205"}}'),
                 )
-
+                connection.commit()
+            finally:
+                connection.close()
+
             entries = browser_session.read_firefox_storage_entries(
                 profile_dir,
                 origin_filters=["costco.com"],