# File: scrape-giant/browser_session.py
# Size: 130 lines, 3.8 KiB
# Language: Python

import configparser
import os
import shutil
import sqlite3
import tempfile
from pathlib import Path
import browser_cookie3
def find_firefox_profile_dir():
    """Return the directory of the Firefox profile most likely to be in use.

    Reads ``profiles.ini`` from the directory given by
    ``firefox_profiles_root()`` and ranks every ``[Profile*]`` section that
    has a ``Path``:

    1. profiles named as ``Default`` by an ``[Install*]`` section (how
       Firefox 67+ records the profile an installation actually uses),
    2. profiles flagged with the legacy ``Default=1`` key,
    3. everything else, ordered by path so the choice is deterministic.

    Returns:
        pathlib.Path: the selected profile directory.

    Raises:
        FileNotFoundError: if ``profiles.ini`` does not exist or lists no
            profile with a ``Path`` entry.
    """
    profiles_ini = firefox_profiles_root() / "profiles.ini"
    if not profiles_ini.exists():
        raise FileNotFoundError(f"Firefox profiles.ini not found at {profiles_ini}")

    parser = configparser.RawConfigParser()
    parser.read(profiles_ini, encoding="utf-8")

    # In [Install*] sections "Default" holds a profile *path* (not a boolean).
    # Collect them up front because those sections may appear before or after
    # the [Profile*] sections they refer to.
    install_defaults = {
        parser.get(section, "Default", fallback="")
        for section in parser.sections()
        if section.startswith("Install")
    }
    install_defaults.discard("")

    candidates = []
    for section in parser.sections():
        if not section.startswith("Profile"):
            continue
        path_value = parser.get(section, "Path", fallback="")
        if not path_value:
            continue
        is_relative = parser.getboolean(section, "IsRelative", fallback=True)
        profile_path = (
            profiles_ini.parent / path_value if is_relative else Path(path_value)
        )
        legacy_default = parser.getboolean(section, "Default", fallback=False)
        candidates.append(
            (
                # False sorts before True, so preferred profiles come first.
                path_value not in install_defaults,
                not legacy_default,
                str(profile_path),
                profile_path,
            )
        )

    if not candidates:
        raise FileNotFoundError("No Firefox profiles found in profiles.ini")
    return min(candidates, key=lambda candidate: candidate[:3])[3]
def firefox_profiles_root():
    """Return the directory that is expected to contain Firefox's ``profiles.ini``.

    * Windows: ``%APPDATA%/Mozilla/Firefox``
    * macOS: ``~/Library/Application Support/Firefox``
    * anything else: ``~/.mozilla/firefox``

    The directory is not checked for existence.

    Returns:
        pathlib.Path: the per-user Firefox data directory.

    Raises:
        FileNotFoundError: on Windows when ``APPDATA`` is unset or blank.
    """
    import sys  # local import: only needed to tell macOS apart from other POSIX

    if os.name == "nt":
        appdata = os.getenv("APPDATA", "").strip()
        if not appdata:
            raise FileNotFoundError("APPDATA is not set")
        return Path(appdata) / "Mozilla" / "Firefox"
    if sys.platform == "darwin":
        # macOS reports os.name == "posix" but Firefox does not use ~/.mozilla there.
        return Path.home() / "Library" / "Application Support" / "Firefox"
    return Path.home() / ".mozilla" / "firefox"
def load_firefox_cookies(domain_name, profile_dir):
    """Load Firefox cookies for *domain_name* from a profile directory.

    The cookie database is taken to be ``cookies.sqlite`` directly inside
    *profile_dir* (a ``str`` or path-like). The result is whatever
    ``browser_cookie3.firefox`` returns for that file and domain.
    """
    cookie_db = Path(profile_dir).joinpath("cookies.sqlite")
    return browser_cookie3.firefox(
        cookie_file=str(cookie_db),
        domain_name=domain_name,
    )
def read_firefox_local_storage(profile_dir, origin_filter):
    """Read localStorage key/value pairs for the first origin matching a filter.

    Looks for ``storage/default/<origin>/ls/data.sqlite`` files under
    *profile_dir*, decodes each ``<origin>`` directory name with
    ``decode_firefox_origin`` and returns the contents of the ``data`` table
    for the first origin that contains *origin_filter* (case-insensitive).

    Args:
        profile_dir: Firefox profile directory (``str`` or path-like).
        origin_filter: substring to look for in the decoded origin.

    Returns:
        dict[str, str]: keys and values passed through ``stringify_sql_value``;
        empty when the storage directory is missing or no origin matches.
    """
    # Accept plain strings as well as Path objects, like load_firefox_cookies.
    storage_root = Path(profile_dir) / "storage" / "default"
    if not storage_root.exists():
        return {}

    needle = origin_filter.lower()
    # Sorted so that "first matching origin" does not depend on directory order.
    for ls_path in sorted(storage_root.glob("*/ls/data.sqlite")):
        # <origin>/ls/data.sqlite -> parents[1] is the <origin> directory.
        origin = decode_firefox_origin(ls_path.parents[1].name)
        if needle not in origin.lower():
            continue
        return {
            stringify_sql_value(row[0]): stringify_sql_value(row[1])
            for row in query_sqlite(ls_path, "SELECT key, value FROM data")
        }
    return {}
def read_firefox_webapps_store(profile_dir, origin_filter):
    """Read legacy localStorage entries from ``webappsstore.sqlite``.

    Every row of the ``webappsstore2`` table whose origin matches
    *origin_filter* (case-insensitive substring) contributes one
    ``key -> value`` entry; later rows overwrite earlier ones with the same key.

    The ``originKey`` column stores the host reversed
    (``moc.elpmaxe.www.:https:443``), so the filter is compared against both
    the raw key and a decoded ``https://www.example.com:443`` form. A filter
    such as ``example.com`` therefore matches.

    Args:
        profile_dir: Firefox profile directory (``str`` or path-like).
        origin_filter: substring identifying the origin of interest.

    Returns:
        dict[str, str]: matching entries, empty when the database file is
        missing or nothing matches.
    """
    # Accept plain strings as well as Path objects, like load_firefox_cookies.
    webapps_path = Path(profile_dir) / "webappsstore.sqlite"
    if not webapps_path.exists():
        return {}

    needle = origin_filter.lower()
    values = {}
    for row in query_sqlite(
        webapps_path,
        "SELECT originKey, key, value FROM webappsstore2",
    ):
        origin_key = stringify_sql_value(row[0]).lower()
        if (
            needle not in origin_key
            and needle not in _decode_webapps_origin_key(origin_key)
        ):
            continue
        values[stringify_sql_value(row[1])] = stringify_sql_value(row[2])
    return values


def _decode_webapps_origin_key(origin_key):
    """Turn ``moc.elpmaxe.www.:https:443`` into ``https://www.example.com:443``."""
    reversed_host, separator, rest = origin_key.partition(":")
    if not separator:
        # Not in "<reversed host>:<scheme>[:<port>]" form; leave untouched.
        return origin_key
    host = reversed_host[::-1].lstrip(".")
    scheme, _, port = rest.partition(":")
    decoded = f"{scheme}://{host}"
    return f"{decoded}:{port}" if port else decoded
def query_sqlite(path, query):
    """Run *query* against a throw-away copy of the SQLite file at *path*.

    The database is first duplicated with ``copy_sqlite_to_temp`` and the
    query is executed on that copy. All rows are fetched and returned as a
    list; a ``sqlite3.OperationalError`` raised while connecting or querying
    yields an empty list instead. The cursor, the connection and the
    temporary copy are released in every case.
    """
    snapshot = copy_sqlite_to_temp(path)
    db = cur = None
    try:
        db = sqlite3.connect(snapshot)
        cur = db.cursor()
        # Cursor.execute() returns the cursor itself, so the calls can be chained.
        return cur.execute(query).fetchall()
    except sqlite3.OperationalError:
        return []
    finally:
        # Close the cursor before the connection, then drop the temp file.
        for handle in (cur, db):
            if handle is not None:
                handle.close()
        snapshot.unlink(missing_ok=True)
def copy_sqlite_to_temp(path):
    """Copy the file at *path* to a new temporary ``.sqlite`` file.

    The caller owns the returned file and is responsible for deleting it.

    Args:
        path: source file (``str`` or path-like).

    Returns:
        pathlib.Path: location of the temporary copy.

    Raises:
        OSError: whatever ``shutil.copyfile`` raises (e.g. ``FileNotFoundError``
            for a missing source). The temporary file is removed before the
            exception propagates, so a failed copy leaves nothing behind.
    """
    fd, tmp = tempfile.mkstemp(suffix=".sqlite")
    # Only the name is needed; copyfile reopens the file itself.
    os.close(fd)
    tmp_path = Path(tmp)
    try:
        shutil.copyfile(path, tmp_path)
    except BaseException:
        # mkstemp already created the file; do not leak it on failure.
        tmp_path.unlink(missing_ok=True)
        raise
    return tmp_path
def decode_firefox_origin(raw_origin):
    """Convert a Firefox storage directory name back into an origin string.

    Firefox names per-origin storage directories after the origin with ``:``
    and ``/`` replaced by ``+`` and optional origin attributes appended after
    ``^`` (e.g. ``https+++example.com^userContextId=1``).

    * Everything from the first ``^`` on is dropped.
    * For ``http``/``https`` origins the leading ``+++`` becomes ``://`` and
      any remaining ``+`` becomes ``:``, so ``http+++localhost+8080`` decodes
      to ``http://localhost:8080``.
    * For any other scheme only ``+++`` is rewritten to ``://``, because a
      remaining ``+`` could stand for either ``:`` or ``/``.

    Args:
        raw_origin: directory name as found under ``storage/default``.

    Returns:
        str: the decoded origin.
    """
    origin = raw_origin.split("^", 1)[0]
    scheme, separator, remainder = origin.partition("+++")
    if separator and scheme in ("http", "https"):
        # Host names cannot contain "/" so every leftover "+" was a ":".
        authority = remainder.replace("+", ":")
        return f"{scheme}://{authority}"
    return origin.replace("+++", "://")
def stringify_sql_value(value):
    """Coerce a value read from SQLite into ``str``.

    ``None`` becomes the empty string. ``bytes`` are decoded with the first
    of UTF-8, UTF-16-LE and UTF-16 that succeeds; if none does, they are
    decoded as UTF-8 with undecodable bytes dropped. Anything else goes
    through ``str()``.
    """
    if isinstance(value, bytes):
        for codec in ("utf-8", "utf-16-le", "utf-16"):
            try:
                text = value.decode(codec)
            except UnicodeDecodeError:
                pass
            else:
                return text
        # Last resort: keep whatever is valid UTF-8 and discard the rest.
        return value.decode("utf-8", errors="ignore")
    return "" if value is None else str(value)