Generate Giant observed products

This commit is contained in:
ben
2026-03-16 00:43:11 -04:00
parent 8cdc4a1ad3
commit dc392149b5
3 changed files with 261 additions and 0 deletions

54
layer_helpers.py Normal file
View File

@@ -0,0 +1,54 @@
import csv
import hashlib
from collections import Counter
from pathlib import Path
def read_csv_rows(path):
    """Load a UTF-8 CSV file and return all of its rows as a list of dicts.

    ``path`` is coerced to a ``Path``. The file's first line supplies the
    dictionary keys, as ``csv.DictReader`` does by default.
    """
    source = Path(path)
    # newline="" lets the csv module handle line endings itself.
    with source.open(newline="", encoding="utf-8") as stream:
        records = list(csv.DictReader(stream))
    return records
def write_csv_rows(path, rows, fieldnames):
    """Write ``rows`` (dicts) to a UTF-8 CSV file at ``path`` with a header.

    Missing parent directories are created first and the file is opened in
    ``"w"`` mode, so existing content is replaced. Columns are emitted in
    ``fieldnames`` order.
    """
    destination = Path(path)
    destination.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control the line terminator itself.
    with destination.open("w", newline="", encoding="utf-8") as stream:
        table = csv.DictWriter(stream, fieldnames=fieldnames)
        table.writeheader()
        table.writerows(rows)
def stable_id(prefix, raw_key):
    """Build a deterministic identifier of the form ``<prefix>_<hash>``.

    ``raw_key`` is converted with ``str()``, encoded as UTF-8 and hashed with
    SHA-1; only the first 12 hex characters of the digest are kept.
    """
    encoded_key = str(raw_key).encode("utf-8")
    short_hash = hashlib.sha1(encoded_key).hexdigest()[:12]
    return f"{prefix}_{short_hash}"
def first_nonblank(rows, field):
    """Return the first truthy ``field`` value found in ``rows``.

    Rows are scanned in order; rows lacking ``field`` or holding a falsy
    value are skipped. Returns ``""`` when no row qualifies.
    """
    candidates = (row.get(field, "") for row in rows)
    return next((candidate for candidate in candidates if candidate), "")
def representative_value(rows, field):
    """Return the most frequent truthy ``field`` value across ``rows``.

    Ties on frequency are broken by picking the smallest value in sort
    order. Returns ``""`` when no row carries a truthy value for ``field``.
    """
    tally = Counter()
    for row in rows:
        candidate = row.get(field, "")
        if candidate:
            tally[candidate] += 1
    if not tally:
        return ""
    # Highest count first; among equal counts the smallest value wins.
    winner, _ = min(tally.items(), key=lambda pair: (-pair[1], pair[0]))
    return winner
def distinct_values(rows, field):
    """Return the sorted list of unique truthy ``field`` values in ``rows``.

    Rows lacking ``field`` or holding a falsy value contribute nothing.
    """
    found = set()
    for row in rows:
        candidate = row.get(field, "")
        if candidate:
            found.add(candidate)
    return sorted(found)
def compact_join(values, limit=3):
    """Join the first ``limit`` distinct truthy ``values`` with ``" | "``.

    Duplicates and falsy entries are dropped while first-seen order is
    preserved; the result is then sliced with ``[:limit]`` before joining.
    """
    # dict keys keep insertion order, so this dedupes without reordering.
    ordered = list(dict.fromkeys(item for item in values if item))
    return " | ".join(ordered[:limit])