Generate Giant observed products
This commit is contained in:
54
layer_helpers.py
Normal file
54
layer_helpers.py
Normal file
@@ -0,0 +1,54 @@
|
||||
import csv
|
||||
import hashlib
|
||||
from collections import Counter
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def read_csv_rows(path):
    """Read a UTF-8 CSV file and return all of its data rows.

    Args:
        path: Location of the CSV file; anything accepted by ``pathlib.Path``.

    Returns:
        A list with one mapping per data row, as produced by
        ``csv.DictReader`` (keys come from the file's header row).
    """
    source = Path(path)
    # newline="" lets the csv module handle line endings itself.
    with source.open(newline="", encoding="utf-8") as stream:
        reader = csv.DictReader(stream)
        records = list(reader)
    return records
|
||||
|
||||
|
||||
def write_csv_rows(path, rows, fieldnames):
    """Write ``rows`` to a UTF-8 CSV file, header line first.

    Missing parent directories of ``path`` are created before the file is
    opened. The file is opened in write mode, so existing content is replaced.

    Args:
        path: Destination file; anything accepted by ``pathlib.Path``.
        rows: Iterable of mappings handed to ``csv.DictWriter.writerows``.
        fieldnames: Column names, in output order, used for the header and
            to pick values out of each row.
    """
    target = Path(path)
    target.parent.mkdir(parents=True, exist_ok=True)
    # newline="" lets the csv module control the line terminator.
    with target.open("w", newline="", encoding="utf-8") as sink:
        out = csv.DictWriter(sink, fieldnames=fieldnames)
        out.writeheader()
        out.writerows(rows)
|
||||
|
||||
|
||||
def stable_id(prefix, raw_key):
    """Build a deterministic identifier of the form ``<prefix>_<hash>``.

    Args:
        prefix: Text placed before the underscore.
        raw_key: Value to hash; it is converted with ``str()`` and encoded
            as UTF-8 before hashing.

    Returns:
        ``prefix``, an underscore, and the first 12 hexadecimal characters
        of the SHA-1 digest of ``raw_key``.
    """
    encoded_key = str(raw_key).encode("utf-8")
    hex_digest = hashlib.sha1(encoded_key).hexdigest()
    short_hash = hex_digest[:12]
    return f"{prefix}_{short_hash}"
|
||||
|
||||
|
||||
def first_nonblank(rows, field):
    """Return the first truthy value of ``field`` found while scanning ``rows``.

    Args:
        rows: Iterable of mappings supporting ``.get``.
        field: Key to look up in each row.

    Returns:
        The first value that is truthy, or ``""`` when no row has one.
        A missing key is treated like an empty value.
    """
    candidates = (row.get(field, "") for row in rows)
    # Truthiness decides "blank", so whitespace-only strings still count.
    return next((value for value in candidates if value), "")
|
||||
|
||||
|
||||
def representative_value(rows, field):
    """Return the most frequent truthy value of ``field`` across ``rows``.

    Args:
        rows: Iterable of mappings supporting ``.get``.
        field: Key to look up in each row.

    Returns:
        The value with the highest occurrence count; ties are broken by
        taking the smallest value in sort order. Returns ``""`` when no row
        has a truthy value for ``field``.
    """
    tally = Counter()
    for row in rows:
        candidate = row.get(field, "")
        if candidate:
            tally[candidate] += 1

    if not tally:
        return ""

    def rank(entry):
        # Negated count sorts the most frequent first; the value breaks ties.
        value, occurrences = entry
        return (-occurrences, value)

    ordered = sorted(tally.items(), key=rank)
    winner, _ = ordered[0]
    return winner
|
||||
|
||||
|
||||
def distinct_values(rows, field):
    """Return the sorted, de-duplicated truthy values of ``field`` in ``rows``.

    Args:
        rows: Iterable of mappings supporting ``.get``.
        field: Key to look up in each row.

    Returns:
        A sorted list of the unique truthy values; rows where the key is
        missing or the value is falsy contribute nothing.
    """
    found = set()
    for row in rows:
        candidate = row.get(field, "")
        if candidate:
            found.add(candidate)
    return sorted(found)
|
||||
|
||||
|
||||
def compact_join(values, limit=3):
    """Join the first ``limit`` distinct truthy values with ``" | "``.

    Args:
        values: Iterable of hashable values; falsy entries are skipped and
            repeats are dropped, keeping first-seen order.
        limit: Slice bound applied to the de-duplicated values before
            joining (default 3).

    Returns:
        The joined string, or ``""`` when nothing remains to join.
    """
    # dict keys keep insertion order, giving an order-preserving de-dup.
    ordered_unique = list(dict.fromkeys(item for item in values if item))
    kept = ordered_unique[:limit]
    return " | ".join(kept)
|
||||
Reference in New Issue
Block a user