Extend shared schema for retailer-native IDs

This commit is contained in:
ben
2026-03-16 09:17:36 -04:00
parent d20a131e04
commit 9497565978
8 changed files with 101 additions and 4 deletions

View File

@@ -17,6 +17,7 @@ OUTPUT_FIELDS = [
"observed_product_id",
"retailer",
"observed_key",
"representative_retailer_item_id",
"representative_upc",
"representative_item_name",
"representative_name_norm",
@@ -29,6 +30,8 @@ OUTPUT_FIELDS = [
"representative_image_url",
"is_store_brand",
"is_fee",
"is_discount_line",
"is_coupon_line",
"first_seen_date",
"last_seen_date",
"times_seen",
@@ -38,6 +41,7 @@ OUTPUT_FIELDS = [
"normalized_name_examples",
"example_prices",
"distinct_item_names_count",
"distinct_retailer_item_ids_count",
"distinct_upcs_count",
]
@@ -52,6 +56,17 @@ def build_observed_key(row):
]
)
if row.get("retailer_item_id"):
return "|".join(
[
row["retailer"],
f"retailer_item_id={row['retailer_item_id']}",
f"name={row['item_name_norm']}",
f"discount={row.get('is_discount_line', 'false')}",
f"coupon={row.get('is_coupon_line', 'false')}",
]
)
return "|".join(
[
row["retailer"],
@@ -82,6 +97,9 @@ def build_observed_products(rows):
"observed_product_id": stable_id("gobs", observed_key),
"retailer": ordered[0]["retailer"],
"observed_key": observed_key,
"representative_retailer_item_id": representative_value(
ordered, "retailer_item_id"
),
"representative_upc": representative_value(ordered, "upc"),
"representative_item_name": representative_value(ordered, "item_name"),
"representative_name_norm": representative_value(
@@ -98,6 +116,10 @@ def build_observed_products(rows):
"representative_image_url": first_nonblank(ordered, "image_url"),
"is_store_brand": representative_value(ordered, "is_store_brand"),
"is_fee": representative_value(ordered, "is_fee"),
"is_discount_line": representative_value(
ordered, "is_discount_line"
),
"is_coupon_line": representative_value(ordered, "is_coupon_line"),
"first_seen_date": ordered[0]["order_date"],
"last_seen_date": ordered[-1]["order_date"],
"times_seen": str(len(ordered)),
@@ -115,6 +137,9 @@ def build_observed_products(rows):
"distinct_item_names_count": str(
len(distinct_values(ordered, "item_name"))
),
"distinct_retailer_item_ids_count": str(
len(distinct_values(ordered, "retailer_item_id"))
),
"distinct_upcs_count": str(len(distinct_values(ordered, "upc"))),
}
)