Refactor retailer normalization outputs

This commit is contained in:
ben
2026-03-18 15:46:20 -04:00
parent ec1f36a140
commit 9064de5f67
7 changed files with 222 additions and 11 deletions

View File

@@ -8,7 +8,10 @@ import click
from enrich_giant import (
OUTPUT_FIELDS,
derive_normalized_quantity,
derive_price_fields,
format_decimal,
normalization_identity,
normalize_number,
normalize_unit,
normalize_whitespace,
@@ -177,12 +180,42 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
price_per_each, price_per_lb, price_per_oz = derive_costco_prices(
item, measure_type, size_value, size_unit, pack_qty
)
normalized_row_id = f"{RETAILER}:{order_id}:{line_no}"
normalized_quantity, normalized_quantity_unit = derive_normalized_quantity(
size_value,
size_unit,
pack_qty,
measure_type,
)
identity_key, normalization_basis = normalization_identity(
{
"retailer": RETAILER,
"normalized_row_id": normalized_row_id,
"upc": "",
"retailer_item_id": str(item.get("itemNumber", "")),
"item_name_norm": item_name_norm,
"size_value": size_value,
"size_unit": size_unit,
"pack_qty": pack_qty,
}
)
price_fields = derive_price_fields(
price_per_each,
price_per_lb,
price_per_oz,
str(item.get("amount", "")),
str(item.get("unit", "")),
pack_qty,
)
return {
"retailer": RETAILER,
"order_id": str(order_id),
"line_no": str(line_no),
"observed_item_key": f"{RETAILER}:{order_id}:{line_no}",
"normalized_row_id": normalized_row_id,
"normalized_item_id": f"cnorm:{identity_key}",
"normalization_basis": normalization_basis,
"observed_item_key": normalized_row_id,
"order_date": normalize_whitespace(order_date),
"retailer_item_id": str(item.get("itemNumber", "")),
"pod_id": "",
@@ -210,13 +243,14 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
"size_unit": size_unit,
"pack_qty": pack_qty,
"measure_type": measure_type,
"normalized_quantity": normalized_quantity,
"normalized_quantity_unit": normalized_quantity_unit,
"is_store_brand": "true" if brand_guess else "false",
"is_item": "false" if is_discount_line else "true",
"is_fee": "false",
"is_discount_line": "true" if is_discount_line else "false",
"is_coupon_line": is_coupon_line,
"price_per_each": price_per_each,
"price_per_lb": price_per_lb,
"price_per_oz": price_per_oz,
**price_fields,
"parse_version": PARSER_VERSION,
"parse_notes": "",
}
@@ -321,6 +355,7 @@ def write_csv(path, rows):
help="CSV path for enriched Costco item rows.",
)
# Legacy CLI entry point: builds enriched Costco item rows from `input_dir`
# and writes them as CSV to `output_csv`.
# NOTE(review): the click decorators that bind these two parameters start
# above this view — confirm the option names and types there.
def main(input_dir, output_csv):
# Notify the user that this script is the legacy path and name the preferred
# replacement before doing any work.
click.echo("legacy entrypoint: prefer normalize_costco_web.py for data-model outputs")
# Both arguments are wrapped in Path before being handed to the helpers.
rows = build_items_enriched(Path(input_dir))
write_csv(Path(output_csv), rows)
# Report the number of rows written and the destination as given by the caller.
click.echo(f"wrote {len(rows)} rows to {output_csv}")