Refactor retailer normalization outputs
This commit is contained in:
@@ -8,7 +8,10 @@ import click
|
||||
|
||||
from enrich_giant import (
|
||||
OUTPUT_FIELDS,
|
||||
derive_normalized_quantity,
|
||||
derive_price_fields,
|
||||
format_decimal,
|
||||
normalization_identity,
|
||||
normalize_number,
|
||||
normalize_unit,
|
||||
normalize_whitespace,
|
||||
@@ -177,12 +180,42 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
price_per_each, price_per_lb, price_per_oz = derive_costco_prices(
|
||||
item, measure_type, size_value, size_unit, pack_qty
|
||||
)
|
||||
normalized_row_id = f"{RETAILER}:{order_id}:{line_no}"
|
||||
normalized_quantity, normalized_quantity_unit = derive_normalized_quantity(
|
||||
size_value,
|
||||
size_unit,
|
||||
pack_qty,
|
||||
measure_type,
|
||||
)
|
||||
identity_key, normalization_basis = normalization_identity(
|
||||
{
|
||||
"retailer": RETAILER,
|
||||
"normalized_row_id": normalized_row_id,
|
||||
"upc": "",
|
||||
"retailer_item_id": str(item.get("itemNumber", "")),
|
||||
"item_name_norm": item_name_norm,
|
||||
"size_value": size_value,
|
||||
"size_unit": size_unit,
|
||||
"pack_qty": pack_qty,
|
||||
}
|
||||
)
|
||||
price_fields = derive_price_fields(
|
||||
price_per_each,
|
||||
price_per_lb,
|
||||
price_per_oz,
|
||||
str(item.get("amount", "")),
|
||||
str(item.get("unit", "")),
|
||||
pack_qty,
|
||||
)
|
||||
|
||||
return {
|
||||
"retailer": RETAILER,
|
||||
"order_id": str(order_id),
|
||||
"line_no": str(line_no),
|
||||
"observed_item_key": f"{RETAILER}:{order_id}:{line_no}",
|
||||
"normalized_row_id": normalized_row_id,
|
||||
"normalized_item_id": f"cnorm:{identity_key}",
|
||||
"normalization_basis": normalization_basis,
|
||||
"observed_item_key": normalized_row_id,
|
||||
"order_date": normalize_whitespace(order_date),
|
||||
"retailer_item_id": str(item.get("itemNumber", "")),
|
||||
"pod_id": "",
|
||||
@@ -210,13 +243,14 @@ def parse_costco_item(order_id, order_date, raw_path, line_no, item):
|
||||
"size_unit": size_unit,
|
||||
"pack_qty": pack_qty,
|
||||
"measure_type": measure_type,
|
||||
"normalized_quantity": normalized_quantity,
|
||||
"normalized_quantity_unit": normalized_quantity_unit,
|
||||
"is_store_brand": "true" if brand_guess else "false",
|
||||
"is_item": "false" if is_discount_line else "true",
|
||||
"is_fee": "false",
|
||||
"is_discount_line": "true" if is_discount_line else "false",
|
||||
"is_coupon_line": is_coupon_line,
|
||||
"price_per_each": price_per_each,
|
||||
"price_per_lb": price_per_lb,
|
||||
"price_per_oz": price_per_oz,
|
||||
**price_fields,
|
||||
"parse_version": PARSER_VERSION,
|
||||
"parse_notes": "",
|
||||
}
|
||||
@@ -321,6 +355,7 @@ def write_csv(path, rows):
|
||||
help="CSV path for enriched Costco item rows.",
|
||||
)
|
||||
def main(input_dir, output_csv):
|
||||
click.echo("legacy entrypoint: prefer normalize_costco_web.py for data-model outputs")
|
||||
rows = build_items_enriched(Path(input_dir))
|
||||
write_csv(Path(output_csv), rows)
|
||||
click.echo(f"wrote {len(rows)} rows to {output_csv}")
|
||||
|
||||
Reference in New Issue
Block a user