diff --git a/enrich_costco.py b/enrich_costco.py index 78258a0..de81351 100644 --- a/enrich_costco.py +++ b/enrich_costco.py @@ -30,6 +30,11 @@ CODE_TOKEN_RE = re.compile( ) PACK_FRACTION_RE = re.compile(r"(? MANDARIN` and `LIFE 6'TABLE MDL #80873U - T12/H3/P36 -> LIFE 6'TABLE MDL` +- datetime: 2026-03-20 11:09:32 EDT ** notes +- Kept this explicitly Costco-specific and narrow: the cleanup removes known logistics/code artifacts and orphan slash tokens without introducing fuzzy naming logic. +- The structured parsing still owns size/pack extraction, so name cleanup can safely strip dual-unit and logistics fragments after those fields are parsed. +- Discount-line behavior remains unchanged; this task only cleaned normalized names and preserved the existing audit trail. * [ ] t1.15: refactor review/combine pipeline around normalized_item_id and catalog links (4-8 commits) replace the old observed/canonical workflow with a review-first pipeline that uses normalized_item_id as the retailer-level review unit and links it to catalog items diff --git a/tests/test_costco_pipeline.py b/tests/test_costco_pipeline.py index 50f141f..2cd5a96 100644 --- a/tests/test_costco_pipeline.py +++ b/tests/test_costco_pipeline.py @@ -285,6 +285,47 @@ class CostcoPipelineTests(unittest.TestCase): self.assertEqual("true", discount["is_coupon_line"]) self.assertEqual("false", discount["is_item"]) + def test_costco_name_cleanup_removes_dual_weight_and_logistics_artifacts(self): + mixed_units = enrich_costco.parse_costco_item( + order_id="abc", + order_date="2026-03-12", + raw_path=Path("costco_output/raw/abc.json"), + line_no=1, + item={ + "itemNumber": "18600", + "itemDescription01": "MANDARINS 2.27 KG / 5 LBS", + "itemDescription02": None, + "itemDepartmentNumber": 65, + "transDepartmentNumber": 65, + "unit": 1, + "itemIdentifier": "E", + "amount": 7.49, + "itemUnitPriceAmount": 7.49, + }, + ) + self.assertEqual("MANDARIN", mixed_units["item_name_norm"]) + self.assertEqual("5", mixed_units["size_value"]) + self.assertEqual("lb", mixed_units["size_unit"]) + + logistics = enrich_costco.parse_costco_item( + order_id="abc", + order_date="2026-03-12", + raw_path=Path("costco_output/raw/abc.json"), + line_no=2, + item={ + "itemNumber": "1375005", + "itemDescription01": "LIFE 6'TABLE MDL #80873U - T12/H3/P36", + "itemDescription02": None, + "itemDepartmentNumber": 18, + "transDepartmentNumber": 18, + "unit": 1, + "itemIdentifier": "E", + "amount": 119.98, + "itemUnitPriceAmount": 119.98, + }, + ) + self.assertEqual("LIFE 6'TABLE MDL", logistics["item_name_norm"]) + def test_build_items_enriched_matches_discount_to_item(self): with tempfile.TemporaryDirectory() as tmpdir: raw_dir = Path(tmpdir) / "raw"