"""Unit tests for the ``build_canonical_layer`` module."""

import unittest

import build_canonical_layer


def _observed_row(
    product_id,
    name_norm,
    measure_type,
    *,
    upc="",
    retailer_item_id="",
    brand="",
    size_value="",
    size_unit="",
    pack_qty="",
    is_fee="false",
):
    """Return one observed-product fixture row as a dict of strings.

    Every field the tests rely on is always present; anything not passed
    explicitly defaults to the empty string, and the three line-type flags
    default to ``"false"`` (only ``is_fee`` is ever overridden here).
    """
    return {
        "observed_product_id": product_id,
        "representative_upc": upc,
        "representative_retailer_item_id": retailer_item_id,
        "representative_name_norm": name_norm,
        "representative_brand": brand,
        "representative_variant": "",
        "representative_size_value": size_value,
        "representative_size_unit": size_unit,
        "representative_pack_qty": pack_qty,
        "representative_measure_type": measure_type,
        "is_fee": is_fee,
        "is_discount_line": "false",
        "is_coupon_line": "false",
    }


class CanonicalLayerTests(unittest.TestCase):
    """Tests for canonical-product linking and canonical-name cleanup."""

    def test_build_canonical_layer_auto_links_exact_upc_and_name_size_only(self):
        """Only exact-UPC and exact name+size matches are auto-linked.

        Fixture summary:
          * gobs_1 / gobs_2 share UPC "111" -> expected "exact_upc".
          * gobs_3 / gobs_4 share name "ROTINI" and size 16 oz, with no
            UPC -> expected "exact_name_size".
          * gobs_5 is flagged as a fee line and gobs_6 has neither
            identifiers nor a size; both are expected to stay unlinked
            (presumably for those reasons -- confirm against the
            implementation).
        """
        fixture_rows = [
            _observed_row(
                "gobs_1",
                "GALA APPLE",
                "weight",
                upc="111",
                retailer_item_id="11",
                brand="SB",
                size_value="5",
                size_unit="lb",
            ),
            _observed_row(
                "gobs_2",
                "LARGE WHITE EGGS",
                "count",
                upc="111",
                retailer_item_id="12",
                brand="SB",
                pack_qty="18",
            ),
            _observed_row(
                "gobs_3",
                "ROTINI",
                "weight",
                retailer_item_id="21",
                size_value="16",
                size_unit="oz",
            ),
            _observed_row(
                "gobs_4",
                "ROTINI",
                "weight",
                retailer_item_id="22",
                brand="SB",
                size_value="16",
                size_unit="oz",
            ),
            _observed_row(
                "gobs_5",
                "GL BAG CHARGE",
                "each",
                retailer_item_id="99",
                is_fee="true",
            ),
            _observed_row("gobs_6", "LIME", "each"),
        ]

        canonicals, links = build_canonical_layer.build_canonical_layer(fixture_rows)

        # Two canonical products (one per matched group), four links total.
        self.assertEqual(2, len(canonicals))
        self.assertEqual(4, len(links))

        method_by_id = {}
        for link in links:
            method_by_id[link["observed_product_id"]] = link["link_method"]

        expected_methods = (
            ("gobs_1", "exact_upc"),
            ("gobs_2", "exact_upc"),
            ("gobs_3", "exact_name_size"),
            ("gobs_4", "exact_name_size"),
        )
        for product_id, expected_method in expected_methods:
            self.assertEqual(expected_method, method_by_id[product_id])

        for unlinked_id in ("gobs_5", "gobs_6"):
            self.assertNotIn(unlinked_id, method_by_id)

    def test_clean_canonical_name_removes_packaging_noise(self):
        """Stray punctuation and pack-size tokens are stripped from names."""
        cases = (
            ("LIME", "LIME . / ."),
            ("EGG", "5DZ EGG / /"),
        )
        for expected_name, raw_name in cases:
            self.assertEqual(
                expected_name,
                build_canonical_layer.clean_canonical_name(raw_name),
            )


if __name__ == "__main__":
    unittest.main()