120 lines
4.9 KiB
Python
120 lines
4.9 KiB
Python
import unittest
|
|
|
|
import build_canonical_layer
|
|
|
|
|
|
class CanonicalLayerTests(unittest.TestCase):
    """Tests for ``build_canonical_layer``: auto-linking rules and name cleanup."""

    @staticmethod
    def _observed_row(observed_product_id, **overrides):
        """Build one observed-product fixture row.

        Every field other than the id starts blank (flags start as "false");
        keyword arguments overwrite the matching fields in place, so the key
        order of the resulting dict is always the same.
        """
        row = {
            "observed_product_id": observed_product_id,
            "representative_upc": "",
            "representative_retailer_item_id": "",
            "representative_name_norm": "",
            "representative_brand": "",
            "representative_variant": "",
            "representative_size_value": "",
            "representative_size_unit": "",
            "representative_pack_qty": "",
            "representative_measure_type": "",
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
        }
        row.update(overrides)
        return row

    def test_build_canonical_layer_auto_links_exact_upc_and_name_size_only(self):
        # Fixture layout:
        #   gobs_1 / gobs_2 share a UPC,
        #   gobs_3 / gobs_4 share name + size (brand differs),
        #   gobs_5 is flagged as a fee line,
        #   gobs_6 has neither a UPC nor a size.
        make_row = self._observed_row
        observed_rows = [
            make_row(
                "gobs_1",
                representative_upc="111",
                representative_retailer_item_id="11",
                representative_name_norm="GALA APPLE",
                representative_brand="SB",
                representative_size_value="5",
                representative_size_unit="lb",
                representative_measure_type="weight",
            ),
            make_row(
                "gobs_2",
                representative_upc="111",
                representative_retailer_item_id="12",
                representative_name_norm="LARGE WHITE EGGS",
                representative_brand="SB",
                representative_pack_qty="18",
                representative_measure_type="count",
            ),
            make_row(
                "gobs_3",
                representative_retailer_item_id="21",
                representative_name_norm="ROTINI",
                representative_size_value="16",
                representative_size_unit="oz",
                representative_measure_type="weight",
            ),
            make_row(
                "gobs_4",
                representative_retailer_item_id="22",
                representative_name_norm="ROTINI",
                representative_brand="SB",
                representative_size_value="16",
                representative_size_unit="oz",
                representative_measure_type="weight",
            ),
            make_row(
                "gobs_5",
                representative_retailer_item_id="99",
                representative_name_norm="GL BAG CHARGE",
                representative_measure_type="each",
                is_fee="true",
            ),
            make_row(
                "gobs_6",
                representative_name_norm="LIME",
                representative_measure_type="each",
            ),
        ]

        canonicals, links = build_canonical_layer.build_canonical_layer(observed_rows)

        self.assertEqual(2, len(canonicals))
        self.assertEqual(4, len(links))

        # Index the link rows by observed product so each one can be checked by id.
        methods = {}
        for link in links:
            methods[link["observed_product_id"]] = link["link_method"]

        expected_methods = (
            ("gobs_1", "exact_upc"),
            ("gobs_2", "exact_upc"),
            ("gobs_3", "exact_name_size"),
            ("gobs_4", "exact_name_size"),
        )
        for product_id, link_method in expected_methods:
            self.assertEqual(link_method, methods[product_id])

        # The fee line and the size-less product must not be linked at all.
        for product_id in ("gobs_5", "gobs_6"):
            self.assertNotIn(product_id, methods)

    def test_clean_canonical_name_removes_packaging_noise(self):
        # (expected cleaned name, raw input) pairs.
        cases = (
            ("LIME", "LIME . / ."),
            ("EGG", "5DZ EGG / /"),
        )
        for expected, raw_name in cases:
            self.assertEqual(
                expected, build_canonical_layer.clean_canonical_name(raw_name)
            )
|
|
|
|
|
|
# Run the tests through the unittest runner when this file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|