Files
scrape-giant/tests/test_canonical_layer.py

120 lines
4.9 KiB
Python

import unittest
import build_canonical_layer
class CanonicalLayerTests(unittest.TestCase):
    """Tests for the ``build_canonical_layer`` module.

    Covers linking of observed-product rows via ``build_canonical_layer``
    and name cleanup via ``clean_canonical_name``.
    """

    @staticmethod
    def _observed_row(observed_product_id, **overrides):
        """Build one observed-product fixture row.

        Every column other than the id starts out at the blank/default
        value used by the fixtures in this class; ``overrides`` replaces
        individual columns, addressed by their full column name.

        The returned dict always lists its keys in the same column order,
        no matter in which order the overrides are given, because
        ``dict.update`` keeps the position of keys that already exist.
        """
        row = {
            "observed_product_id": observed_product_id,
            "representative_upc": "",
            "representative_retailer_item_id": "",
            "representative_name_norm": "",
            "representative_brand": "",
            "representative_variant": "",
            "representative_size_value": "",
            "representative_size_unit": "",
            "representative_pack_qty": "",
            "representative_measure_type": "each",
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
        }
        row.update(overrides)
        return row

    def test_build_canonical_layer_auto_links_exact_upc_and_name_size_only(self):
        # Fixture overview:
        #   gobs_1 / gobs_2 -- share UPC "111" (names and sizes differ)
        #   gobs_3 / gobs_4 -- no UPC, same name and 16 oz size, brand differs
        #   gobs_5          -- fee line ("is_fee" is "true")
        #   gobs_6          -- no UPC, no retailer item id, no size
        make_row = self._observed_row
        observed_rows = [
            make_row(
                "gobs_1",
                representative_upc="111",
                representative_retailer_item_id="11",
                representative_name_norm="GALA APPLE",
                representative_brand="SB",
                representative_size_value="5",
                representative_size_unit="lb",
                representative_measure_type="weight",
            ),
            make_row(
                "gobs_2",
                representative_upc="111",
                representative_retailer_item_id="12",
                representative_name_norm="LARGE WHITE EGGS",
                representative_brand="SB",
                representative_pack_qty="18",
                representative_measure_type="count",
            ),
            make_row(
                "gobs_3",
                representative_retailer_item_id="21",
                representative_name_norm="ROTINI",
                representative_size_value="16",
                representative_size_unit="oz",
                representative_measure_type="weight",
            ),
            make_row(
                "gobs_4",
                representative_retailer_item_id="22",
                representative_name_norm="ROTINI",
                representative_brand="SB",
                representative_size_value="16",
                representative_size_unit="oz",
                representative_measure_type="weight",
            ),
            make_row(
                "gobs_5",
                representative_retailer_item_id="99",
                representative_name_norm="GL BAG CHARGE",
                is_fee="true",
            ),
            make_row(
                "gobs_6",
                representative_name_norm="LIME",
            ),
        ]

        canonicals, links = build_canonical_layer.build_canonical_layer(observed_rows)

        # Two canonical products are expected, with one link per linked row.
        self.assertEqual(2, len(canonicals))
        self.assertEqual(4, len(links))

        # Index the link method by observed product id.
        methods = {}
        for link in links:
            methods[link["observed_product_id"]] = link["link_method"]

        expected_methods = (
            ("gobs_1", "exact_upc"),
            ("gobs_2", "exact_upc"),
            ("gobs_3", "exact_name_size"),
            ("gobs_4", "exact_name_size"),
        )
        for product_id, expected_method in expected_methods:
            self.assertEqual(expected_method, methods[product_id])

        # The fee line and the row without UPC/size must not appear in links.
        for product_id in ("gobs_5", "gobs_6"):
            self.assertNotIn(product_id, methods)

    def test_clean_canonical_name_removes_packaging_noise(self):
        # (expected cleaned name, raw input name)
        cases = (
            ("LIME", "LIME . / ."),
            ("EGG", "5DZ EGG / /"),
        )
        for expected_name, raw_name in cases:
            self.assertEqual(
                expected_name,
                build_canonical_layer.clean_canonical_name(raw_name),
            )
# Allow running this test module directly as a script; when the module is
# merely imported (e.g. by a test runner) nothing is executed here.
if __name__ == "__main__":
    unittest.main()