Auto-link canonical products conservatively

This commit is contained in:
ben
2026-03-16 00:44:45 -04:00
parent 347cd44d09
commit 385a31c07f
2 changed files with 150 additions and 33 deletions

View File

@@ -4,10 +4,11 @@ import build_canonical_layer
class CanonicalLayerTests(unittest.TestCase):
def test_build_canonical_layer_seeds_one_canonical_per_observed_product(self):
def test_build_canonical_layer_auto_links_exact_upc_and_name_size(self):
observed_rows = [
{
"observed_product_id": "gobs_1",
"representative_upc": "111",
"representative_name_norm": "GALA APPLE",
"representative_brand": "SB",
"representative_variant": "",
@@ -15,9 +16,23 @@ class CanonicalLayerTests(unittest.TestCase):
"representative_size_unit": "lb",
"representative_pack_qty": "",
"representative_measure_type": "weight",
"is_fee": "false",
},
{
"observed_product_id": "gobs_2",
"representative_upc": "111",
"representative_name_norm": "LARGE WHITE EGGS",
"representative_brand": "SB",
"representative_variant": "",
"representative_size_value": "",
"representative_size_unit": "",
"representative_pack_qty": "18",
"representative_measure_type": "count",
"is_fee": "false",
},
{
"observed_product_id": "gobs_3",
"representative_upc": "",
"representative_name_norm": "ROTINI",
"representative_brand": "",
"representative_variant": "",
@@ -25,17 +40,44 @@ class CanonicalLayerTests(unittest.TestCase):
"representative_size_unit": "oz",
"representative_pack_qty": "",
"representative_measure_type": "weight",
"is_fee": "false",
},
{
"observed_product_id": "gobs_4",
"representative_upc": "",
"representative_name_norm": "ROTINI",
"representative_brand": "SB",
"representative_variant": "",
"representative_size_value": "16",
"representative_size_unit": "oz",
"representative_pack_qty": "",
"representative_measure_type": "weight",
"is_fee": "false",
},
{
"observed_product_id": "gobs_5",
"representative_upc": "",
"representative_name_norm": "GL BAG CHARGE",
"representative_brand": "",
"representative_variant": "",
"representative_size_value": "",
"representative_size_unit": "",
"representative_pack_qty": "",
"representative_measure_type": "each",
"is_fee": "true",
},
]
canonicals, links = build_canonical_layer.build_canonical_layer(observed_rows)
self.assertEqual(2, len(canonicals))
self.assertEqual(2, len(links))
self.assertEqual("GALA APPLE", canonicals[0]["canonical_name"])
self.assertEqual("5", canonicals[0]["normalized_quantity"])
self.assertEqual("lb", canonicals[0]["normalized_quantity_unit"])
self.assertEqual("seed_observed_product", links[0]["link_method"])
self.assertEqual(4, len(links))
methods = {row["observed_product_id"]: row["link_method"] for row in links}
self.assertEqual("exact_upc", methods["gobs_1"])
self.assertEqual("exact_upc", methods["gobs_2"])
self.assertEqual("exact_name_size", methods["gobs_3"])
self.assertEqual("exact_name_size", methods["gobs_4"])
self.assertNotIn("gobs_5", methods)
if __name__ == "__main__":