Fix Costco hash-size weight parsing

This commit is contained in:
ben
2026-03-23 13:56:47 -04:00
parent facebced9c
commit 73176117fe
2 changed files with 27 additions and 1 deletion

View File

@@ -29,7 +29,7 @@ CODE_TOKEN_RE = re.compile(
r"\b(?:SL\d+|T\d+H\d+|P\d+(?:/\d+)?|W\d+T\d+H\d+|FY\d+|CSPC#|C\d+T\d+H\d+|EC\d+T\d+H\d+|\d+X\d+)\b"
)
# Pack fraction of the form "<int> / <number> <unit>" where <unit> is one of
# OZ, LB, LBS or CT. The lookbehind rejects a match that starts right after an
# uppercase letter or digit. Groups: (1) integer before the slash, (2) number
# after the slash (optional decimal part), (3) the unit token.
PACK_FRACTION_RE = re.compile(r"(?<![A-Z0-9])(\d+)\s*/\s*(\d+(?:\.\d+)?)\s*(OZ|LB|LBS|CT)\b")
# NOTE(review): this name is assigned twice in a row; the second assignment
# below rebinds it, so this first pattern is never the one in effect. This
# looks like the pre-change line of the diff -- confirm. Because "#" is not a
# word character, the trailing \b here only succeeds when "#" is immediately
# followed by a word character, so "25# FLOUR" (space after "#") cannot match.
HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#\b")
# Effective pattern: a number (optional decimal part, captured as group 1)
# directly followed by "#", where the "#" must be followed by whitespace or
# the end of the string (lookahead, not consumed). The lookbehind rejects a
# match that starts right after an uppercase letter or digit.
HASH_SIZE_RE = re.compile(r"(?<![A-Z0-9])(\d+(?:\.\d+)?)#(?=\s|$)")
# "#" followed by one or more word characters, ending at a word boundary.
ITEM_CODE_RE = re.compile(r"#\w+\b")
DUAL_WEIGHT_RE = re.compile(
r"\b\d+(?:\.\d+)?\s*(?:KG|G|LB|LBS|OZ)\s*/\s*\d+(?:\.\d+)?\s*(?:KG|G|LB|LBS|OZ)\b"

View File

@@ -346,6 +346,32 @@ class CostcoPipelineTests(unittest.TestCase):
)
self.assertEqual("LIFE 6'TABLE MDL", logistics["item_name_norm"])
def test_costco_hash_weight_parses_into_weight_basis(self):
    # A description that starts with "25#" should be reported as a 25 lb
    # weight item, with the "25#" and "P98/100" tokens absent from the
    # normalized name, and a per-pound price of 8.79 / 25 = 0.3516.
    flour_item = {
        "itemNumber": "999",
        "itemDescription01": "25# FLOUR ALL-PURPOSE HARV P98/100",
        "itemDescription02": None,
        "itemDepartmentNumber": 14,
        "transDepartmentNumber": 14,
        "unit": 1,
        "itemIdentifier": "E",
        "amount": 8.79,
        "itemUnitPriceAmount": 8.79,
    }

    parsed = enrich_costco.parse_costco_item(
        order_id="abc",
        order_date="2024-11-29",
        raw_path=Path("costco_output/raw/abc.json"),
        line_no=4,
        item=flour_item,
    )

    # Checked in this order so the first mismatch reported is the same one
    # a sequence of individual assertEqual calls would report.
    expected_fields = (
        ("item_name_norm", "FLOUR ALL-PURPOSE HARV"),
        ("size_value", "25"),
        ("size_unit", "lb"),
        ("measure_type", "weight"),
        ("normalized_quantity", "25"),
        ("normalized_quantity_unit", "lb"),
        ("price_per_lb", "0.3516"),
    )
    for field_name, expected_value in expected_fields:
        self.assertEqual(expected_value, parsed[field_name])
def test_build_items_enriched_matches_discount_to_item(self):
with tempfile.TemporaryDirectory() as tmpdir:
raw_dir = Path(tmpdir) / "raw"