diff --git a/enrich_costco.py b/enrich_costco.py new file mode 100644 index 0000000..8129c64 --- /dev/null +++ b/enrich_costco.py @@ -0,0 +1,271 @@ +import csv +import json +import re +from pathlib import Path + +import click + +from enrich_giant import ( + OUTPUT_FIELDS, + format_decimal, + normalize_number, + normalize_unit, + normalize_whitespace, + singularize_tokens, + to_decimal, +) + + +PARSER_VERSION = "costco-enrich-v1" +RETAILER = "costco" +DEFAULT_INPUT_DIR = Path("costco_output/raw") +DEFAULT_OUTPUT_CSV = Path("costco_output/items_enriched.csv") + +CODE_TOKEN_RE = re.compile( + r"\b(?:SL\d+|T\d+H\d+|P\d+(?:/\d+)?|W\d+T\d+H\d+|FY\d+|CSPC#|C\d+T\d+H\d+|EC\d+T\d+H\d+|\d+X\d+)\b" +) +PACK_FRACTION_RE = re.compile(r"(?