Finalize post-refactor layout and remove old pipeline files
This commit is contained in:
@@ -1,119 +0,0 @@
|
||||
import unittest
|
||||
|
||||
import build_canonical_layer
|
||||
|
||||
|
||||
class CanonicalLayerTests(unittest.TestCase):
    """Exercise ``build_canonical_layer`` linking and canonical-name cleanup."""

    def test_build_canonical_layer_auto_links_exact_upc_and_name_size_only(self):
        """Six observed products should yield 2 canonicals and 4 links.

        The fixture holds two rows sharing UPC ``111``, two ``ROTINI`` rows
        with the same 16 oz size and no UPC, one row flagged ``is_fee``, and
        one ``LIME`` row with neither UPC nor size.  Only the first four are
        expected to appear in the link output.
        """
        # Column order matches the key order of every fixture row.
        field_names = (
            "observed_product_id",
            "representative_upc",
            "representative_retailer_item_id",
            "representative_name_norm",
            "representative_brand",
            "representative_variant",
            "representative_size_value",
            "representative_size_unit",
            "representative_pack_qty",
            "representative_measure_type",
            "is_fee",
            "is_discount_line",
            "is_coupon_line",
        )
        fixture_values = (
            ("gobs_1", "111", "11", "GALA APPLE", "SB", "", "5", "lb", "",
             "weight", "false", "false", "false"),
            ("gobs_2", "111", "12", "LARGE WHITE EGGS", "SB", "", "", "", "18",
             "count", "false", "false", "false"),
            ("gobs_3", "", "21", "ROTINI", "", "", "16", "oz", "",
             "weight", "false", "false", "false"),
            ("gobs_4", "", "22", "ROTINI", "SB", "", "16", "oz", "",
             "weight", "false", "false", "false"),
            ("gobs_5", "", "99", "GL BAG CHARGE", "", "", "", "", "",
             "each", "true", "false", "false"),
            ("gobs_6", "", "", "LIME", "", "", "", "", "",
             "each", "false", "false", "false"),
        )
        observed_rows = [dict(zip(field_names, values)) for values in fixture_values]

        canonicals, links = build_canonical_layer.build_canonical_layer(observed_rows)

        self.assertEqual(2, len(canonicals))
        self.assertEqual(4, len(links))

        link_method_by_product = dict(
            (link["observed_product_id"], link["link_method"]) for link in links
        )
        expected_methods = (
            ("gobs_1", "exact_upc"),
            ("gobs_2", "exact_upc"),
            ("gobs_3", "exact_name_size"),
            ("gobs_4", "exact_name_size"),
        )
        for product_id, expected_method in expected_methods:
            self.assertEqual(expected_method, link_method_by_product[product_id])
        for unlinked_id in ("gobs_5", "gobs_6"):
            self.assertNotIn(unlinked_id, link_method_by_product)

    def test_clean_canonical_name_removes_packaging_noise(self):
        """Stray punctuation and a leading ``5DZ`` token are stripped."""
        cases = (
            ("LIME", "LIME . / ."),
            ("EGG", "5DZ EGG / /"),
        )
        for cleaned, noisy in cases:
            self.assertEqual(cleaned, build_canonical_layer.clean_canonical_name(noisy))
|
||||
|
||||
|
||||
# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -7,7 +7,6 @@ from unittest import mock
|
||||
|
||||
import enrich_costco
|
||||
import scrape_costco
|
||||
import validate_cross_retailer_flow
|
||||
|
||||
|
||||
class CostcoPipelineTests(unittest.TestCase):
|
||||
@@ -423,76 +422,6 @@ class CostcoPipelineTests(unittest.TestCase):
|
||||
self.assertIn("matched_discount=4873222", purchase_row["parse_notes"])
|
||||
self.assertIn("matched_to_item=4873222", discount_row["parse_notes"])
|
||||
|
||||
def test_cross_retailer_validation_writes_proof_example(self):
    """Run the cross-retailer validation on one Giant and one Costco row.

    Both rows carry ``item_name_norm`` of ``BANANA``.  After the command
    callback runs, ``proof_examples.csv`` must exist in the output
    directory and hold exactly one row whose ``proof_name`` is ``banana``.
    """
    with tempfile.TemporaryDirectory() as tmpdir:
        workdir = Path(tmpdir)
        giant_csv = workdir / "giant_items_enriched.csv"
        costco_csv = workdir / "costco_items_enriched.csv"
        outdir = workdir / "combined"

        columns = enrich_costco.OUTPUT_FIELDS

        # Start every row with all output columns blank, then fill in
        # only the values this scenario needs.
        giant_row = dict.fromkeys(columns, "")
        giant_row.update(
            retailer="giant",
            order_id="g1",
            line_no="1",
            order_date="2026-03-01",
            retailer_item_id="100",
            item_name="FRESH BANANA",
            item_name_norm="BANANA",
            upc="4011",
            measure_type="weight",
            is_store_brand="false",
            is_fee="false",
            is_discount_line="false",
            is_coupon_line="false",
            line_total="1.29",
        )
        costco_row = dict.fromkeys(columns, "")
        costco_row.update(
            retailer="costco",
            order_id="c1",
            line_no="1",
            order_date="2026-03-12",
            retailer_item_id="30669",
            item_name="BANANAS 3 LB / 1.36 KG",
            item_name_norm="BANANA",
            upc="",
            size_value="3",
            size_unit="lb",
            measure_type="weight",
            is_store_brand="false",
            is_fee="false",
            is_discount_line="false",
            is_coupon_line="false",
            line_total="2.98",
        )

        # Giant file first, then Costco: one header plus one data row each.
        for csv_path, single_row in ((giant_csv, giant_row), (costco_csv, costco_row)):
            with csv_path.open("w", newline="", encoding="utf-8") as handle:
                writer = csv.DictWriter(handle, fieldnames=columns)
                writer.writeheader()
                writer.writerow(single_row)

        validate_cross_retailer_flow.main.callback(
            giant_items_enriched_csv=str(giant_csv),
            costco_items_enriched_csv=str(costco_csv),
            outdir=str(outdir),
        )

        proof_path = outdir / "proof_examples.csv"
        self.assertTrue(proof_path.exists())
        with proof_path.open(newline="", encoding="utf-8") as handle:
            proof_rows = list(csv.DictReader(handle))
        self.assertEqual(1, len(proof_rows))
        self.assertEqual("banana", proof_rows[0]["proof_name"])
|
||||
|
||||
def test_main_writes_summary_request_metadata(self):
|
||||
with tempfile.TemporaryDirectory() as tmpdir:
|
||||
outdir = Path(tmpdir) / "costco_output"
|
||||
|
||||
@@ -1,67 +0,0 @@
|
||||
import unittest
|
||||
|
||||
import build_observed_products
|
||||
|
||||
|
||||
class ObservedProductTests(unittest.TestCase):
    """Exercise ``build_observed_products.build_observed_products``."""

    def test_build_observed_products_aggregates_rows_with_same_key(self):
        """Two purchases of the same item collapse into one observed product.

        The rows differ only in order id, order date, raw item name,
        image URL and line total; retailer, item id, UPC and normalized
        name are identical.
        """
        first_purchase = {
            "retailer": "giant",
            "order_id": "1",
            "line_no": "1",
            "order_date": "2026-01-01",
            "item_name": "SB GALA APPLE 5LB",
            "item_name_norm": "GALA APPLE",
            "retailer_item_id": "11",
            "upc": "111",
            "brand_guess": "SB",
            "variant": "",
            "size_value": "5",
            "size_unit": "lb",
            "pack_qty": "",
            "measure_type": "weight",
            "image_url": "https://example.test/a.jpg",
            "is_store_brand": "true",
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
            "line_total": "7.99",
        }
        # Overriding existing keys keeps the original key order intact.
        second_purchase = {
            **first_purchase,
            "order_id": "2",
            "order_date": "2026-01-10",
            "item_name": "SB GALA APPLE 5 LB",
            "image_url": "",
            "line_total": "8.49",
        }

        observed = build_observed_products.build_observed_products(
            [first_purchase, second_purchase]
        )

        self.assertEqual(1, len(observed))
        product = observed[0]
        expected_fields = (
            ("times_seen", "2"),
            ("first_seen_date", "2026-01-01"),
            ("last_seen_date", "2026-01-10"),
            ("representative_retailer_item_id", "11"),
            ("representative_upc", "111"),
        )
        for field, expected in expected_fields:
            self.assertEqual(expected, product[field])
        self.assertIn("SB GALA APPLE 5LB", product["raw_name_examples"])
|
||||
|
||||
|
||||
# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -65,6 +65,21 @@ class PipelineStatusTests(unittest.TestCase):
|
||||
},
|
||||
],
|
||||
resolutions=[],
|
||||
links=[
|
||||
{
|
||||
"normalized_item_id": "gnorm_banana",
|
||||
"catalog_id": "cat_banana",
|
||||
"review_status": "approved",
|
||||
}
|
||||
],
|
||||
catalog=[
|
||||
{
|
||||
"catalog_id": "cat_banana",
|
||||
"catalog_name": "BANANA",
|
||||
"product_type": "banana",
|
||||
"category": "produce",
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
counts = {row["stage"]: row["count"] for row in summary}
|
||||
|
||||
@@ -1,133 +0,0 @@
|
||||
import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
import build_observed_products
|
||||
import build_review_queue
|
||||
from layer_helpers import write_csv_rows
|
||||
|
||||
|
||||
class ReviewQueueTests(unittest.TestCase):
    """Exercise ``build_review_queue`` both in memory and through its command."""

    def test_build_review_queue_preserves_existing_status(self):
        """A previously reviewed ``missing_image`` entry keeps its status and notes.

        The existing-entry mapping is keyed by
        ``stable_id("rvw", "gobs_1|missing_image")``; the rebuilt queue is
        expected to contain two entries, one of them with reason code
        ``missing_image``.
        """
        observed_product = {
            "observed_product_id": "gobs_1",
            "retailer": "giant",
            "representative_upc": "111",
            "representative_image_url": "",
            "representative_name_norm": "GALA APPLE",
            "times_seen": "2",
            "distinct_item_names_count": "2",
            "distinct_upcs_count": "1",
            "is_fee": "false",
            "is_discount_line": "false",
            "is_coupon_line": "false",
        }
        item_rows = [
            {
                "observed_product_id": "gobs_1",
                "item_name": raw_name,
                "item_name_norm": "GALA APPLE",
                "line_total": total,
            }
            for raw_name, total in (
                ("SB GALA APPLE 5LB", "7.99"),
                ("SB GALA APPLE 5 LB", "8.49"),
            )
        ]
        prior_review_id = build_review_queue.stable_id("rvw", "gobs_1|missing_image")
        existing = {
            prior_review_id: {
                "status": "approved",
                "resolution_notes": "looked fine",
                "created_at": "2026-03-15",
            }
        }

        queue = build_review_queue.build_review_queue(
            [observed_product], item_rows, existing, "2026-03-16"
        )

        self.assertEqual(2, len(queue))
        missing_image = list(
            filter(lambda entry: entry["reason_code"] == "missing_image", queue)
        )[0]
        self.assertEqual("approved", missing_image["status"])
        self.assertEqual("looked fine", missing_image["resolution_notes"])

    def test_review_queue_main_writes_output(self):
        """The command callback writes ``review_queue.csv`` from two input CSVs."""
        with tempfile.TemporaryDirectory() as tmpdir:
            workdir = Path(tmpdir)
            observed_path = workdir / "products_observed.csv"
            items_path = workdir / "items_enriched.csv"
            output_path = workdir / "review_queue.csv"

            observed_product = {
                "observed_product_id": "gobs_1",
                "retailer": "giant",
                "observed_key": "giant|upc=111|name=GALA APPLE",
                "representative_retailer_item_id": "11",
                "representative_upc": "111",
                "representative_item_name": "SB GALA APPLE 5LB",
                "representative_name_norm": "GALA APPLE",
                "representative_brand": "SB",
                "representative_variant": "",
                "representative_size_value": "5",
                "representative_size_unit": "lb",
                "representative_pack_qty": "",
                "representative_measure_type": "weight",
                "representative_image_url": "",
                "is_store_brand": "true",
                "is_fee": "false",
                "is_discount_line": "false",
                "is_coupon_line": "false",
                "first_seen_date": "2026-01-01",
                "last_seen_date": "2026-01-10",
                "times_seen": "2",
                "example_order_id": "1",
                "example_item_name": "SB GALA APPLE 5LB",
                "raw_name_examples": "SB GALA APPLE 5LB | SB GALA APPLE 5 LB",
                "normalized_name_examples": "GALA APPLE",
                "example_prices": "7.99 | 8.49",
                "distinct_item_names_count": "2",
                "distinct_retailer_item_ids_count": "1",
                "distinct_upcs_count": "1",
            }
            purchase = {
                "retailer": "giant",
                "order_id": "1",
                "line_no": "1",
                "item_name": "SB GALA APPLE 5LB",
                "item_name_norm": "GALA APPLE",
                "retailer_item_id": "11",
                "upc": "111",
                "size_value": "5",
                "size_unit": "lb",
                "pack_qty": "",
                "measure_type": "weight",
                "is_store_brand": "true",
                "is_fee": "false",
                "is_discount_line": "false",
                "is_coupon_line": "false",
                "line_total": "7.99",
            }

            write_csv_rows(
                observed_path, [observed_product], build_observed_products.OUTPUT_FIELDS
            )
            # The items file uses the purchase row's own keys as its header.
            write_csv_rows(items_path, [purchase], list(purchase))

            build_review_queue.main.callback(
                observed_csv=str(observed_path),
                items_enriched_csv=str(items_path),
                output_csv=str(output_path),
            )

            self.assertTrue(output_path.exists())
|
||||
|
||||
|
||||
# Run the unittest runner when this module is executed as a script.
if __name__ == "__main__":
|
||||
unittest.main()
|
||||
@@ -3,7 +3,7 @@ import tempfile
|
||||
import unittest
|
||||
from pathlib import Path
|
||||
|
||||
import scraper
|
||||
import scrape_giant as scraper
|
||||
|
||||
|
||||
class ScraperTests(unittest.TestCase):
|
||||
|
||||
Reference in New Issue
Block a user