Add catalog search to review flow

This commit is contained in:
ben
2026-03-20 13:32:20 -04:00
parent 17158fb9e9
commit f93b9aa464
2 changed files with 225 additions and 63 deletions

View File

@@ -1,5 +1,6 @@
from collections import defaultdict
from datetime import date
import re
import click
@@ -29,6 +30,7 @@ QUEUE_FIELDS = [
# Foreground colour names passed as ``fg=`` to click's styling helpers.
INFO_COLOR = "cyan"  # informational headings
PROMPT_COLOR = "bright_yellow"  # interactive prompts
WARNING_COLOR = "magenta"  # warnings such as "invalid action"
# Matches runs of uppercase ASCII letters and digits; tokenize_match_text
# applies it to upper-cased text, so matching is effectively case-insensitive.
TOKEN_RE = re.compile(r"[A-Z0-9]+")
def print_intro_text():
@@ -134,6 +136,13 @@ def sort_related_items(rows):
)
def tokenize_match_text(*values):
    """Collect the distinct match tokens found across the given values.

    Every value is upper-cased and scanned with ``TOKEN_RE``. Falsy values
    (``None``, ``""``) are treated as empty text and contribute no tokens.

    Returns:
        A set of token strings; empty when nothing matched.
    """
    return {
        token
        for text in values
        for token in TOKEN_RE.findall((text or "").upper())
    }
def build_catalog_suggestions(related_rows, purchase_rows, catalog_rows, limit=3):
normalized_names = {
row.get("normalized_item_name", "").strip().upper()
@@ -190,6 +199,55 @@ def build_catalog_suggestions(related_rows, purchase_rows, catalog_rows, limit=3
return suggestions
def search_catalog_rows(query, catalog_rows, purchase_rows, current_normalized_item_id, limit=10):
    """Rank catalog rows by how many tokens they share with a search query.

    Args:
        query: Free-text search string, tokenized with ``tokenize_match_text``.
        catalog_rows: Iterable of dict-like catalog rows. Reads
            ``catalog_id``, ``catalog_name``, ``product_type``, ``category``
            and ``variant``.
        purchase_rows: Iterable of dict-like purchase rows. Reads
            ``catalog_id`` and ``normalized_item_id`` to count existing links.
        current_normalized_item_id: Id of the item being reviewed. The
            catalog entry it is already linked to (according to
            ``purchase_rows``) is left out of the results.
        limit: Maximum number of rows to return.

    Returns:
        A list of dicts with ``catalog_id``, ``catalog_name``,
        ``product_type``, ``category``, ``variant``,
        ``linked_normalized_items``, ``linked_purchase_rows`` and ``score``
        (number of shared tokens), ordered by descending score, then
        ``catalog_name``, then ``catalog_id``. Empty when the query yields
        no tokens or nothing overlaps.
    """
    query_tokens = tokenize_match_text(query)
    if not query_tokens:
        return []

    linked_purchase_counts = defaultdict(int)
    linked_normalized_ids = defaultdict(set)
    current_catalog_id = ""
    for row in purchase_rows:
        catalog_id = row.get("catalog_id") or ""
        normalized_item_id = row.get("normalized_item_id") or ""
        # A row only represents a link when both ids are present. Guarding
        # here also stops an empty current id from matching unlinked rows.
        if not (catalog_id and normalized_item_id):
            continue
        linked_purchase_counts[catalog_id] += 1
        linked_normalized_ids[catalog_id].add(normalized_item_id)
        if normalized_item_id == current_normalized_item_id:
            current_catalog_id = catalog_id

    ranked_rows = []
    for row in catalog_rows:
        catalog_id = row.get("catalog_id") or ""
        if not catalog_id or catalog_id == current_catalog_id:
            continue
        # Rows may carry None for a present-but-empty field (presumably
        # when loaded from short CSV lines -- confirm against the loader).
        # Coerce to "" so the sort key below never compares None with str.
        catalog_name = row.get("catalog_name") or ""
        product_type = row.get("product_type") or ""
        variant = row.get("variant") or ""
        overlap = query_tokens & tokenize_match_text(catalog_name, product_type, variant)
        if not overlap:
            continue
        ranked_rows.append(
            {
                "catalog_id": catalog_id,
                "catalog_name": catalog_name,
                "product_type": product_type,
                "category": row.get("category") or "",
                "variant": variant,
                # .get() avoids inserting empty entries into the defaultdicts.
                "linked_normalized_items": len(linked_normalized_ids.get(catalog_id, ())),
                "linked_purchase_rows": linked_purchase_counts.get(catalog_id, 0),
                "score": len(overlap),
            }
        )

    # Best score first; name and id make the order deterministic on ties.
    ranked_rows.sort(
        key=lambda ranked: (-ranked["score"], ranked["catalog_name"], ranked["catalog_id"])
    )
    return ranked_rows[:limit]
def suggestion_display_rows(suggestions, purchase_rows, catalog_rows):
linked_purchase_counts = defaultdict(int)
linked_normalized_ids = defaultdict(set)
@@ -235,6 +293,15 @@ def suggestion_display_rows(suggestions, purchase_rows, catalog_rows):
return display_rows
def print_catalog_rows(rows):
    """Echo catalog rows as a numbered list, counting from 1.

    Each row must provide ``catalog_name``, ``linked_normalized_items`` and
    ``linked_purchase_rows``. ``product_type`` and ``category`` are optional
    and print as empty text when missing.
    """
    for position, entry in enumerate(rows, start=1):
        name = entry["catalog_name"]
        product_type = entry.get("product_type", "")
        category = entry.get("category", "")
        item_count = entry["linked_normalized_items"]
        purchase_count = entry["linked_purchase_rows"]
        line = (
            f" [{position}] {name}, {product_type}, {category} "
            f"({item_count} items, {purchase_count} rows)"
        )
        click.echo(line)
def build_display_lines(related_rows):
lines = []
for index, row in enumerate(sort_related_items(related_rows), start=1):
@@ -267,12 +334,7 @@ def choose_existing_catalog(display_rows, normalized_name, matched_count):
f"Select the catalog_name to associate {matched_count} items with:",
fg=INFO_COLOR,
)
for index, row in enumerate(display_rows, start=1):
click.echo(
f" [{index}] {row['catalog_name']}, {row.get('product_type', '')}, "
f"{row.get('category', '')} ({row['linked_normalized_items']} items, "
f"{row['linked_purchase_rows']} rows)"
)
print_catalog_rows(display_rows)
choice = click.prompt(
click.style("selection", fg=PROMPT_COLOR),
type=click.IntRange(1, len(display_rows)),
@@ -314,18 +376,13 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
click.echo(line)
if suggestions:
click.echo(f"{len(suggestions)} catalog_name suggestions found:")
for index, suggestion in enumerate(suggestions, start=1):
click.echo(
f" [{index}] {suggestion['catalog_name']}, {suggestion.get('product_type', '')}, "
f"{suggestion.get('category', '')} ({suggestion['linked_normalized_items']} items, "
f"{suggestion['linked_purchase_rows']} rows)"
)
print_catalog_rows(suggestions)
else:
click.echo("no catalog_name suggestions found")
prompt_bits = []
if suggestions:
prompt_bits.append("[#] link to suggestion")
prompt_bits.extend(["[l]ink existing", "[n]ew", "[s]kip", "e[x]clude", "[q]uit"])
prompt_bits.extend(["[s]earch", "[n]ew", "e[x]clude", "[q]uit"])
click.secho(" ".join(prompt_bits) + " >", fg=PROMPT_COLOR)
action = click.prompt("", type=str, prompt_suffix=" ").strip().lower()
if action.isdigit() and suggestions:
@@ -346,14 +403,42 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
if action == "q":
return None, None
if action == "s":
return {
"normalized_item_id": queue_row["normalized_item_id"],
"catalog_id": "",
"resolution_action": "skip",
"status": "pending",
"resolution_notes": queue_row.get("resolution_notes", ""),
"reviewed_at": str(date.today()),
}, None
while True:
query = click.prompt(click.style("search", fg=PROMPT_COLOR), default="", show_default=False).strip()
if not query:
return prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queue_index, queue_total)
search_rows = search_catalog_rows(
query,
catalog_rows,
purchase_rows,
queue_row["normalized_item_id"],
)
if not search_rows:
click.echo("no matches found")
retry = click.prompt(
click.style("search again? [enter=yes, q=no]", fg=PROMPT_COLOR),
default="",
show_default=False,
).strip().lower()
if retry == "q":
return prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queue_index, queue_total)
continue
click.echo(f"{len(search_rows)} search results found:")
print_catalog_rows(search_rows)
choice = click.prompt(
click.style("selection", fg=PROMPT_COLOR),
type=click.IntRange(1, len(search_rows)),
)
chosen_row = search_rows[choice - 1]
notes = click.prompt(click.style("link notes", fg=PROMPT_COLOR), default="", show_default=False)
return {
"normalized_item_id": queue_row["normalized_item_id"],
"catalog_id": chosen_row["catalog_id"],
"resolution_action": "link",
"status": "approved",
"resolution_notes": notes,
"reviewed_at": str(date.today()),
}, None
if action == "x":
notes = click.prompt(click.style("exclude notes", fg=PROMPT_COLOR), default="", show_default=False)
return {
@@ -364,45 +449,6 @@ def prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queu
"resolution_notes": notes,
"reviewed_at": str(date.today()),
}, None
if action == "l":
display_rows = suggestions or [
{
"catalog_id": row["catalog_id"],
"catalog_name": row["catalog_name"],
"reason": "catalog sample",
"product_type": row.get("product_type", ""),
"category": row.get("category", ""),
"linked_normalized_items": 0,
"linked_purchase_rows": 0,
}
for row in catalog_rows[:10]
if row.get("catalog_id")
]
while True:
catalog_id, outcome = choose_existing_catalog(display_rows, normalized_name, matched_count)
if outcome == "skip":
return {
"normalized_item_id": queue_row["normalized_item_id"],
"catalog_id": "",
"resolution_action": "skip",
"status": "pending",
"resolution_notes": queue_row.get("resolution_notes", ""),
"reviewed_at": str(date.today()),
}, None
if outcome == "quit":
return None, None
if outcome == "back":
continue
break
notes = click.prompt(click.style("link notes", fg=PROMPT_COLOR), default="", show_default=False)
return {
"normalized_item_id": queue_row["normalized_item_id"],
"catalog_id": catalog_id,
"resolution_action": "link",
"status": "approved",
"resolution_notes": notes,
"reviewed_at": str(date.today()),
}, None
if action != "n":
click.secho("invalid action", fg=WARNING_COLOR)
return prompt_resolution(queue_row, related_rows, purchase_rows, catalog_rows, queue_index, queue_total)