"""
Data Retrieval Filters - Module tách biệt cho logic lọc sản phẩm.
Chứa các Maps (Color, Style...) và các hàm lọc Hard/Soft Filter.
"""

import logging
import re

logger = logging.getLogger(__name__)


# Field extractors for the "<field>: <value>. " layout of a product description.
# Every pattern shares one shape: after "<field>:" and optional whitespace it
# lazily captures the value up to the first period that is followed by
# whitespace or end-of-string, or up to end-of-string when there is no such
# period. Matching is case-insensitive and the field name is not anchored, so
# it can also match as the tail of a longer field name.
# NOTE(review): in the visible part of this module only RE_PRODUCT_LINE_EN and
# RE_PRODUCT_LINE_VN are referenced (by filter_by_product_name); the others are
# presumably used by importers - confirm before removing any of them.
RE_PRODUCT_LINE_EN = re.compile(r"product_line_en:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_PRODUCT_LINE_VN = re.compile(r"product_line_vn:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_MASTER_COLOR = re.compile(r"master_color:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_FORM_SLEEVE = re.compile(r"form_sleeve:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_STYLE = re.compile(r"style:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_FITTING = re.compile(r"fitting:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_MATERIAL = re.compile(r"material_group:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_NECKLINE = re.compile(r"form_neckline:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)
RE_SEASON = re.compile(r"season:\s*(.*?)(?:\.(?:\s|$)|$)", re.IGNORECASE)


# Gender mapping: lower-cased requested gender -> values that filter_by_gender
# accepts after "gender_by_product: " in a product description.
# Terms missing from this map are matched literally by filter_by_gender.
# NOTE(review): "men" also accepts "boy" while its Vietnamese twin "nam" does
# not - confirm whether that asymmetry is intentional.
GENDER_MAP = {
    "men": ["men", "nam", "male", "boy"],
    "nam": ["men", "nam", "male"],
    "women": ["women", "nữ", "female", "nu"],
    "nữ": ["women", "nữ", "female", "nu"],
    "boy": ["boy", "bé trai", "be trai"],
    "bé trai": ["boy", "bé trai", "be trai"],
    "girl": ["girl", "bé gái", "be gai"],
    "bé gái": ["girl", "bé gái", "be gai"],
    "unisex": ["unisex"],
}

# Age mapping: lower-cased requested age group -> values that filter_by_age
# accepts after "age_by_product: " in a product description.
# NOTE(review): "kid" accepts "child"/"children" but "trẻ em" does not -
# confirm whether the two keys are meant to be equivalent.
AGE_MAP = {
    "adult": ["adult", "người lớn", "nguoi lon"],
    "người lớn": ["adult", "người lớn", "nguoi lon"],
    "kid": ["kid", "trẻ em", "tre em", "kids", "child", "children"],
    "trẻ em": ["kid", "trẻ em", "tre em", "kids"],
}

# Color mapping: lower-cased requested color (Vietnamese or English) -> color
# names treated as equivalent to it. The bare key "xanh" spans both the blue
# and the green families; "xanh dương" / "xanh lá" narrow it down.
# NOTE(review): not referenced in the visible part of this module; presumably
# passed as `value_map` to filter_with_priority - confirm against the caller.
# The Vietnamese keys list more alternatives than their English twins
# (e.g. "nâu" vs "brown"); verify that is intended.
COLOR_MAP = {
    "nâu": ["nâu", "brown", "khaki", "be", "camel", "chocolate", "coffee"],
    "brown": ["nâu", "brown", "khaki", "be", "camel"],
    "đen": ["đen", "black"],
    "black": ["đen", "black"],
    "trắng": ["trắng", "white", "cream", "kem"],
    "white": ["trắng", "white", "cream"],
    "hồng": ["hồng", "pink", "magenta", "rose"],
    "pink": ["hồng", "pink", "magenta"],
    "xanh": ["xanh", "blue", "green", "navy", "teal"],
    "xanh dương": ["xanh dương", "blue", "navy"],
    "xanh lá": ["xanh lá", "green"],
    "đỏ": ["đỏ", "red", "crimson"],
    "red": ["đỏ", "red"],
    "vàng": ["vàng", "yellow", "gold"],
    "yellow": ["vàng", "yellow"],
    "cam": ["cam", "orange"],
    "orange": ["cam", "orange"],
    "tím": ["tím", "purple", "violet"],
    "purple": ["tím", "purple"],
    "xám": ["xám", "grey", "gray"],
    "grey": ["xám", "grey", "gray"],
    "gray": ["xám", "grey", "gray"],
}

# Sleeve-length mapping: lower-cased requested term -> equivalent sleeve
# descriptions (long / short / sleeveless, Vietnamese and English spellings).
# NOTE(review): presumably used as `value_map` for the form_sleeve field -
# confirm against the caller; it is not referenced in the visible module.
SLEEVE_MAP = {
    "dài tay": ["full length sleeve", "long sleeve", "dài tay", "dai tay"],
    "long sleeve": ["full length sleeve", "long sleeve", "dài tay"],
    "full length sleeve": ["full length sleeve", "long sleeve"],
    "cộc tay": ["short sleeve", "cộc tay", "coc tay", "ngắn tay"],
    "short sleeve": ["short sleeve", "cộc tay", "ngắn tay"],
    "ngắn tay": ["short sleeve", "cộc tay", "ngắn tay"],
    "sát nách": ["sleeveless", "sát nách", "sat nach", "tank"],
    "sleeveless": ["sleeveless", "sát nách", "tank"],
}

# Style mapping: lower-cased requested style -> equivalent style names; every
# English key has a Vietnamese twin with the same value list.
# NOTE(review): presumably used as `value_map` for the style field - confirm
# against the caller; it is not referenced in the visible module.
STYLE_MAP = {
    "minimalist": ["minimalist", "minimal", "đơn giản"],
    "đơn giản": ["minimalist", "minimal", "đơn giản"],
    "classic": ["classic", "cổ điển"],
    "cổ điển": ["classic", "cổ điển"],
    "basic": ["basic", "cơ bản"],
    "cơ bản": ["basic", "cơ bản"],
    "sporty": ["sporty", "thể thao", "sport"],
    "thể thao": ["sporty", "thể thao", "sport"],
    "elegant": ["elegant", "thanh lịch"],
    "thanh lịch": ["elegant", "thanh lịch"],
    "casual": ["casual", "thường ngày"],
    "thường ngày": ["casual", "thường ngày"],
    "feminine": ["feminine", "nữ tính"],
    "nữ tính": ["feminine", "nữ tính"],
}

# Fit mapping: lower-cased requested fit -> equivalent fit names. "loose",
# "rộng" and "oversize" all resolve to the same value list.
# NOTE(review): presumably used as `value_map` for the fitting field - confirm
# against the caller; it is not referenced in the visible module.
FITTING_MAP = {
    "slim": ["slim", "ôm", "slim fit"],
    "ôm": ["slim", "ôm", "slim fit"],
    "regular": ["regular", "vừa", "regular fit"],
    "vừa": ["regular", "vừa", "regular fit"],
    "loose": ["loose", "rộng", "oversize"],
    "rộng": ["loose", "rộng", "oversize"],
    "oversize": ["loose", "rộng", "oversize"],
}

# Neckline mapping: lower-cased requested neckline -> equivalent neckline names.
# NOTE(review): "cổ tim" resolves to "v-neck" as well as "sweetheart", and
# "cổ sơ mi" has no English key of its own - confirm both are intended.
# Presumably used as `value_map` for the form_neckline field; verify against
# the caller, it is not referenced in the visible module.
NECKLINE_MAP = {
    "cổ tròn": ["round neck", "cổ tròn", "crew neck"],
    "round neck": ["round neck", "cổ tròn", "crew neck"],
    "cổ v": ["v-neck", "cổ v", "v neck"],
    "v-neck": ["v-neck", "cổ v", "v neck"],
    "cổ tim": ["v-neck", "cổ tim", "sweetheart"],
    "cổ polo": ["polo collar", "cổ polo", "polo"],
    "polo": ["polo collar", "cổ polo", "polo"],
    "cổ lọ": ["turtle neck", "cổ lọ", "turtleneck"],
    "turtle neck": ["turtle neck", "cổ lọ", "turtleneck"],
    "cổ sơ mi": ["shirt collar", "cổ sơ mi", "collar"],
}

# Material mapping: lower-cased requested material -> equivalent material names.
# NOTE(review): "cotton" and "polyester" have no Vietnamese key, unlike the
# other materials - confirm that is intended. Presumably used as `value_map`
# for the material_group field; verify against the caller, it is not
# referenced in the visible module.
MATERIAL_MAP = {
    "cotton": ["cotton", "100% cotton", "cotton blend"],
    "polyester": ["polyester", "polyester blend", "poly"],
    "len": ["wool", "len", "cashmere"],
    "wool": ["wool", "len", "cashmere"],
    "lụa": ["silk", "lụa"],
    "silk": ["silk", "lụa"],
    "jean": ["denim", "jean", "jeans"],
    "denim": ["denim", "jean", "jeans"],
    "kaki": ["khaki", "kaki", "chino"],
    "khaki": ["khaki", "kaki", "chino"],
}

# Season mapping: lower-cased requested season -> equivalent season names,
# including the short codes "fw" and "ss".
# NOTE(review): "fall winter" omits "fw" although "thu đông" lists it - confirm.
# Presumably used as `value_map` for the season field; verify against the
# caller, it is not referenced in the visible module.
SEASON_MAP = {
    "thu đông": ["fall winter", "thu đông", "autumn winter", "fw"],
    "fall winter": ["fall winter", "thu đông", "autumn winter"],
    "xuân hè": ["spring summer", "xuân hè", "ss"],
    "spring summer": ["spring summer", "xuân hè", "ss"],
    "year": ["year", "all season", "cả năm"],
    "cả năm": ["year", "all season", "cả năm"],
}


# ==============================================================================
# 2. HARD FILTERS (mandatory: non-matching products are dropped; values are matched as substrings)
# ==============================================================================


def filter_by_gender(products: list[dict], requested_gender: str) -> list[dict]:
    """Keep only the products whose description declares an acceptable gender.

    Hard filter: a product is kept when its lower-cased ``description_text_full``
    contains ``"gender_by_product: <value>"`` for any value that ``GENDER_MAP``
    lists for the requested gender. A gender that is not a key of the map is
    matched literally.

    Args:
        products: Product dicts; only ``description_text_full`` is read.
        requested_gender: Gender term; case and surrounding whitespace are
            ignored. A falsy value disables the filter.

    Returns:
        ``products`` itself when no gender is requested, otherwise a new list
        with the matching products (possibly empty, in which case a warning is
        logged).
    """
    if not requested_gender:
        return products

    requested_lower = requested_gender.lower().strip()
    acceptable_genders = GENDER_MAP.get(requested_lower, [requested_lower])

    # Plain substring needles, built once for the whole product list.
    needles = [f"gender_by_product: {g}" for g in acceptable_genders]

    filtered = []
    for p in products:
        # `or ""` also covers a key that is present but holds None, which
        # `dict.get(key, "")` alone would pass through to `.lower()`.
        desc = (p.get("description_text_full") or "").lower()
        if any(needle in desc for needle in needles):
            filtered.append(p)

    if not filtered:
        logger.warning("🚫 No products match gender '%s'", requested_gender)

    return filtered


def filter_by_age(products: list[dict], requested_age: str) -> list[dict]:
    """Keep only the products whose description declares an acceptable age group.

    Hard filter: a product is kept when its lower-cased ``description_text_full``
    contains ``"age_by_product: <value>"`` for any value that ``AGE_MAP`` lists
    for the requested age group. A term that is not a key of the map is matched
    literally.

    Args:
        products: Product dicts; only ``description_text_full`` is read.
        requested_age: Age-group term; case and surrounding whitespace are
            ignored. A falsy value disables the filter.

    Returns:
        ``products`` itself when no age group is requested, otherwise a new
        list with the matching products (possibly empty, in which case a
        warning is logged).
    """
    if not requested_age:
        return products

    requested_lower = requested_age.lower().strip()
    acceptable_ages = AGE_MAP.get(requested_lower, [requested_lower])

    # Plain substring needles, built once for the whole product list.
    needles = [f"age_by_product: {a}" for a in acceptable_ages]

    filtered = []
    for p in products:
        # `or ""` also covers a key that is present but holds None, which
        # `dict.get(key, "")` alone would pass through to `.lower()`.
        desc = (p.get("description_text_full") or "").lower()
        if any(needle in desc for needle in needles):
            filtered.append(p)

    if not filtered:
        logger.warning("🚫 No products match age '%s'", requested_age)

    return filtered


# Requested product term (lower-cased) -> substrings accepted inside a
# product's `product_line_en` / `product_line_vn` value. Built once at import
# time instead of on every filter_by_product_name call.
_PRODUCT_LINE_MAP: dict[str, list[str]] = {
    # Skirt (chân váy)
    "chân váy": ["skirt", "chân váy", "chan vay"],
    "chan vay": ["skirt", "chân váy", "chan vay"],
    "skirt": ["skirt", "chân váy"],
    # Dress (váy / đầm), including compound terms the AI might generate
    "váy": ["dress", "váy", "vay", "đầm", "dam"],
    "vay": ["dress", "váy", "vay"],
    "dress": ["dress", "váy", "đầm"],
    "đầm": ["dress", "đầm", "dam", "váy"],
    "dam": ["dress", "đầm", "dam"],
    "váy liền thân": ["dress", "váy", "vay", "đầm", "dam"],  # AI often generates this
    "vay lien than": ["dress", "váy", "vay", "đầm", "dam"],
    "đầm liền thân": ["dress", "váy", "vay", "đầm", "dam"],
    "dam lien than": ["dress", "váy", "vay", "đầm", "dam"],
    "váy liền": ["dress", "váy", "vay", "đầm", "dam"],
    "vay lien": ["dress", "váy", "vay", "đầm", "dam"],
    "váy đầm": ["dress", "váy", "vay", "đầm", "dam"],  # AI sometimes generates this
    "vay dam": ["dress", "váy", "vay", "đầm", "dam"],
    "váy bé gái": ["dress", "váy", "vay", "đầm", "dam"],  # Kids dresses
    "vay be gai": ["dress", "váy", "vay", "đầm", "dam"],
    "chân váy bé gái": ["skirt", "chân váy", "chan vay"],
    "chan vay be gai": ["skirt", "chân váy", "chan vay"],
    # Shirt / top (áo)
    "áo": ["shirt", "top", "polo", "t-shirt", "blouse", "áo", "ao"],
    "ao": ["shirt", "top", "áo", "ao"],
    "áo sơ mi": ["shirt", "áo sơ mi", "ao so mi"],
    "ao so mi": ["shirt", "áo sơ mi"],
    "áo thun": ["t-shirt", "tee", "áo thun", "ao thun"],
    "ao thun": ["t-shirt", "áo thun"],
    "áo polo": ["polo", "áo polo", "ao polo"],
    "ao polo": ["polo", "áo polo"],
    "áo khoác": ["jacket", "coat", "áo khoác", "ao khoac", "outerwear"],
    "ao khoac": ["jacket", "coat", "áo khoác"],
    "áo len": ["sweater", "knit", "knitwear", "áo len", "ao len"],
    "ao len": ["sweater", "knit", "áo len"],
    "áo hoodie": ["hoodie", "áo hoodie"],
    "hoodie": ["hoodie", "áo hoodie"],
    "shirt": ["shirt", "áo", "ao"],
    "t-shirt": ["t-shirt", "tee", "áo thun"],
    "top": ["top", "áo", "blouse"],
    # Pants (quần)
    "quần": ["pants", "trousers", "shorts", "jeans", "quần", "quan"],
    "quan": ["pants", "trousers", "quần", "quan"],
    "quần jeans": ["jeans", "denim", "quần jeans", "quan jeans"],
    "quan jeans": ["jeans", "quần jeans"],
    "jeans": ["jeans", "denim", "quần jeans"],
    "quần short": ["shorts", "quần short", "quan short"],
    "quan short": ["shorts", "quần short"],
    "shorts": ["shorts", "quần short"],
    "quần dài": ["pants", "trousers", "quần dài", "quan dai"],
    "quan dai": ["pants", "trousers", "quần dài"],
    "pants": ["pants", "trousers", "quần"],
    "trousers": ["pants", "trousers", "quần"],
    # Set (bộ)
    "bộ": ["set", "bộ", "bo", "bộ quần áo"],
    "bo": ["set", "bộ", "bo"],
    "bộ quần áo": ["set", "bộ quần áo", "bo quan ao"],
    "bo quan ao": ["set", "bộ quần áo"],
    "set": ["set", "bộ"],
    # Accessories (phụ kiện)
    "phụ kiện": ["accessory", "accessories", "phụ kiện", "phu kien"],
    "phu kien": ["accessory", "phụ kiện"],
    "accessory": ["accessory", "accessories", "phụ kiện"],
    "túi": ["bag", "túi", "tui"],
    "tui": ["bag", "túi"],
    "bag": ["bag", "túi"],
    "mũ": ["hat", "cap", "mũ", "mu", "nón"],
    "mu": ["hat", "cap", "mũ"],
    "hat": ["hat", "cap", "mũ"],
    "cap": ["cap", "hat", "mũ"],
    "khăn": ["scarf", "khăn", "khan"],
    "khan": ["scarf", "khăn"],
    "scarf": ["scarf", "khăn"],
    # Socks (tất)
    "tất": ["socks", "tất", "tat"],
    "tat": ["socks", "tất"],
    "socks": ["socks", "tất"],
}


def filter_by_product_name(products: list[dict], requested_product_name: str) -> list[dict]:
    """Keep only the products whose product line matches the requested category.

    Hard filter. The requested term is lower-cased, stripped and expanded
    through ``_PRODUCT_LINE_MAP`` (an unknown term is used literally). A product
    is kept when any acceptable value occurs as a substring of its
    ``product_line_en`` or ``product_line_vn`` value, both extracted from
    ``description_text_full`` and lower-cased.

    NOTE(review): because matching is by substring, a request for "váy" (dress)
    also keeps products whose Vietnamese product line is "chân váy" (skirt).
    Confirm whether that is acceptable before tightening the match.

    Args:
        products: Product dicts; only ``description_text_full`` is read.
        requested_product_name: Category term. A falsy value disables the
            filter.

    Returns:
        ``products`` itself when no category is requested, otherwise a new list
        with the matching products (possibly empty).
    """
    if not requested_product_name:
        return products

    requested_lower = requested_product_name.lower().strip()
    acceptable_values = _PRODUCT_LINE_MAP.get(requested_lower, [requested_lower])

    filtered = []
    for p in products:
        # `or ""` also covers a key that is present but holds None, which
        # would otherwise make the regex search raise TypeError.
        desc = p.get("description_text_full") or ""

        match_en = RE_PRODUCT_LINE_EN.search(desc)
        product_line_en = match_en.group(1).strip().lower() if match_en else ""

        match_vn = RE_PRODUCT_LINE_VN.search(desc)
        product_line_vn = match_vn.group(1).strip().lower() if match_vn else ""

        # Substring test against either language's product line.
        if any(value in product_line_en or value in product_line_vn for value in acceptable_values):
            filtered.append(p)
        else:
            logger.debug(
                "❌ Product line '%s'/'%s' does NOT match any of %s",
                product_line_en[:20],
                product_line_vn[:20],
                acceptable_values[:3],
            )

    if not filtered:
        logger.warning(
            "🚫 No products match product_line '%s' (acceptable: %s)", requested_product_name, acceptable_values[:3]
        )
    else:
        logger.info(
            "✅ Product line filter: %s/%s products matched for '%s'",
            len(filtered),
            len(products),
            requested_product_name,
        )

    return filtered


# ==============================================================================
# 3. SOFT FILTERS (Priority fallback)
# ==============================================================================


def _match_field_values(products: list[dict], field_name: str, acceptable_values: list[str]) -> list[dict]:
    """Return the products whose description has ``field_name`` set to any acceptable value.

    Both the ``"field: value"`` and the ``"field:value"`` spellings are
    accepted. Matching is a substring test on the lower-cased description.
    """
    # Needles are lower-cased because the description is: a needle containing
    # an upper-case character could otherwise never match.
    needles = [f"{field_name}: {v}".lower() for v in acceptable_values]
    needles += [f"{field_name}:{v}".lower() for v in acceptable_values]

    matched = []
    for p in products:
        # `or ""` also covers a key that is present but holds None.
        desc = (p.get("description_text_full") or "").lower()
        if any(needle in desc for needle in needles):
            matched.append(p)
    return matched


def _filter_single_value(products: list[dict], value: str, field_name: str, value_map: dict) -> list[dict]:
    """Filter products by one requested value of ``field_name``.

    Args:
        products: Product dicts; only ``description_text_full`` is read.
        value: Requested value. A falsy value disables the filter.
        field_name: Field label as it appears in the description text.
        value_map: Lower-cased requested value -> list of acceptable values.
            A value missing from the map is matched literally.

    Returns:
        ``products`` itself when ``value`` is falsy, otherwise a new list with
        the matching products.
    """
    if not value:
        return products

    value_lower = value.lower().strip()
    return _match_field_values(products, field_name, value_map.get(value_lower, [value_lower]))


def filter_with_priority(
    products: list[dict], requested_values, field_name: str, value_map: dict, display_name: str
) -> tuple[list[dict], dict]:
    """Soft filter: try the requested values in priority order and stop at the first hit.

    The first entry of ``requested_values`` is the primary choice; later
    entries are fallbacks that are only tried when every earlier entry matched
    nothing.

    Args:
        products: Product dicts; only ``description_text_full`` is read.
        requested_values: A single string or a list of strings, highest
            priority first. Any other type, or an empty list, disables the
            filter.
        field_name: Field label as it appears in the description text.
        value_map: Lower-cased requested value -> list of acceptable values.
            A value missing from the map is matched literally (lower-cased).
        display_name: Human-readable field name used in the user-facing
            messages.

    Returns:
        ``(matched_products, info)``. When the filter is disabled the input
        list is returned with ``{"fallback_used": False}``. Otherwise ``info``
        holds ``requested_value``, ``matched_value``, ``fallback_used`` and
        ``message``; when nothing matched at all the list is empty,
        ``matched_value`` is ``None`` and ``recommendation`` is set to
        ``"suggest_alternatives"``.
    """
    if isinstance(requested_values, str):
        value_list = [requested_values]
    elif isinstance(requested_values, list):
        value_list = requested_values
    else:
        return products, {"fallback_used": False}

    if not value_list:
        return products, {"fallback_used": False}

    primary_value = value_list[0]
    label = display_name.lower()

    for position, candidate in enumerate(value_list):
        candidate_lower = candidate.lower().strip()
        # Default to the lower-cased term for primary and fallback values
        # alike, since descriptions are compared in lower case.
        acceptable = value_map.get(candidate_lower, [candidate_lower])
        matched = _match_field_values(products, field_name, acceptable)
        if not matched:
            continue

        if position == 0:
            return matched, {
                "requested_value": primary_value,
                "matched_value": primary_value,
                "fallback_used": False,
                "message": None,
            }
        return matched, {
            "requested_value": primary_value,
            "matched_value": candidate,
            "fallback_used": True,
            "message": f"Không có {label} {primary_value}, đã tìm {label} {candidate} thay thế.",
        }

    logger.warning("🚫 No products match any %s in %s", field_name, value_list)

    # Nothing matched: build a message that offers the fallback values instead.
    fallback_options = ", ".join(value_list[1:]) if len(value_list) > 1 else "không có"
    recommendation_msg = (
        f"⚠️ Shop hiện chưa có {label} '{primary_value}'. "
        f"Đề xuất {label} khác: {fallback_options}. "
        f"Bạn có muốn xem những {label} này không?"
    )

    return [], {
        "requested_value": primary_value,
        "matched_value": None,
        "fallback_used": True,
        "message": recommendation_msg,
        "recommendation": "suggest_alternatives",
    }


def format_product_results(products: list[dict], max_products: int = 15) -> list[dict]:
    """Group raw product rows by product and shape them for display.

    Rows are grouped by ``magento_ref_code`` (or, when that is missing, by the
    part of the SKU before the first ``-``). Each group becomes:

    - a flat item (``sku``, ``name``, ``color``, ``price``, ``sale_price``,
      ``url``, ``thumbnail_image_url``) when it has exactly one distinct
      variant;
    - a grouped item (``product_id``, ``name``, ``variants``, ``all_skus``,
      ``primary_sku``) when it has several.

    Args:
        products: Raw rows. Reads ``internal_ref_code`` (the SKU; rows without
            it are skipped), ``magento_ref_code``, ``product_color_code``,
            ``original_price``, ``sale_price`` and ``description_text_full``.
        max_products: Maximum number of products (groups) returned, in first-
            seen order. Defaults to 15.

    Returns:
        At most ``max_products`` flat or grouped items.
    """
    grouped: dict[str, dict] = {}  # {product_id: {product info + variants}}

    for p in products:
        sku = p.get("internal_ref_code")
        if not sku:
            continue

        parsed = parse_description_text(p.get("description_text_full", ""))

        # Derive the product id from the SKU when needed (6TW25W005-SK010 -> 6TW25W005).
        product_id = p.get("magento_ref_code") or sku.split("-")[0]
        # `or ""` keeps a null color code from being rendered as "None (...)".
        color_code = p.get("product_color_code") or ""
        color_name = parsed.get("master_color", "")
        variant_label = f"{color_code} ({color_name})" if color_name else color_code

        # The product name comes from the first row seen for the product.
        entry = grouped.setdefault(
            product_id,
            {"product_id": product_id, "name": parsed.get("product_name", ""), "variants": []},
        )
        entry["variants"].append(
            {
                "sku": sku,
                "color_code": color_code,  # used by the dedup step below
                "color": variant_label,
                "price": int(p.get("original_price") or 0),
                "sale_price": int(p.get("sale_price") or 0),
                "url": parsed.get("product_web_url", ""),
                "thumbnail_image_url": parsed.get("product_image_url_thumbnail", ""),
            }
        )

    formatted = []
    flat_count = 0

    for product_data in list(grouped.values())[: max(max_products, 0)]:
        unique_variants: dict[str, dict] = {}
        for v in product_data["variants"]:
            # When the row's SKU is just the base product code, fall back to
            # the color code so the color variants stay distinguishable.
            if v["sku"] == product_data["product_id"] and v["color_code"]:
                v["sku"] = v["color_code"]
            # Dedup by final SKU; the first occurrence wins.
            unique_variants.setdefault(v["sku"], v)

        cleaned_variants = list(unique_variants.values())
        all_skus = list(unique_variants)

        if len(cleaned_variants) == 1:
            # Single variant -> flat item.
            v = cleaned_variants[0]
            formatted.append(
                {
                    "sku": v["sku"],
                    "name": product_data["name"],
                    "color": v["color"],
                    "price": v["price"],
                    "sale_price": v["sale_price"],
                    "url": v["url"],
                    "thumbnail_image_url": v["thumbnail_image_url"],
                }
            )
            flat_count += 1
        else:
            # Several variants -> grouped item.
            formatted.append(
                {
                    "product_id": product_data["product_id"],
                    "name": product_data["name"],
                    "variants": cleaned_variants,
                    "all_skus": all_skus,
                    "primary_sku": all_skus[0],
                }
            )

    # One lazily-formatted summary line; the counts are tracked while building.
    logger.info(
        "📦 Formatted %s products (flat=%s, grouped=%s)",
        len(formatted),
        flat_count,
        len(formatted) - flat_count,
    )
    return formatted


def parse_description_text(desc: str) -> dict:
    """Extract the display fields from a ``description_text_full`` string.

    The text is a run of ``"<field>: <value>. "`` segments. Four fields are
    pulled out, each only when its value is terminated by a period that is
    followed either by one specific next field or by the end of the text:

    - ``product_name``: up to ``". master_color:"``
    - ``product_image_url_thumbnail``: an http(s) URL up to ``". product_web_url:"``
    - ``product_web_url``: an http(s) URL up to ``". description_text:"``
    - ``master_color``: up to ``". product_image_url:"``

    Args:
        desc: The description text; a falsy value yields an empty dict.

    Returns:
        A dict holding only the fields that were found, values stripped.
    """
    if not desc:
        return {}

    # (result key, pattern) pairs, applied in this order.
    extractors = (
        ("product_name", r"product_name:\s*(.+?)\.(?:\s+master_color:|$)"),
        (
            "product_image_url_thumbnail",
            r"product_image_url_thumbnail:\s*(https?://[^\s]+?)\.(?:\s+product_web_url:|$)",
        ),
        ("product_web_url", r"product_web_url:\s*(https?://[^\s]+?)\.(?:\s+description_text:|$)"),
        ("master_color", r"master_color:\s*(.+?)\.(?:\s+product_image_url:|$)"),
    )

    fields = {}
    for key, pattern in extractors:
        found = re.search(pattern, desc)
        if found is not None:
            fields[key] = found.group(1).strip()
    return fields
