# import logging

# from common.embedding_service import create_embedding_async

# logger = logging.getLogger(__name__)


# def _escape(val: str) -> str:
#     """Thoát dấu nháy đơn để tránh SQL Injection cơ bản."""
#     return val.replace("'", "''")


# def _get_where_clauses(params) -> list[str]:
#     """
#     Xây dựng WHERE clauses theo thứ tự ưu tiên dựa trên selectivity thực tế

#     FILTER PRIORITY (Based on Canifa catalog analysis):

#     🔥 TIER 1 (99% selectivity):
#        1. SKU Code → 1-5 records

#     🎯 TIER 2 (50-70% selectivity):
#        2. Gender → Splits catalog in half
#        3. Age → Kids vs Adults split
#        4. Product Category → 10-15 categories

#     💎 TIER 3 (30-50% selectivity):
#        5. Material Group → Knit vs Woven (2 groups)
#        6. Price Range → Numeric filtering

#     🎨 TIER 4 (10-30% selectivity):
#        7. Season → 4 seasons
#        8. Style/Fitting → Multiple options

#     ⚠️ TIER 5 (<10% selectivity):
#        9. Form details → Granular attributes
#        10. Color → LOWEST selectivity (many SKUs share colors)

#     Early return: If SKU exists, skip low-selectivity filters
#     """
#     clauses = []

#     # 🔥 TIER 1: SKU/Product Code (Unique identifier)
#     # Selectivity: ~99% → 1 SKU = 1 style (3-5 colors max)
#     sku_clause = _get_sku_clause(params)
#     if sku_clause:
#         clauses.append(sku_clause)

#         # Early return optimization: SKU đã xác định product rõ ràng
#         # CHỈ GIỮ LẠI price filter (nếu có) để verify budget constraint
#         # BỎ QUA: gender, color, style, fitting... vì SKU đã unique

#         price_clauses = _get_price_clauses(params)
#         if price_clauses:
#             clauses.extend(price_clauses)

#         return clauses  # ⚡ STOP - Không thêm filter khác!

#     # 🎯 TIER 2: High-level categorization (50-70% reduction)
#     # Gender + Age + Category có selectivity cao nhất trong non-SKU filters
#     clauses.extend(_get_high_selectivity_clauses(params))

#     # 💎 TIER 3: Material & Price (30-50% reduction)
#     material_clause = _get_material_clause(params)
#     if material_clause:
#         clauses.append(material_clause)
#     clauses.extend(_get_price_clauses(params))

#     # 🎨 TIER 4: Attributes (10-30% reduction)
#     clauses.extend(_get_attribute_clauses(params))

#     # ⚠️ TIER 5: Granular details & Color (LAST - lowest selectivity)
#     clauses.extend(_get_form_detail_clauses(params))
#     color_clause = _get_color_clause(params)
#     if color_clause:
#         clauses.append(color_clause)  # Color ALWAYS LAST!

#     return clauses


# def _get_sku_clause(params) -> str | None:
#     """
#     TIER 1: SKU/Product Code (Highest selectivity - 99%)
#     1 SKU code = 1 product style (may have 3-5 color variants)

#     WHY SKU is always priority #1:
#     - 1 code = 1 unique product design
#     - Adding other filters (color, style, gender) is redundant
#     - Only price filter may be kept for budget validation

#     Example queries:
#     - "Mã 6OT25W010" → Only SKU needed
#     - "Mã 6OT25W010 màu xám" → Only SKU (color is for display/selection, not filtering)
#     - "Mã 6OT25W010 dưới 500k" → SKU + price (validate budget)
#     """
#     m_code = getattr(params, "magento_ref_code", None)
#     if m_code:
#         m = _escape(m_code)
#         return f"(magento_ref_code = '{m}' OR internal_ref_code = '{m}')"
#     return None


# def _get_color_clause(params) -> str | None:
#     """
#     TIER 5: Color (LOWEST selectivity - 5-10%)
#     Multiple SKUs share the same color (e.g., 50+ gray products)
#     ALWAYS filter color LAST after other constraints
#     """
#     color = getattr(params, "master_color", None)
#     if color:
#         c = _escape(color).lower()
#         return f"(LOWER(master_color) LIKE '%{c}%' OR LOWER(product_color_name) LIKE '%{c}%')"
#     return None


# def _get_high_selectivity_clauses(params) -> list[str]:
#     """
#     TIER 2: High-level categorization (50-70% reduction per filter)
#     Order: Gender → Age → Product Category
#     """
#     clauses = []

#     # Gender: Male/Female/Unisex split (50-70% reduction)
#     gender = getattr(params, "gender_by_product", None)
#     if gender:
#         clauses.append(f"gender_by_product = '{_escape(gender)}'")

#     # Age: Kids/Adults split (50% reduction of remaining)
#     age = getattr(params, "age_by_product", None)
#     if age:
#         clauses.append(f"age_by_product = '{_escape(age)}'")

#     # Product Category: Váy/Áo/Quần... (30-50% reduction)
#     product_line = getattr(params, "product_line_vn", None)
#     if product_line:
#         p = _escape(product_line).lower()
#         clauses.append(f"LOWER(product_line_vn) LIKE '%{p}%'")

#     return clauses


# def _get_material_clause(params) -> str | None:
#     """TIER 3: Material Group - Knit vs Woven (50% split)"""
#     material = getattr(params, "material_group", None)
#     if material:
#         m = _escape(material).lower()
#         return f"LOWER(material_group) LIKE '%{m}%'"
#     return None


# def _get_price_clauses(params) -> list[str]:
#     """TIER 3: Price Range - Numeric filtering (30-40% reduction)"""
#     clauses = []
#     p_min = getattr(params, "price_min", None)
#     if p_min is not None:
#         clauses.append(f"sale_price >= {p_min}")
#     p_max = getattr(params, "price_max", None)
#     if p_max is not None:
#         clauses.append(f"sale_price <= {p_max}")
#     return clauses


# def _get_attribute_clauses(params) -> list[str]:
#     """
#     TIER 4: Attributes (10-30% reduction)
#     Season, Style, Fitting
#     """
#     clauses = []

#     # Season: 4 seasons (~25% each)
#     season = getattr(params, "season", None)
#     if season:
#         s = _escape(season).lower()
#         clauses.append(f"LOWER(season) LIKE '%{s}%'")

#     # Style: Basic/Feminine/Sporty... (~15-20% reduction)
#     style = getattr(params, "style", None)
#     if style:
#         st = _escape(style).lower()
#         clauses.append(f"LOWER(style) LIKE '%{st}%'")

#     # Fitting: Regular/Slim/Loose (~15% reduction)
#     fitting = getattr(params, "fitting", None)
#     if fitting:
#         f = _escape(fitting).lower()
#         clauses.append(f"LOWER(fitting) LIKE '%{f}%'")

#     # Size Scale: S, M, L, 29, 30... (Specific filtering)
#     size = getattr(params, "size_scale", None)
#     if size:
#         sz = _escape(size).lower()
#         clauses.append(f"LOWER(size_scale) LIKE '%{sz}%'")

#     return clauses


# def _get_form_detail_clauses(params) -> list[str]:
#     """
#     TIER 5: Granular form details (<10% reduction each)
#     Neckline, Sleeve type
#     """
#     clauses = []

#     form_fields = [
#         ("form_neckline", "form_neckline"),
#         ("form_sleeve", "form_sleeve"),
#     ]

#     for param_name, col_name in form_fields:
#         val = getattr(params, param_name, None)
#         if val:
#             v = _escape(val).lower()
#             clauses.append(f"LOWER({col_name}) LIKE '%{v}%'")

#     return clauses


# async def build_starrocks_query(params, query_vector: list[float] | None = None) -> str:
#     """
#     Build SQL Hybrid tối ưu với Filter Priority:
#     1. Pre-filtering theo độ ưu tiên (SKU → Exact → Price → Partial)
#     2. Vector Search (HNSW Index) - Semantic understanding
#     3. Flexible Keyword Search (OR + Scoring) - Fuzzy matching fallback
#     4. Grouping (Gom màu theo style)
#     """

#     # --- Process vector in query field ---
#     query_text = getattr(params, "query", None)
#     # if query_text and query_vector is None:
#     #     query_vector = await create_embedding_async(query_text)

#     # --- Build filter clauses (OPTIMIZED ORDER) ---
#     where_clauses = _get_where_clauses(params)
#     where_sql = " AND ".join(where_clauses) if where_clauses else "1=1"

#     # --- Build SQL ---
#     if query_vector and len(query_vector) > 0:
#         v_str = "[" + ",".join(str(v) for v in query_vector) + "]"

#         sql = f"""
#         WITH top_sku_candidates AS (
#             SELECT
#                 approx_cosine_similarity(vector, {v_str}) as similarity_score,
#                 internal_ref_code,
#                 product_name,
#                 sale_price,
#                 original_price,
#                 master_color,
#                 product_image_url,
#                 product_image_url_thumbnail,
#                 product_web_url,
#                 description_text,
#                 material,
#                 material_group,
#                 gender_by_product,
#                 age_by_product,
#                 season,
#                 style,
#                 fitting,
#                 form_neckline,
#                 form_sleeve,
#                 product_line_vn,
#                 product_color_name
#             FROM shared_source.magento_product_dimension_with_text_embedding
#             WHERE {where_sql} AND vector IS NOT NULL
#             ORDER BY similarity_score DESC
#             LIMIT 50
#         )
#         SELECT
#             internal_ref_code,
#             ANY_VALUE(product_name) as product_name,
#             ANY_VALUE(sale_price) as sale_price,
#             ANY_VALUE(original_price) as original_price,
#             GROUP_CONCAT(DISTINCT master_color ORDER BY master_color SEPARATOR ', ') as available_colors,
#             ANY_VALUE(product_image_url) as product_image_url,
#             ANY_VALUE(product_image_url_thumbnail) as product_image_url_thumbnail,
#             ANY_VALUE(product_web_url) as product_web_url,
#             ANY_VALUE(description_text) as description_text,
#             ANY_VALUE(material) as material,
#             ANY_VALUE(material_group) as material_group,
#             ANY_VALUE(gender_by_product) as gender_by_product,
#             ANY_VALUE(age_by_product) as age_by_product,
#             ANY_VALUE(season) as season,
#             ANY_VALUE(style) as style,
#             ANY_VALUE(fitting) as fitting,
#             ANY_VALUE(form_neckline) as form_neckline,
#             ANY_VALUE(form_sleeve) as form_sleeve,
#             ANY_VALUE(product_line_vn) as product_line_vn,
#             MAX(similarity_score) as max_score
#         FROM top_sku_candidates
#         GROUP BY internal_ref_code
#         ORDER BY max_score DESC
#         LIMIT 10
#         """
#     else:
#         # ⚡ FALLBACK: FLEXIBLE KEYWORD SEARCH (OR + SCORING)
#         # Giải quyết case: User search "áo khoác nỉ" → DB có "Áo nỉ nam"
#         keywords = getattr(params, "keywords", None)
#         keyword_score_sql = ""
#         keyword_filter = ""

#         if keywords:
#             k_clean = _escape(keywords).lower().strip()
#             if k_clean:
#                 words = k_clean.split()

#                 # Build scoring expression: Each matched word = +1 point
#                 # Example: "áo khoác nỉ" (3 words)
#                 #   - "Áo nỉ nam" matches 2/3 → Score = 2
#                 #   - "Áo khoác nỉ hoodie" matches 3/3 → Score = 3
#                 score_terms = [
#                     f"(CASE WHEN LOWER(product_name) LIKE '%{w}%' THEN 1 ELSE 0 END)"
#                     for w in words
#                 ]
#                 keyword_score_sql = f"({' + '.join(score_terms)}) as keyword_match_score"

#                 # Minimum threshold: At least 50% of words must match
#                 # Example: 3 words → need at least 2 matches (66%)
#                 #          2 words → need at least 1 match (50%)
#                 min_matches = max(1, len(words) // 2)
#                 keyword_filter = f" AND ({' + '.join(score_terms)}) >= {min_matches}"

#         # Select clause with optional scoring
#         select_score = f", {keyword_score_sql}" if keyword_score_sql else ""
#         order_by = "keyword_match_score DESC, sale_price ASC" if keyword_score_sql else "sale_price ASC"

#         sql = f"""
#         SELECT
#             internal_ref_code,
#             ANY_VALUE(product_name) as product_name,
#             ANY_VALUE(sale_price) as sale_price,
#             ANY_VALUE(original_price) as original_price,
#             GROUP_CONCAT(DISTINCT master_color ORDER BY master_color SEPARATOR ', ') as available_colors,
#             ANY_VALUE(product_image_url) as product_image_url,
#             ANY_VALUE(product_image_url_thumbnail) as product_image_url_thumbnail,
#             ANY_VALUE(product_web_url) as product_web_url,
#             ANY_VALUE(description_text) as description_text,
#             ANY_VALUE(material) as material,
#             ANY_VALUE(material_group) as material_group,
#             ANY_VALUE(gender_by_product) as gender_by_product,
#             ANY_VALUE(age_by_product) as age_by_product,
#             ANY_VALUE(season) as season,
#             ANY_VALUE(style) as style,
#             ANY_VALUE(fitting) as fitting,
#             ANY_VALUE(form_neckline) as form_neckline,
#             ANY_VALUE(form_sleeve) as form_sleeve,
#             ANY_VALUE(product_line_vn) as product_line_vn
#             {select_score}
#         FROM shared_source.magento_product_dimension_with_text_embedding
#         WHERE {where_sql} {keyword_filter}
#         GROUP BY internal_ref_code
#         HAVING COUNT(*) > 0
#         ORDER BY {order_by}
#         LIMIT 10
#         """

#     # Log filter statistics
#     filter_info = f"Mode: {'Vector' if query_vector else 'Keyword'}, Filters: {len(where_clauses)}"
#     if where_clauses:
#         # Identify high-priority filters used
#         has_sku = any('internal_ref_code' in c or 'magento_ref_code' in c for c in where_clauses)
#         has_gender = any('gender_by_product' in c for c in where_clauses)
#         has_category = any('product_line_vn' in c for c in where_clauses)

#         priority_info = []
#         if has_sku:
#             priority_info.append("SKU")
#         if has_gender:
#             priority_info.append("Gender")
#         if has_category:
#             priority_info.append("Category")

#         if priority_info:
#             filter_info += f", Priority: {'+'.join(priority_info)}"

#     logger.info(f"📊 {filter_info}")

#     # Write SQL to file for debugging
#     try:
#         with open(r"d:\cnf\chatbot_canifa\backend\embedding.txt", "w", encoding="utf-8") as f:
#             f.write(sql)
#     except Exception as e:
#         logger.error(f"Failed to write SQL to embedding.txt: {e}")

#     return sql


import logging
import math
import re
import time

from common.embedding_service import create_embedding_async

logger = logging.getLogger(__name__)


def _escape(val: str) -> str:
    """Escape a value so it can be embedded in a single-quoted SQL literal.

    Backslashes are doubled first and single quotes second. Doubling only the
    quotes is not enough in MySQL-compatible dialects, where a backslash is
    itself an escape character: an input such as ``\\'`` would otherwise turn
    into an escaped quote followed by a quote that terminates the literal.

    LIKE wildcards (``%`` and ``_``) are intentionally left untouched, so a
    value used inside a LIKE pattern can still act as a wildcard.

    Args:
        val: Raw text to embed in a SQL string literal.

    Returns:
        The text with backslashes and single quotes doubled.
    """
    # Order matters: escaping quotes first would not change the result here,
    # but handling the escape character first keeps the rule easy to reason about.
    return val.replace("\\", "\\\\").replace("'", "''")


def _get_where_clauses(params) -> list[str]:
    """Collect every filter predicate that can be derived from ``params``.

    The predicates are returned in a fixed order: price bounds first, then
    metadata filters, then the special filters (product code and colour).

    Args:
        params: Object whose attributes carry the optional search filters.

    Returns:
        A flat list of SQL predicate strings; empty when no filter is set.
    """
    return [
        *_get_price_clauses(params),
        *_get_metadata_clauses(params),
        *_get_special_clauses(params),
    ]


def _sql_number(value, field: str) -> str:
    """Render ``value`` as a numeric SQL literal, rejecting anything non-numeric.

    Args:
        value: Candidate number (int, float, or something ``float()`` accepts).
        field: Name of the originating parameter, used in error messages.

    Returns:
        The textual form of the number, safe to place directly in SQL.

    Raises:
        ValueError: If the value is a bool, is not numeric, or is NaN/infinite.
    """
    # bool is a subclass of int; a True/False price bound is almost certainly
    # a caller mistake, so refuse it instead of emitting "True" into the SQL.
    if isinstance(value, bool):
        raise ValueError(f"{field} must be a number, got {value!r}")
    if isinstance(value, int):
        return str(value)
    try:
        number = float(value)
    except (TypeError, ValueError) as exc:
        raise ValueError(f"{field} must be a number, got {value!r}") from exc
    if not math.isfinite(number):
        raise ValueError(f"{field} must be a finite number, got {value!r}")
    return str(number)


def _get_price_clauses(params) -> list[str]:
    """Build the ``sale_price`` range predicates.

    ``price_min`` and ``price_max`` are read from ``params``; each one that is
    not ``None`` yields one predicate (a bound of ``0`` is kept). The values
    are interpolated into SQL text, so they are validated as finite numbers
    first to make sure arbitrary text can never reach the query.

    Args:
        params: Object that may expose ``price_min`` and/or ``price_max``.

    Returns:
        Zero, one, or two predicates, lower bound first.

    Raises:
        ValueError: If a bound is present but is not a finite number.
    """
    bounds = (("price_min", ">="), ("price_max", "<="))
    clauses = []
    for attr, operator in bounds:
        raw = getattr(params, attr, None)
        if raw is None:
            continue
        clauses.append(f"sale_price {operator} {_sql_number(raw, attr)}")
    return clauses


def _get_metadata_clauses(params) -> list[str]:
    """Build metadata predicates, mixing exact and partial matching.

    Gender and age are compared with ``=`` because they need an exact match.
    The remaining attributes use a case-insensitive ``LIKE '%value%'`` so a
    short value can match a longer stored label (for example "Knit" against
    "Knit - Dệt Kim"). Attributes that are missing or falsy are skipped.

    Args:
        params: Object whose attributes are named after the table columns.

    Returns:
        Exact-match predicates followed by partial-match predicates.
    """
    # The parameter name and the column name are identical for every field.
    exact_columns = ("gender_by_product", "age_by_product")
    partial_columns = (
        "season",
        "material_group",
        "product_line_vn",
        "style",
        "fitting",
        "form_neckline",
        "form_sleeve",
    )

    exact_predicates = [
        f"{column} = '{_escape(value)}'"
        for column in exact_columns
        if (value := getattr(params, column, None))
    ]
    # LOWER on both sides makes the substring match case-insensitive.
    partial_predicates = [
        f"LOWER({column}) LIKE '%{_escape(value).lower()}%'"
        for column in partial_columns
        if (value := getattr(params, column, None))
    ]
    return exact_predicates + partial_predicates


def _get_special_clauses(params) -> list[str]:
    """Build the predicates for the special filters: product code and colour.

    The product code is matched exactly against either ``magento_ref_code`` or
    ``internal_ref_code``. The colour is matched as a case-insensitive
    substring of either ``master_color`` or ``product_color_name``.

    Args:
        params: Object that may expose ``magento_ref_code`` and ``master_color``.

    Returns:
        Up to two parenthesised predicates, product code first.
    """
    predicates = []

    # Product code / SKU
    if ref_code := getattr(params, "magento_ref_code", None):
        safe_code = _escape(ref_code)
        predicates.append(f"(magento_ref_code = '{safe_code}' OR internal_ref_code = '{safe_code}')")

    # Colour
    if colour := getattr(params, "master_color", None):
        needle = _escape(colour).lower()
        predicates.append(f"(LOWER(master_color) LIKE '%{needle}%' OR LOWER(product_color_name) LIKE '%{needle}%')")

    return predicates


async def build_starrocks_query(params, query_vector: list[float] | None = None) -> str:
    """
    Build SQL Hybrid tối ưu với POST-FILTERING Strategy & Anti-Duplication.

    🔥 CHIẾN LƯỢC TỐI ƯU:
    1. Vector Search TRƯỚC (LIMIT 100) để tận dụng HNSW Index (tốc độ ~50ms).
    2. JOIN chính xác theo (code + màu) để tránh bùng nổ dữ liệu (Data Explosion).
    3. Dùng MAX_BY để lấy description của đúng thằng có score cao nhất.
    """
    logger.info("🔧 [DEBUG] build_starrocks_query STARTED")

    # --- 1. Xử lý Vector ---
    query_text = getattr(params, "query", None)
    if query_text and query_vector is None:
        emb_start = time.time()
        query_vector = await create_embedding_async(query_text)
        emb_time = (time.time() - emb_start) * 1000
        logger.info(f"⏱️ [TIMER] Embedding Generation: {emb_time:.2f}ms")

    # --- 2. Xây dựng Filter cho POST-FILTERING ---
    where_clauses = _get_where_clauses(params)
    post_filter_sql = " AND ".join(where_clauses) if where_clauses else "1=1"

    # --- 3. Build SQL ---
    if query_vector and len(query_vector) > 0:
        v_str = "[" + ",".join(str(v) for v in query_vector) + "]"

        # Alias các trường trong filter sang bảng t2 để tránh lỗi ambiguous
        post_filter_aliased = post_filter_sql
        fields_to_alias = [
            "sale_price",
            "gender_by_product",
            "age_by_product",
            "material_group",
            "season",
            "style",
            "fitting",
            "form_neckline",
            "form_sleeve",
            "product_line_vn",
            "magento_ref_code",
            "internal_ref_code",
            "master_color",
            "product_color_name",
        ]
        for field in fields_to_alias:
            post_filter_aliased = post_filter_aliased.replace(field, f"t2.{field}")

        sql = f"""
        WITH top_candidates AS (
            SELECT /*+ SET_VAR(ann_params='{{"ef_search":64}}') */
                internal_ref_code,
                product_color_code,
                approx_cosine_similarity(vector, {v_str}) as similarity_score
            FROM shared_source.magento_product_dimension_with_text_embedding
            WHERE vector IS NOT NULL
            ORDER BY similarity_score DESC
            LIMIT 100
        )
        SELECT
            t1.internal_ref_code,
            -- MAX_BY đảm bảo mô tả đi kèm đúng với thằng cao điểm nhất (Data Integrity)
            MAX_BY(t2.description_text_full, t1.similarity_score) as description_text_full,
            MAX(t1.similarity_score) as max_score
        FROM top_candidates t1
        JOIN shared_source.magento_product_dimension_with_text_embedding t2
            ON t1.internal_ref_code = t2.internal_ref_code 
            AND t1.product_color_code = t2.product_color_code -- QUAN TRỌNG: Tránh nhân bản dòng theo màu
        WHERE {post_filter_aliased}
        GROUP BY t1.internal_ref_code
        ORDER BY max_score DESC
        LIMIT 10
        """
    else:
        # FALLBACK: Keyword search
        keywords = getattr(params, "keywords", None)
        k_filter = ""
        if keywords:
            k = _escape(keywords).lower()
            k_filter = f" AND LOWER(product_name) LIKE '%{k}%'"

        where_sql = " AND ".join(where_clauses) if where_clauses else "1=1"

        sql = f"""
        SELECT
            internal_ref_code,
            -- Lấy đại diện 1 mô tả cho keyword search
            MAX(description_text_full) as description_text_full,
            MIN(sale_price) as min_price
        FROM shared_source.magento_product_dimension_with_text_embedding
        WHERE {where_sql} {k_filter}
        GROUP BY internal_ref_code
        ORDER BY min_price ASC
        LIMIT 10
        """

    # --- 4. Ghi Log Debug ---
    try:
        debug_path = r"d:\cnf\chatbot_canifa\backend\query.txt"
        with open(debug_path, "w", encoding="utf-8") as f:
            f.write(sql)
        logger.info(f"💾 SQL saved to: {debug_path}")
    except Exception as e:
        logger.error(f"Save log failed: {e}")

    return sql
