import logging

from common.embedding_service import create_embedding_async

logger = logging.getLogger(__name__)


def _escape(val: str) -> str:
    """Escape text for use inside a single-quoted SQL string literal.

    Backslashes are doubled first, then single quotes. The queries built in
    this module target StarRocks, which follows MySQL string-literal syntax:
    a backslash escapes the character after it. Doubling only the quote would
    let an input that starts with a backslash followed by a quote end the
    literal early, so the backslash has to be neutralised as well.

    NOTE: this is a stop-gap for string-built SQL, not a replacement for
    parameterized queries. LIKE wildcards (``%`` and ``_``) are left as-is.

    Args:
        val: Raw text that will be placed between single quotes.

    Returns:
        ``val`` with every backslash and every single quote doubled.
    """
    # Order matters: escaping quotes first would not be harmed, but doing the
    # backslash pass first keeps the two replacements independent of each other.
    return val.replace("\\", "\\\\").replace("'", "''")


def _get_where_clauses(params) -> list[str]:
    """Collect every WHERE condition derived from ``params``.

    The result is the price clauses, followed by the metadata clauses,
    followed by the special-case clauses, flattened into a single list.
    """
    builders = (_get_price_clauses, _get_metadata_clauses, _get_special_clauses)
    return [condition for build in builders for condition in build(params)]


def _sql_price_literal(value, field: str) -> str:
    """Render a price bound as a numeric SQL literal, rejecting anything else."""
    import math  # local import keeps this helper self-contained

    problem = f"{field} must be a finite number, got {value!r}"
    # bool is a subclass of int, but True/False are never meaningful prices.
    if isinstance(value, bool):
        raise ValueError(problem)
    if isinstance(value, int):
        return str(value)
    try:
        number = float(value)
    except (TypeError, ValueError) as exc:
        raise ValueError(problem) from exc
    # NaN and infinity would be rendered as bare words, which is not valid SQL.
    if not math.isfinite(number):
        raise ValueError(problem)
    return repr(number)


def _get_price_clauses(params) -> list[str]:
    """Build the ``sale_price`` range filters.

    ``price_min`` and ``price_max`` are read from ``params``; a bound that is
    missing or ``None`` adds no clause. A bound of ``0`` is kept, because only
    ``None`` means "not set".

    The bounds are written into the SQL text unquoted, so each one is
    validated as a finite number first. Without that check a string value
    would be spliced into the statement verbatim.

    Args:
        params: Object that may carry ``price_min`` / ``price_max``.

    Returns:
        Zero, one or two conditions, lower bound first.

    Raises:
        ValueError: If a bound is not a finite number.
    """
    bounds = (("price_min", ">="), ("price_max", "<="))
    clauses = []
    for field, operator in bounds:
        value = getattr(params, field, None)
        if value is not None:
            clauses.append(f"sale_price {operator} {_sql_price_literal(value, field)}")
    return clauses


def _get_metadata_clauses(params) -> list[str]:
    """Build metadata filters, mixing exact and partial matching.

    Gender and age are compared with ``=`` because they need an exact hit.
    The remaining columns use a case-insensitive ``LIKE '%value%'`` so that a
    short input can match a longer stored label (e.g. "Yarn" matching
    "Yarn - Sợi").

    Attributes that are missing or falsy on ``params`` produce no clause.
    """
    # Every attribute on ``params`` shares its name with the column it filters.
    exact_columns = ("gender_by_product", "age_by_product")
    partial_columns = (
        "season",
        "material_group",
        "product_line_vn",
        "style",
        "fitting",
        "form_neckline",
        "form_sleeve",
    )

    # Exact matches come first in the returned list.
    conditions = [
        f"{column} = '{_escape(raw)}'"
        for column in exact_columns
        if (raw := getattr(params, column, None))
    ]
    # Lower-casing both sides makes the substring match ignore letter case.
    conditions += [
        f"LOWER({column}) LIKE '%{_escape(raw).lower()}%'"
        for column in partial_columns
        if (raw := getattr(params, column, None))
    ]
    return conditions


def _get_special_clauses(params) -> list[str]:
    """Build the filters that span two columns: product code and colour.

    * ``magento_ref_code`` must equal either ``magento_ref_code`` or
      ``internal_ref_code``.
    * ``master_color`` must be a case-insensitive substring of either
      ``master_color`` or ``product_color_name``.

    Attributes that are missing or falsy on ``params`` add nothing.
    """
    conditions = []

    # Product code / SKU: the same value is tried against both code columns.
    if ref_code := getattr(params, "magento_ref_code", None):
        safe_code = _escape(ref_code)
        conditions.append(f"(magento_ref_code = '{safe_code}' OR internal_ref_code = '{safe_code}')")

    # Colour: substring match against both colour columns.
    if colour := getattr(params, "master_color", None):
        needle = _escape(colour).lower()
        conditions.append(f"(LOWER(master_color) LIKE '%{needle}%' OR LOWER(product_color_name) LIKE '%{needle}%')")

    return conditions


async def build_starrocks_query(params, query_vector: list[float] | None = None) -> str:
    """Build the StarRocks product-search SQL for ``params``.

    Two query shapes are produced:

    * Vector mode - used when a non-empty ``query_vector`` is available. Rows
      that pass the metadata filters and have a vector are ranked by cosine
      similarity, the top 50 are kept, then grouped by ``internal_ref_code``
      so the colours of one product collapse into a single row. The 10 groups
      with the highest score are returned.
    * Keyword mode - used otherwise. Rows are filtered by the metadata clauses
      plus an optional ``LIKE`` on ``product_name`` taken from
      ``params.keywords``, grouped the same way, ordered by ascending
      ``sale_price`` and limited to 10.

    Args:
        params: Object whose optional attributes (``query``, ``keywords`` and
            the filter fields) drive the query.
        query_vector: Pre-computed embedding. When it is ``None`` and
            ``params.query`` is truthy, the embedding is obtained from
            ``create_embedding_async``.

    Returns:
        The SQL text. Nothing is executed here.
    """
    # Embed the free-text query only when the caller did not pass a vector.
    search_text = getattr(params, "query", None)
    if query_vector is None and search_text:
        query_vector = await create_embedding_async(search_text)

    # With no filters at all, fall back to an always-true predicate so the
    # WHERE clause stays syntactically valid.
    conditions = _get_where_clauses(params)
    if conditions:
        filter_sql = " AND ".join(conditions)
    else:
        filter_sql = "1=1"

    has_vector = bool(query_vector) and len(query_vector) > 0

    if has_vector:
        vector_literal = f"[{','.join(map(str, query_vector))}]"

        # The CTE names its columns explicitly rather than using SELECT *.
        sql = f"""
        WITH top_sku_candidates AS (
            SELECT
                internal_ref_code,
                product_name,
                sale_price,
                original_price,
                master_color,
                product_image_url,
                product_image_url_thumbnail,
                product_web_url,
                description_text,
                material,
                material_group,
                gender_by_product,
                age_by_product,
                season,
                style,
                fitting,
                form_neckline,
                form_sleeve,
                product_line_vn,
                product_color_name,
                cosine_similarity(vector, {vector_literal}) as similarity_score
            FROM shared_source.magento_product_dimension_with_text_embedding
            WHERE {filter_sql} AND vector IS NOT NULL
            ORDER BY similarity_score DESC
            LIMIT 50
        )
        SELECT
            internal_ref_code,
            ANY_VALUE(product_name) as product_name,
            ANY_VALUE(sale_price) as sale_price,
            ANY_VALUE(original_price) as original_price,
            GROUP_CONCAT(DISTINCT master_color ORDER BY master_color SEPARATOR ', ') as available_colors,
            ANY_VALUE(product_image_url) as product_image_url,
            ANY_VALUE(product_image_url_thumbnail) as product_image_url_thumbnail,
            ANY_VALUE(product_web_url) as product_web_url,
            ANY_VALUE(description_text) as description_text,
            ANY_VALUE(material) as material,
            ANY_VALUE(material_group) as material_group,
            ANY_VALUE(gender_by_product) as gender_by_product,
            ANY_VALUE(age_by_product) as age_by_product,
            ANY_VALUE(season) as season,
            ANY_VALUE(style) as style,
            ANY_VALUE(fitting) as fitting,
            ANY_VALUE(form_neckline) as form_neckline,
            ANY_VALUE(form_sleeve) as form_sleeve,
            ANY_VALUE(product_line_vn) as product_line_vn,
            MAX(similarity_score) as max_score
        FROM top_sku_candidates
        GROUP BY internal_ref_code
        ORDER BY max_score DESC
        LIMIT 10
        """  # noqa: S608
    else:
        # Keyword fallback: a single direct query, no CTE.
        name_filter = ""
        if keyword_text := getattr(params, "keywords", None):
            needle = _escape(keyword_text).lower()
            name_filter = f" AND LOWER(product_name) LIKE '%{needle}%'"

        sql = f"""
        SELECT
            internal_ref_code,
            ANY_VALUE(product_name) as product_name,
            ANY_VALUE(sale_price) as sale_price,
            ANY_VALUE(original_price) as original_price,
            GROUP_CONCAT(DISTINCT master_color ORDER BY master_color SEPARATOR ', ') as available_colors,
            ANY_VALUE(product_image_url) as product_image_url,
            ANY_VALUE(product_image_url_thumbnail) as product_image_url_thumbnail,
            ANY_VALUE(product_web_url) as product_web_url,
            ANY_VALUE(description_text) as description_text,
            ANY_VALUE(material) as material,
            ANY_VALUE(material_group) as material_group,
            ANY_VALUE(gender_by_product) as gender_by_product,
            ANY_VALUE(age_by_product) as age_by_product,
            ANY_VALUE(season) as season,
            ANY_VALUE(style) as style,
            ANY_VALUE(fitting) as fitting,
            ANY_VALUE(form_neckline) as form_neckline,
            ANY_VALUE(form_sleeve) as form_sleeve,
            ANY_VALUE(product_line_vn) as product_line_vn
        FROM shared_source.magento_product_dimension_with_text_embedding
        WHERE {filter_sql} {name_filter}
        GROUP BY internal_ref_code
        HAVING COUNT(*) > 0
        ORDER BY sale_price ASC
        LIMIT 10
        """  # noqa: S608

    mode = "Vector" if query_vector else "Keyword"
    logger.info(f"📊 Query Mode: {mode}")
    return sql
