import logging
import time

from common.embedding_service import create_embedding_async

logger = logging.getLogger(__name__)


def _escape(val: str) -> str:
    """Thoát dấu nháy đơn để tránh SQL Injection cơ bản."""
    return val.replace("'", "''")


def _get_where_clauses(params) -> list[str]:
    """Collect every filter condition derived from ``params``.

    The result is the price conditions, followed by the metadata conditions,
    followed by the special (product code / colour) conditions.
    """
    return [
        *_get_price_clauses(params),
        *_get_metadata_clauses(params),
        *_get_special_clauses(params),
    ]


def _get_price_clauses(params) -> list[str]:
    """Lọc theo giá."""
    clauses = []
    p_min = getattr(params, "price_min", None)
    if p_min is not None:
        clauses.append(f"sale_price >= {p_min}")
    p_max = getattr(params, "price_max", None)
    if p_max is not None:
        clauses.append(f"sale_price <= {p_max}")
    return clauses


def _get_metadata_clauses(params) -> list[str]:
    """Xây dựng điều kiện lọc từ metadata (Phối hợp Exact và Partial)."""
    clauses = []

    # 1. Exact Match (Giới tính, Độ tuổi) - Các trường này cần độ chính xác tuyệt đối
    exact_fields = [
        ("gender_by_product", "gender_by_product"),
        ("age_by_product", "age_by_product"),
    ]
    for param_name, col_name in exact_fields:
        val = getattr(params, param_name, None)
        if val:
            clauses.append(f"{col_name} = '{_escape(val)}'")

    # 2. Partial Match (LIKE) - Giúp map text linh hoạt hơn (Chất liệu, Dòng SP, Phong cách...)
    # Cái này giúp map: "Yarn" -> "Yarn - Sợi", "Knit" -> "Knit - Dệt Kim"
    partial_fields = [
        ("season", "season"),
        ("material_group", "material_group"),
        ("product_line_vn", "product_line_vn"),
        ("style", "style"),
        ("fitting", "fitting"),
        ("form_neckline", "form_neckline"),
        ("form_sleeve", "form_sleeve"),
    ]
    for param_name, col_name in partial_fields:
        val = getattr(params, param_name, None)
        if val:
            v = _escape(val).lower()
            # Dùng LOWER + LIKE để cân mọi loại ký tự thừa hoặc hoa/thường
            clauses.append(f"LOWER({col_name}) LIKE '%{v}%'")

    return clauses


def _get_special_clauses(params) -> list[str]:
    """Các trường hợp đặc biệt: Mã sản phẩm, Màu sắc."""
    clauses = []
    # Mã sản phẩm / SKU
    m_code = getattr(params, "magento_ref_code", None)
    if m_code:
        m = _escape(m_code)
        clauses.append(f"(magento_ref_code = '{m}' OR internal_ref_code = '{m}')")

    # Màu sắc
    color = getattr(params, "master_color", None)
    if color:
        c = _escape(color).lower()
        clauses.append(f"(LOWER(master_color) LIKE '%{c}%' OR LOWER(product_color_name) LIKE '%{c}%')")
    return clauses


async def build_starrocks_query(params, query_vector: list[float] | None = None) -> str:
    """Build the SQL for product search using one of two strategies.

    1. CODE SEARCH: when ``params.magento_ref_code`` is set, look the product
       up directly by code. No embedding is created and no vector search runs.
    2. HYDE SEARCH: otherwise rank products by similarity between the stored
       vectors and the query vector, optionally filtered by price.

    Args:
        params: Object that may expose ``magento_ref_code``, ``query``,
            ``price_min`` and ``price_max``; missing attributes count as None.
        query_vector: Pre-computed embedding. When it is None and
            ``params.query`` is non-empty, the embedding is created here via
            ``create_embedding_async``.

    Returns:
        The SQL text, or an empty string when no query vector is available.
    """

    # ============================================================
    # CASE 1: CODE SEARCH - look up by product code (no vector)
    # ============================================================
    magento_code = getattr(params, "magento_ref_code", None)
    if magento_code:
        logger.info("🎯 [CODE SEARCH] Direct search by code: %s", magento_code)
        code = _escape(magento_code)

        # Exact match on either code column. The statement has no GROUP BY or
        # LIMIT, so every matching row is returned, each with a constant
        # score of 1.0.
        sql = f"""
        SELECT 
            internal_ref_code,
            description_text_full,
            sale_price,
            original_price,
            discount_amount,
            1.0 as max_score
        FROM shared_source.magento_product_dimension_with_text_embedding
        WHERE (magento_ref_code = '{code}' OR internal_ref_code = '{code}')
        """

        # Routed through the module logger (was a bare print) so the message
        # follows the application's logging configuration.
        logger.info("✅ [CODE SEARCH] Query built - No vector search needed!")

        # Writing the full query to a debug log in a background task is
        # currently disabled:
        # asyncio.create_task(save_query_to_log(sql))

        return sql

    # ============================================================
    # CASE 2: HYDE SEARCH - semantic vector search
    # ============================================================
    logger.info("🚀 [HYDE RETRIEVER] Starting semantic vector search...")

    # 1. Obtain the query vector, embedding the query text only when the
    #    caller did not supply one.
    query_text = getattr(params, "query", None)

    if query_text and query_vector is None:
        # perf_counter is monotonic, so the measured duration cannot be
        # skewed by system clock adjustments.
        emb_start = time.perf_counter()
        query_vector = await create_embedding_async(query_text)
        elapsed_ms = (time.perf_counter() - emb_start) * 1000
        logger.info("⏱️ [TIMER] Single HyDE Embedding: %.2fms", elapsed_ms)

    if not query_vector:
        logger.warning("⚠️ No vector found, returning empty query.")
        return ""

    v_str = "[" + ",".join(str(v) for v in query_vector) + "]"

    # 2. Only the price filter is applied; all other matching is left to the
    #    vector similarity.
    price_clauses = _get_price_clauses(params)
    where_filter = ""
    if price_clauses:
        where_filter = " AND " + " AND ".join(price_clauses)
        logger.info("💰 [PRICE FILTER] Applied: %s", where_filter)

    # 3. Vector search plus price filter. The price filter sits in the outer
    #    query, i.e. it is applied to the 100 most similar rows rather than
    #    inside the similarity scan, so fewer than 20 products may come back.
    #    Rows are then grouped per internal_ref_code, keeping the values of
    #    the best-scoring row in each group.
    sql = f"""
    WITH top_matches AS (
        SELECT /*+ SET_VAR(ann_params='{{"ef_search":128}}') */
            internal_ref_code,
            product_color_code,
            description_text_full,
            sale_price,
            original_price,
            discount_amount,
            approx_cosine_similarity(vector, {v_str}) as similarity_score
        FROM shared_source.magento_product_dimension_with_text_embedding
        ORDER BY similarity_score DESC
        LIMIT 100
    )
    SELECT 
        internal_ref_code,
        MAX_BY(description_text_full, similarity_score) as description_text_full,
        MAX_BY(sale_price, similarity_score) as sale_price,
        MAX_BY(original_price, similarity_score) as original_price,
        MAX_BY(discount_amount, similarity_score) as discount_amount,
        MAX(similarity_score) as max_score
    FROM top_matches
    WHERE 1=1 {where_filter}
    GROUP BY internal_ref_code
    ORDER BY max_score DESC
    LIMIT 20
    """

    return sql


# ============================================================
# TEMPORARILY COMMENTED OUT - save_query_to_log
# ============================================================
# async def save_query_to_log(sql: str):
#     """Lưu query full vào file hyde_pure_query.txt."""
#     import os
#     log_path = r"D:\cnf\chatbot_canifa\backend\logs\hyde_pure_query.txt"
#     try:
#         log_dir = os.path.dirname(log_path)
#         if not os.path.exists(log_dir):
#             os.makedirs(log_dir)
#         with open(log_path, "w", encoding="utf-8") as f:
#             f.write(sql)
#         print(f"💾 Full Query saved to: {log_path}")
#     except Exception as e:
#         print(f"Save query log failed: {e}")


# ============================================================
# TEMPORARILY COMMENTED OUT - save_preview_to_log
# ============================================================
# async def save_preview_to_log(search_query: str, products: list[dict]):
#     """Lưu kết quả DB trả về vào db_preview.txt (Format đẹp cho AI)."""
#     import os
#     preview_path = r"D:\cnf\chatbot_canifa\backend\logs\db_preview.txt"
#     try:
#         log_dir = os.path.dirname(preview_path)
#         if not os.path.exists(log_dir):
#             os.makedirs(log_dir)
#
#         with open(preview_path, "a", encoding="utf-8") as f:
#             f.write(f"\n{'='*60}\n")
#             f.write(f"⏰ TIME: {time.strftime('%Y-%m-%d %H:%M:%S')}\n")
#             f.write(f"🔍 SEARCH: {search_query}\n")
#             f.write(f"📊 RESULTS COUNT: {len(products)}\n")
#             f.write(f"{'-'*60}\n")
#
#             if not products:
#                 f.write("❌ NO PRODUCTS FOUND\n")
#             else:
#                 for idx, p in enumerate(products[:5], 1):
#                     code = p.get("internal_ref_code", "N/A")
#                     sale = p.get("sale_price", "N/A")
#                     orig = p.get("original_price", "N/A")
#                     disc = p.get("discount_amount", "0")
#                     score = p.get("max_score", p.get("similarity_score", "N/A"))
#                     desc = p.get("description_text_full", "No Description")
#
#                     f.write(f"{idx}. [{code}] Score: {score}\n")
#                     f.write(f"   💰 Price: {sale} (Orig: {orig}, Disc: {disc}%)\n")
#                     f.write(f"   📝 Desc: {desc}\n")
#
#             f.write(f"{'='*60}\n")
#         print(f"💾 DB Preview (Results) saved to: {preview_path}")
#     except Exception as e:
#         print(f"Save preview log failed: {e}")
