"""
HYBRID SEARCH
═════════════

Kết hợp Semantic (Vector) Search + Keyword Filtering

Flow:
1. Thêm prefix "description_text: " vào query (match format với DB)
2. Embed prefixed query → Vector
3. Vector search trong StarRocks (HNSW Index) → TOP N candidates
4. Parse keywords từ query gốc → Build filters
5. Filter candidates → Return kết quả cuối

Usage:
    results = await hybrid_search("áo mùa đông cho bé gái", limit=20)
"""

import logging
import time
from typing import Optional

from common.embedding_service import create_embedding_async
from common.starrocks_connection import StarRocksConnection
from .query_parser import parse_query, build_where_clause, get_matched_keywords

logger = logging.getLogger(__name__)

# Prefix prepended to every query before embedding so that it matches the
# format of the text that was embedded into the DB.
QUERY_PREFIX = "description_text: "

# Default number of candidates fetched by the vector search (before any
# keyword/price filtering is applied).
DEFAULT_CANDIDATE_LIMIT = 200

# StarRocks connection object, created once at import time and shared by the
# search functions in this module.
_db = StarRocksConnection()


async def hybrid_search(
    query: str,
    limit: int = 20,
    candidate_limit: int = DEFAULT_CANDIDATE_LIMIT,
    price_min: Optional[float] = None,
    price_max: Optional[float] = None,
    ef_search: int = 128,
) -> dict:
    """Run a hybrid search: vector similarity first, keyword/price filters after.

    The query is prefixed with ``QUERY_PREFIX``, embedded, and used for an
    approximate cosine-similarity search that keeps the top
    ``candidate_limit`` rows. Keyword filters parsed from the *original*
    query (plus optional price bounds) are then applied to those candidates
    only, and the survivors are grouped by ``internal_ref_code``.

    Args:
        query: User query string (e.g. "áo mùa đông cho bé gái").
        limit: Maximum number of grouped results to return.
        candidate_limit: Number of rows taken from the vector search before
            filtering.
        price_min: Lower bound on ``sale_price``; ignored when ``None`` or
            not greater than 0. Truncated to an integer.
        price_max: Upper bound on ``sale_price``; ignored when ``None`` or
            not greater than 0. Truncated to an integer.
        ef_search: Value passed as ``ef_search`` in the ``ann_params`` hint.

    Returns:
        On success, a dict with the keys ``status`` ("success"), ``query``,
        ``prefixed_query``, ``matched_keywords``, ``filters_applied``,
        ``candidate_limit``, ``result_count``, ``process_time`` (a string
        such as ``"0.123s"``) and ``results``.
        On failure, ``{"status": "error", "message": ..., "results": []}``.
        Exceptions are logged and reported this way rather than raised.
    """
    start_time = time.perf_counter()

    try:
        # These values are interpolated directly into the SQL text below.
        # Forcing them to int guarantees nothing but a numeric literal can
        # reach the statement; a non-numeric value raises here and is
        # reported through the error response.
        limit = int(limit)
        candidate_limit = int(candidate_limit)
        ef_search = int(ef_search)

        # STEP 1: prefix the query so it matches the format embedded in the DB.
        prefixed_query = QUERY_PREFIX + query.strip()
        logger.info("Prefixed query: '%s'", prefixed_query)

        # STEP 2: embed the prefixed query.
        embed_start = time.perf_counter()
        query_vector = await create_embedding_async(prefixed_query)

        if not query_vector:
            logger.error("Failed to create embedding for query")
            return {
                "status": "error",
                "message": "Failed to create embedding",
                "results": []
            }

        v_str = "[" + ",".join(map(str, query_vector)) + "]"
        logger.info("Embedding took %.3fs", time.perf_counter() - embed_start)

        # STEP 3: parse keywords from the ORIGINAL query (without the prefix).
        # NOTE(review): where_clause is interpolated into the SQL as-is; this
        # assumes build_where_clause only emits safe SQL - confirm.
        filters = parse_query(query)
        where_clause = build_where_clause(filters)
        matched_keywords = get_matched_keywords(query)

        logger.info("Matched keywords: %s", matched_keywords)
        logger.info("Filters: %s", filters)

        # STEP 4: add the optional price bounds.
        price_conditions = []
        if price_min is not None and price_min > 0:
            price_conditions.append(f"sale_price >= {int(price_min)}")
        if price_max is not None and price_max > 0:
            price_conditions.append(f"sale_price <= {int(price_max)}")

        # Merge with the keyword filters; start a WHERE clause if none exists.
        if price_conditions:
            price_sql = " AND ".join(price_conditions)
            if where_clause:
                where_clause += " AND " + price_sql
            else:
                where_clause = "WHERE " + price_sql

        # STEP 5: build the SQL.
        # Vector search runs FIRST (inside the CTE); the keyword/price filter
        # runs AFTER (outer query), so it only sees the top candidates.
        sql = f"""
        WITH semantic_candidates AS (
            SELECT 
                /*+ SET_VAR(ann_params='{{"ef_search":{ef_search}}}') */
                internal_ref_code,
                product_name,
                description_text,
                product_image_url,
                product_web_url,
                sale_price,
                original_price,
                discount_amount,
                master_color,
                season,
                gender_by_product,
                age_by_product,
                product_line_vn,
                style,
                approx_cosine_similarity(vector, {v_str}) as similarity_score
            FROM shared_source.magento_product_dimension_with_text_embedding
            ORDER BY similarity_score DESC
            LIMIT {candidate_limit}
        )
        SELECT 
            internal_ref_code,
            MAX_BY(product_name, similarity_score) as product_name,
            MAX_BY(description_text, similarity_score) as description_text,
            MAX_BY(product_image_url, similarity_score) as product_image_url,
            MAX_BY(product_web_url, similarity_score) as product_web_url,
            MAX_BY(sale_price, similarity_score) as sale_price,
            MAX_BY(original_price, similarity_score) as original_price,
            MAX_BY(discount_amount, similarity_score) as discount_amount,
            GROUP_CONCAT(DISTINCT master_color ORDER BY master_color SEPARATOR ', ') as available_colors,
            MAX_BY(season, similarity_score) as season,
            MAX_BY(gender_by_product, similarity_score) as gender_by_product,
            MAX_BY(age_by_product, similarity_score) as age_by_product,
            MAX_BY(product_line_vn, similarity_score) as product_line_vn,
            MAX_BY(style, similarity_score) as style,
            MAX(similarity_score) as similarity_score
        FROM semantic_candidates
        {where_clause}
        GROUP BY internal_ref_code
        ORDER BY similarity_score DESC
        LIMIT {limit}
        """

        # Log the statement with the embedding literal elided: the vector adds
        # one number per dimension to every log line and carries no
        # debugging value, while the filters and limits remain visible.
        if logger.isEnabledFor(logging.INFO):
            loggable_sql = sql.replace(
                v_str, f"[<{len(query_vector)}-dim vector>]"
            )
            logger.info("SQL Query:\n%s", loggable_sql)

        # STEP 6: execute the query.
        db_start = time.perf_counter()
        results = await _db.execute_query_async(sql)
        logger.info(
            "DB query took %.3fs, returned %d results",
            time.perf_counter() - db_start,
            len(results),
        )

        process_time = time.perf_counter() - start_time

        return {
            "status": "success",
            "query": query,
            "prefixed_query": prefixed_query,
            "matched_keywords": matched_keywords,
            "filters_applied": filters,
            "candidate_limit": candidate_limit,
            "result_count": len(results),
            "process_time": f"{process_time:.3f}s",
            "results": results
        }

    except Exception as e:
        # Top-level boundary: report the failure to the caller as an error
        # payload instead of raising, with the traceback in the log.
        logger.error(f"Hybrid search error: {e}", exc_info=True)
        return {
            "status": "error",
            "message": str(e),
            "results": []
        }


async def semantic_only_search(
    query: str,
    limit: int = 20,
    ef_search: int = 128,
) -> dict:
    """Run a pure vector search with no keyword or price filtering.

    Intended for testing the semantic (vector) part of the search on its own.

    Args:
        query: User query string; it is stripped and prefixed with
            ``QUERY_PREFIX`` before being embedded.
        limit: Maximum number of rows to return.
        ef_search: Value passed as ``ef_search`` in the ``ann_params`` hint.

    Returns:
        On success, a dict with the keys ``status`` ("success"), ``query``,
        ``prefixed_query``, ``search_type`` ("semantic_only"),
        ``process_time`` (a string such as ``"0.123s"``) and ``results``.
        On failure, ``{"status": "error", "message": ..., "results": []}``.
        Exceptions are logged and reported this way rather than raised.
    """
    start_time = time.perf_counter()

    try:
        # Both values are interpolated directly into the SQL text below, so
        # force them to int first; a non-numeric value raises here and is
        # reported through the error response.
        limit = int(limit)
        ef_search = int(ef_search)

        prefixed_query = QUERY_PREFIX + query.strip()
        query_vector = await create_embedding_async(prefixed_query)

        if not query_vector:
            return {"status": "error", "message": "Failed to create embedding", "results": []}

        # Render the embedding as a SQL array literal.
        v_str = "[" + ",".join(map(str, query_vector)) + "]"

        sql = f"""
    SELECT 
        /*+ SET_VAR(ann_params='{{"ef_search":{ef_search}}}') */
        internal_ref_code,
        product_name,
        description_text,
        product_image_url,
        sale_price,
        master_color,
        season,
        gender_by_product,
        age_by_product,
        product_line_vn,
        approx_cosine_similarity(vector, {v_str}) as similarity_score
    FROM shared_source.magento_product_dimension_with_text_embedding
    ORDER BY similarity_score DESC
    LIMIT {limit}
    """

        results = await _db.execute_query_async(sql)

        return {
            "status": "success",
            "query": query,
            "prefixed_query": prefixed_query,
            "search_type": "semantic_only",
            "process_time": f"{time.perf_counter() - start_time:.3f}s",
            "results": results
        }

    except Exception as e:
        # Same error contract as the embedding-failure path above: report the
        # failure as an error payload and keep the traceback in the log.
        logger.error(f"Semantic-only search error: {e}", exc_info=True)
        return {
            "status": "error",
            "message": str(e),
            "results": []
        }
