"""
CANIFA Data Retrieval Tool - Tối giản cho Agentic Workflow.
Hỗ trợ Hybrid Search: Semantic (Vector) + Metadata Filter.
"""

import asyncio
import json
import logging
import time
from decimal import Decimal

from langchain_core.tools import tool
from pydantic import BaseModel, Field

from agent.tools.product_search_helpers import build_starrocks_query
from common.embedding_service import create_embeddings_async
from common.starrocks_connection import get_db_connection

# from langsmith import traceable

logger = logging.getLogger(__name__)


class DecimalEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``Decimal`` values (e.g. from the database) as floats."""

    def default(self, obj):
        """Return ``float(obj)`` for a ``Decimal``; defer every other type to the base encoder."""
        if not isinstance(obj, Decimal):
            # The base implementation raises TypeError for types it cannot serialize.
            return super().default(obj)
        return float(obj)


class SearchItem(BaseModel):
    """
    Cấu trúc một mục tìm kiếm đơn lẻ trong Multi-Search.

    Lưu ý quan trọng về cách SINH QUERY:
    - Trường `query` KHÔNG phải câu hỏi thô của khách.
    - Phải là một đoạn text có cấu trúc giống hệt format trong cột `description_text_full` của DB,
      ví dụ (chỉ là 1 chuỗi duy nhất, nối các field bằng dấu chấm):

      product_name: Pack 3 đôi tất bé gái cổ thấp. master_color: Xanh da trời/ Blue.
      product_image_url: https://.... product_image_url_thumbnail: https://....
      product_web_url: https://.... description_text: ... material: ...
      material_group: Yarn - Sợi. gender_by_product: female. age_by_product: others.
      season: Year. style: Feminine. fitting: Slim. size_scale: 4/6.
      form_neckline: None. form_sleeve: None. product_line_vn: Tất.
      product_color_name: Blue Strip 449.

    - Khi khách chỉ nói “áo màu hồng”, hãy suy luận và sinh query dạng:

      product_name: Áo thun/áo sơ mi/áo ... màu hồng ... . master_color: Hồng/ Pink.
      product_image_url: None. product_image_url_thumbnail: None.
      product_web_url: None. description_text: ... (mô tả thêm nếu có).
      material: None. material_group: None. gender_by_product: ... (nếu đoán được).
      age_by_product: others. season: Year. style: ... (nếu đoán được).
      fitting: ... size_scale: None. form_neckline: None. form_sleeve: None.
      product_line_vn: Áo. product_color_name: Pink / Hồng (nếu hợp lý).

    - Nếu không suy luận được giá trị cho field nào thì để `None` hoặc bỏ trống phần text đó.
    """
    # Schema for one search request inside a multi-search tool call.
    #
    # Summary of the docstring above (kept in its original wording):
    #   * `query` is not the customer's raw question; it is a structured text that
    #     mirrors the `description_text_full` column format ("key: value" pairs
    #     joined by periods in a single string).
    #   * Values that cannot be inferred are written as `None` or left out.
    #
    # NOTE(review): the docstring and the Field descriptions below are presumably
    # exported with the model's JSON schema and read by the LLM, so they are treated
    # as prompt text and left untranslated — confirm before rewording any of them.

    # Reject any key that is not declared below.
    model_config = {"extra": "forbid"}

    # Structured search text. It is embedded for the vector search and also parsed
    # for "key: value" pairs by data_retrieval_tool (via _parse_query_metadata).
    query: str = Field(
        ...,
        description=(
            "ĐOẠN TEXT CÓ CẤU TRÚC theo format của cột description_text_full trong DB, "
            "bao gồm các cặp key: product_name, master_color, product_image_url, "
            "product_image_url_thumbnail, product_web_url, description_text, material, "
            "material_group, gender_by_product, age_by_product, season, style, fitting, "
            "size_scale, form_neckline, form_sleeve, product_line_vn, product_color_name. "
            "Ví dụ: 'product_name: Pack 3 đôi tất bé gái cổ thấp. master_color: Xanh da trời/ Blue. "
            "product_image_url: https://.... product_web_url: https://.... description_text: ... "
            "material: None. material_group: Yarn - Sợi. gender_by_product: female. ...'"
        ),
    )
    # Product code / SKU; per its description, filled only when a specific code is asked for.
    magento_ref_code: str | None = Field(
        ..., description="Mã sản phẩm hoặc SKU (Ví dụ: 8TS24W001). CHỈ điền khi khách hỏi mã code cụ thể."
    )
    # Optional price bounds (lower / upper).
    price_min: float | None = Field(..., description="Giá thấp nhất (VD: 100000)")
    price_max: float | None = Field(..., description="Giá cao nhất (VD: 500000)")
    # Free-form string; the description names 'search' and 'visual_search' as the expected values.
    action: str = Field(..., description="Hành động: 'search' (tìm kiếm) hoặc 'visual_search' (phân tích ảnh)")
    
    # Metadata Fields for Filtering
    # STRICT MODE: All fields must be required. Use ... and str | None
    # (every field is declared with `...`, i.e. the key must be present, while the
    # `| None` type still lets the caller pass an explicit null for "no filter").
    gender_by_product: str | None = Field(..., description="Giới tính (nam/nữ/bé trai/bé gái)")
    age_by_product: str | None = Field(..., description="Độ tuổi")
    product_name: str | None = Field(..., description="Tên sản phẩm")
    style: str | None = Field(..., description="Phong cách")
    master_color: str | None = Field(..., description="Màu sắc chính")
    season: str | None = Field(..., description="Mùa")
    material_group: str | None = Field(..., description="Nhóm chất liệu")
    fitting: str | None = Field(..., description="Dáng đồ")
    form_neckline: str | None = Field(..., description="Dáng cổ")
    form_sleeve: str | None = Field(..., description="Dáng tay")


class MultiSearchParams(BaseModel):
    """Tham số cho Parallel Multi-Search."""
    # Argument schema of data_retrieval_tool (passed to @tool as args_schema):
    # a single required list of SearchItem entries.
    # NOTE(review): the docstring and Field description are presumably exported with
    # the tool schema, so they are kept in their original wording — confirm before
    # rewording.

    # Reject any key other than `searches`.
    model_config = {"extra": "forbid"}

    # One entry per independent search; data_retrieval_tool runs them concurrently.
    searches: list[SearchItem] = Field(..., description="Danh sách các truy vấn tìm kiếm chạy song song")


def _parse_query_metadata(query_str: str) -> dict:
    """Extract ``key: value`` metadata pairs from an agent-generated structured query.

    Two layouts are understood:

    * one pair per line (leading indentation allowed), and
    * several pairs on the same line joined by ``". "``, which is the layout the
      query format prescribes, e.g.
      ``"product_name: Áo thun. master_color: Hồng/ Pink. season: None."``.

    A key is a run of lowercase ASCII letters/underscores followed by ``:`` that
    sits at the start of a line or right after a period plus whitespace. A colon
    followed by ``/`` is never a key, so ``https://...`` values are left intact.
    Each value runs up to the next key (or the end of the string); the separating
    period is not part of the value.

    Args:
        query_str: Structured query text; may be empty or ``None``.

    Returns:
        Dict mapping each key to its stripped value. Pairs whose value is empty
        or equals ``none`` (case-insensitive) are omitted. When a key occurs
        more than once, the last occurrence wins.
    """
    import re

    metadata: dict = {}
    if not query_str:
        return metadata

    # Key at line start (optionally indented) or after a ". " separator.
    # (?!/) keeps URL schemes such as "https://" from being read as keys.
    key_pattern = re.compile(r"(?:^\s*|\.\s+)([a-z_]+):(?!/)", re.MULTILINE)
    hits = list(key_pattern.finditer(query_str))

    for pos, hit in enumerate(hits):
        is_last = pos + 1 == len(hits)
        # The next hit starts at its separator period, so slicing up to it drops
        # the period that terminates the current value.
        value_end = len(query_str) if is_last else hits[pos + 1].start()
        value = query_str[hit.end():value_end].strip()
        if is_last:
            # The final pair has no following key; drop its terminating period.
            value = value.removesuffix(".").rstrip()
        if value and value.lower() != "none":
            metadata[hit.group(1)] = value

    return metadata


from agent.prompt_utils import read_tool_prompt

@tool(args_schema=MultiSearchParams)
# @traceable(run_type="tool", name="data_retrieval_tool")
async def data_retrieval_tool(searches: list[SearchItem]) -> str:
    """
    (Placeholder) Tìm kiếm sản phẩm thời trang.
    """
    # NOTE: the docstring above is prompt text (the module replaces it with a loaded
    # tool prompt after this definition), so it is intentionally left verbatim.
    #
    # Flow:
    #   1. Copy "key: value" pairs found in each item's query onto the item's fields.
    #   2. Embed all non-empty queries in a single batch call.
    #   3. Run every search concurrently against the shared DB connection.
    #   4. Return one JSON string:
    #        {"status": "success", "results": [{search_index, search_criteria,
    #                                           count, products}, ...]}
    #      or {"status": "error", "message": ...} if anything raises.
    logger.info("data_retrieval_tool started, searches=%s", len(searches))
    try:
        # Pre-process: parse metadata from the query string.
        for item in searches:
            if not item.query:
                continue
            # Only declared model fields may be assigned. A plain hasattr() check
            # would also accept method/attribute names (e.g. "copy"), and pydantic
            # rejects assignment to non-field names, which would abort the whole
            # tool call because of one stray key.
            declared_fields = type(item).model_fields
            for key, value in _parse_query_metadata(item.query).items():
                if key in declared_fields:
                    setattr(item, key, value)

        # Log a compact overview of the input (long queries are truncated).
        for idx, item in enumerate(searches):
            short_query = (item.query[:60] + "...") if item.query and len(item.query) > 60 else item.query
            logger.debug(
                "search[%s] query=%r, code=%r, price_min=%r, price_max=%r, gender=%r, age=%r",
                idx,
                short_query,
                item.magento_ref_code,
                item.price_min,
                item.price_max,
                item.gender_by_product,
                item.age_by_product,
            )

        # 1. Embed every non-empty query in one batch.
        queries_to_embed = [s.query for s in searches if s.query]
        all_vectors = []
        if queries_to_embed:
            logger.info("batch embedding %s queries", len(queries_to_embed))
            emb_batch_start = time.perf_counter()
            all_vectors = await create_embeddings_async(queries_to_embed)
            logger.info(
                "batch embedding done in %.2f ms",
                (time.perf_counter() - emb_batch_start) * 1000,
            )

        # 2. Get DB connection (singleton)
        db = get_db_connection()

        # Vectors come back in the order of the non-empty queries, so hand them out
        # in that order. Items without a query, or without a vector left, get None.
        remaining_vectors = iter(all_vectors)
        tasks = [
            _execute_single_search(
                db,
                item,
                query_vector=next(remaining_vectors, None) if item.query else None,
            )
            for item in searches
        ]
        results = await asyncio.gather(*tasks)

        # 3. Combine the per-search results, keeping the input order.
        combined_results = [
            {
                "search_index": i,
                # model_dump() is the pydantic v2 replacement for the deprecated .dict().
                "search_criteria": item.model_dump(exclude_none=True),
                "count": len(products),
                "products": products,
            }
            for i, (item, products) in enumerate(zip(searches, results))
        ]

        logger.info("data_retrieval_tool finished, results=%s", len(combined_results))
        return json.dumps(
            {"status": "success", "results": combined_results},
            ensure_ascii=False,
            cls=DecimalEncoder,
        )

    except Exception as e:
        logger.exception("Error in Multi-Search data_retrieval_tool: %s", e)
        # ensure_ascii=False keeps non-ASCII error text readable, matching the success payload.
        return json.dumps({"status": "error", "message": str(e)}, ensure_ascii=False)


async def _execute_single_search(db, item: SearchItem, query_vector: list[float] | None = None) -> list[dict]:
    """Run a single search item against the database and format its products.

    Args:
        db: Connection object exposing ``execute_query_async(sql, params=...)``.
        item: Search criteria handed to ``build_starrocks_query``.
        query_vector: Pre-computed embedding for ``item.query``, or ``None``;
            forwarded unchanged to ``build_starrocks_query``.

    Returns:
        The rows formatted by ``_format_product_results``. An empty list is
        returned when no SQL is produced, and also when any exception occurs
        (the exception is logged, never propagated), so one failing search does
        not break the others running alongside it.
    """
    try:
        short_query = (item.query[:60] + "...") if item.query and len(item.query) > 60 else item.query
        logger.debug(
            "_execute_single_search started, query=%r, code=%r",
            short_query,
            item.magento_ref_code,
        )

        # Timer: build the SQL. perf_counter() is monotonic, so the measured
        # interval is not affected by wall-clock adjustments.
        query_build_start = time.perf_counter()
        sql, params = await build_starrocks_query(item, query_vector=query_vector)
        query_build_time = (time.perf_counter() - query_build_start) * 1000  # ms

        # Check for "no SQL" before touching len(sql): a None result would
        # otherwise raise TypeError and be reported as a search error.
        if not sql:
            logger.debug("no SQL built, build_time_ms=%.2f", query_build_time)
            return []
        logger.debug("SQL built, length=%s, build_time_ms=%.2f", len(sql), query_build_time)

        # Timer: execute the DB query.
        db_start = time.perf_counter()
        products = await db.execute_query_async(sql, params=params)
        db_time = (time.perf_counter() - db_start) * 1000  # ms
        logger.info(
            "_execute_single_search done, products=%s, build_ms=%.2f, db_ms=%.2f, total_ms=%.2f",
            len(products),
            query_build_time,
            db_time,
            query_build_time + db_time,
        )

        # Diagnostic: log the first row's column names and price fields.
        if products:
            first_p = products[0]
            logger.info("🔍 [DEBUG] First product keys: %s", list(first_p.keys()))
            logger.info(
                "🔍 [DEBUG] First product price: %s, sale_price: %s",
                first_p.get("original_price"),
                first_p.get("sale_price"),
            )

        return _format_product_results(products)
    except Exception as e:
        logger.exception("Single search error for item %r: %s", item, e)
        return []


def _format_product_results(products: list[dict]) -> list[dict]:
    """Trim and reshape raw DB rows into the product dicts returned to the agent.

    At most the first 15 rows are kept. Name, URL and thumbnail are parsed out of
    each row's ``description_text_full``; missing or falsy numeric columns
    default to 0.
    """
    limit = 15

    def _as_entry(row: dict) -> dict:
        # Structured fields are recovered from the free-text description column.
        full_text = row.get("description_text_full", "")
        fields = _parse_description_text(full_text)
        return {
            "sku": row.get("internal_ref_code"),
            "name": fields.get("product_name", ""),
            "price": row.get("original_price") or 0,
            "sale_price": row.get("sale_price") or 0,
            "description": full_text,
            "url": fields.get("product_web_url", ""),
            "thumbnail_image_url": fields.get("product_image_url_thumbnail", ""),
            "discount_amount": row.get("discount_amount") or 0,
            "max_score": row.get("max_score") or 0,
        }

    return [_as_entry(row) for row in products[:limit]]


def _parse_description_text(desc: str) -> dict:
    """Pull selected fields out of a ``description_text_full`` string.

    Expected layout:
    ``"product_name: X. master_color: Y. product_web_url: https://canifa.com/... ..."``

    Each field is read up to the period that precedes one specific following
    field (or up to a period at the very end of the text):

    * ``product_name``                 -> ends before ``master_color:``
    * ``product_image_url_thumbnail``  -> ends before ``product_web_url:`` (http/https URL only)
    * ``product_web_url``              -> ends before ``description_text:`` (http/https URL only)
    * ``master_color``                 -> ends before ``product_image_url:``

    Returns a dict holding only the fields that matched; an empty dict for empty input.
    """
    import re

    if not desc:
        return {}

    # (result key, pattern) pairs; capture group 1 is the field's value.
    extractors = (
        ("product_name", r"product_name:\s*(.+?)\.(?:\s+master_color:|$)"),
        (
            "product_image_url_thumbnail",
            r"product_image_url_thumbnail:\s*(https?://[^\s]+?)\.(?:\s+product_web_url:|$)",
        ),
        ("product_web_url", r"product_web_url:\s*(https?://[^\s]+?)\.(?:\s+description_text:|$)"),
        ("master_color", r"master_color:\s*(.+?)\.(?:\s+product_image_url:|$)"),
    )

    found = {}
    for field, pattern in extractors:
        hit = re.search(pattern, desc)
        if hit is not None:
            found[field] = hit.group(1).strip()
    return found

# Load dynamic docstring: replace the tool's __doc__ with the text returned by
# read_tool_prompt("data_retrieval_tool"). If that call returns a falsy value
# (e.g. None or an empty string), the existing docstring is kept.
# NOTE(review): at this point `data_retrieval_tool` is whatever object the @tool
# decorator returned, not the raw coroutine function. Whether assigning __doc__
# here also changes the description the agent sees depends on that object —
# TODO confirm (the tool's description attribute may need to be set instead).
data_retrieval_tool.__doc__ = read_tool_prompt("data_retrieval_tool") or data_retrieval_tool.__doc__
