"""
Performance Test API Server
Mock server for load testing with Locust - does NOT call OpenAI, does NOT call Postgres.
"""

import asyncio
import logging
import os
import sys
import time
from pathlib import Path

import uvicorn
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel

# Put the parent of this file's directory at the front of sys.path so the backend
# modules imported further down can be found (presumably where `common/` and
# `agent/` live - confirm against the repo layout).
sys.path.insert(0, str(Path(__file__).parent.parent))

# Logging setup: INFO and above go both to perf_error.log (UTF-8; a relative path,
# so it is created in the current working directory) and to stdout.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[logging.FileHandler("perf_error.log", encoding="utf-8"), logging.StreamHandler(sys.stdout)],
)
logger = logging.getLogger(__name__)

# ============================================================
# 0. FORCE PROACTOR EVENT LOOP (CRITICAL FOR WINDOWS HIGH CONCURRENCY)
# ============================================================
# Only runs on Windows (os.name == "nt"); other platforms keep their default policy.
if os.name == "nt":
    try:
        asyncio.set_event_loop_policy(asyncio.WindowsProactorEventLoopPolicy())
        logger.info("⚡ Windows ProactorEventLoopPolicy set (Supports high FD count)")
    except Exception as e:
        # Best effort: log the failure and keep whatever policy is already active.
        logger.warning(f"Could not set ProactorEventLoopPolicy: {e}")

# ============================================================
# 1. MOCK LLM - Answers instantly, never calls OpenAI
# ============================================================
from typing import Any

from langchain_core.language_models import BaseChatModel
from langchain_core.messages import AIMessage
from langchain_core.outputs import ChatGeneration, ChatResult


class MockHighSpeedLLM(BaseChatModel):
    """
    Chat-model stub for performance testing.

    Never contacts OpenAI: every generation call immediately returns the same
    canned AI message.
    """

    @property
    def _llm_type(self) -> str:
        """Identifier string for this model type."""
        return "mock-high-speed"

    def _generate(
        self, messages: list[Any], stop: list[str] | None = None, run_manager: Any | None = None, **kwargs: Any
    ) -> ChatResult:
        """Return a fixed single-generation result; every argument is ignored."""
        canned_reply = AIMessage(content="[MOCK] Xin chào! Đây là bot test, không phải OpenAI thật.")
        only_generation = ChatGeneration(message=canned_reply)
        return ChatResult(generations=[only_generation])

    def bind_tools(self, tools: Any, **kwargs: Any) -> Any:
        """Skip tool binding entirely and hand back this same instance."""
        return self


# ============================================================
# 2. PATCH create_llm BEFORE IMPORTING THE GRAPH
# ============================================================
def mock_create_llm(*args, **kwargs):
    """Fake LLM factory.

    Accepts and ignores any arguments, logs that the mock was used, and
    returns a new ``MockHighSpeedLLM`` instance.
    """
    logger.info("🎭 MockLLM được gọi thay vì OpenAI!")
    fake_llm = MockHighSpeedLLM()
    return fake_llm


# Patch BEFORE importing agent.graph (imported further down): replace the factory
# attribute on the common.llm_factory module with the mock defined above.
import common.llm_factory

common.llm_factory.create_llm = mock_create_llm
logger.info("🎭 PATCHED common.llm_factory.create_llm")


# ============================================================
# 3. PATCH Langfuse - Disabled to avoid rate limits
# ============================================================
def mock_get_callback_handler(*args, **kwargs):
    """Stand-in for the Langfuse callback-handler factory: tracing is disabled.

    Accepts and ignores any positional or keyword arguments (the same shape as
    ``mock_create_llm``), so a call site that passes arguments to the patched
    function does not fail with ``TypeError``.

    Returns:
        None, always - no handler is created, so no trace is sent.
    """
    return None


import common.langfuse_client

# Replace the module attribute so lookups through common.langfuse_client get the no-op mock.
common.langfuse_client.get_callback_handler = mock_get_callback_handler
logger.info("🔇 PATCHED common.langfuse_client.get_callback_handler")

# ============================================================
# 4. MOCK EMBEDDING - Test the DB without hitting OpenAI rate limits
# ============================================================
import common.embedding_service


async def mock_create_embedding_async(text: str) -> list[float]:
    """Return a constant fake embedding of 1536 dimensions; ``text`` is ignored.

    Used in place of the real embedding call for the StarRocks tests.
    """
    dimensions = 1536
    return [0.01 for _ in range(dimensions)]


# Replace the module attribute so lookups through common.embedding_service get the mock vector.
common.embedding_service.create_embedding_async = mock_create_embedding_async
logger.info("🧠 PATCHED common.embedding_service.create_embedding_async (Using Mock Vector)")


# ============================================================
# 5. ONLY NOW IMPORT THE GRAPH (after all patches are applied)
# ============================================================
from langchain_core.messages import HumanMessage

from agent.graph import build_graph
from agent.models import get_config

# ============================================================
# 6. IMPORTS FOR THE DB TEST
# ============================================================
from common.starrocks_connection import StarRocksConnection

# ============================================================
# FASTAPI APP
# ============================================================
# ASGI application object; also the target of the "...:app" import string passed to uvicorn.run below.
app = FastAPI(title="Performance Test API")


# Request Models
class SearchRequest(BaseModel):
    """Request body accepted by the chat and DB-search endpoints."""

    # Locust sends the text as either 'message' or 'query'
    query: str = ""
    message: str = ""
    limit: int = 10
    user_id: str = "perf_user"
    thread_id: str = "perf_thread"

    @property
    def final_query(self) -> str:
        """Effective query text: ``message`` when it is non-empty, otherwise ``query``."""
        if self.message:
            return self.message
        return self.query


class MockParams(BaseModel):
    """Search parameters with optional filters.

    Every filter field defaults to ``None`` and is annotated as nullable so the
    annotation matches the default and an explicit ``None`` / JSON ``null``
    passes validation instead of being rejected as "not a string/number".
    """

    query: str = ""  # Must be named `query`, not `query_text`, so build_starrocks_query recognizes it
    limit: int = 10
    sku: str | None = None
    gender_by_product: str | None = None  # Matches the metadata field names
    season: str | None = None
    master_color: str | None = None
    product_line_vn: str | None = None
    price_min: float | None = None
    price_max: float | None = None


# Global variables
# Graph object shared by the endpoints; stays None until startup_event assigns it.
mock_graph = None


@app.on_event("startup")
async def startup_event():
    """Startup hook: open the StarRocks connection, then build the mocked graph."""
    global mock_graph

    logger.info("🚀 Performance Test API Server Starting...")

    # Step 1: warm up the DB connection before any request arrives
    starrocks = StarRocksConnection()
    starrocks.connect()
    logger.info("✅ StarRocks Connection initialized")

    # Step 2: build the graph (the LLM factory was already patched at the top of this file)
    mock_graph = build_graph(get_config())
    logger.info("✅ Mock Graph built successfully (No OpenAI, No Postgres, No Langfuse)")


# ============================================================
# ENDPOINTS
# ============================================================


@app.post("/api/agent/chat")
async def api_agent_chat(request: SearchRequest):
    """
    Run the whole graph flow against the mock LLM.

    Served on the same path as the real endpoint so Locust exercises the same flow.

    Args:
        request: Body carrying the text (``message`` or ``query``), ``user_id`` and ``thread_id``.

    Returns:
        ``{"status": "success", ...}`` with the elapsed seconds and a canned response, or
        ``{"status": "error", ...}`` (still HTTP 200) when neither ``message`` nor ``query`` is set.

    Raises:
        HTTPException: 500 when the graph has not been built yet or the graph run fails.
    """
    if mock_graph is None:
        raise HTTPException(500, "Mock Graph not initialized")

    # perf_counter is monotonic, so the measured duration cannot be skewed by wall-clock adjustments.
    started = time.perf_counter()
    try:
        # Effective query text (taken from message or query)
        user_query = request.final_query
        if not user_query:
            return {"status": "error", "message": "Missing query/message"}

        # The timestamp fallback only applies when the client sends an empty thread_id;
        # an omitted field already gets the model default.
        thread_id = request.thread_id or f"perf_test_{int(time.time() * 1000)}"

        input_state = {
            "messages": [HumanMessage(content=user_query)],
            "user_id": request.user_id,
        }
        config_runnable = {"configurable": {"thread_id": thread_id}}

        # Drain the stream: only completion matters here, the events themselves are discarded.
        async for _event in mock_graph.astream(input_state, config=config_runnable):
            pass

        process_time = time.perf_counter() - started
        return {
            "status": "success",
            "process_time_seconds": round(process_time, 4),
            "response": "[MOCK RESULT] Đây là câu trả lời giả lập từ Graph Flow.",
        }
    except Exception as e:
        # logger.exception keeps the traceback; `from e` preserves the cause on the re-raise.
        logger.exception("Graph Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post("/api/recommend/image")
async def api_recommend_image(body: dict = None):
    """Visual-search stub: ignores ``body`` and always reports success with no products."""
    empty_result = {"status": "success", "products": []}
    return empty_result




@app.post("/test/db-search")
async def test_db_search(request: SearchRequest):
    """
    Test StarRocks vector search with a direct query that fetches only internal_ref_code.

    Args:
        request: Body whose ``final_query`` is passed to the mock embedding function.

    Returns:
        Dict with ``status``, the number of codes found, the elapsed seconds and the codes.

    Raises:
        HTTPException: 500 when embedding, the query or result handling fails.
    """
    # NOTE(review): request.limit is not used here; the SQL below has a fixed LIMIT 50 - confirm intended.
    # perf_counter is monotonic, so the measured duration cannot be skewed by wall-clock adjustments.
    started = time.perf_counter()

    try:
        # Build the embedding vector for the query (mocked: constant vector)
        embedding = await mock_create_embedding_async(request.final_query)
        v_str = str(embedding)

        # Direct SQL query; the vector is inlined as its Python list repr
        sql = f"""
        SELECT /*+ SET_VAR(ann_params='{{"ef_search":64}}') */
            internal_ref_code,
            approx_cosine_similarity(vector, {v_str}) as score
        FROM shared_source.magento_product_dimension_with_text_embedding__tmp
        ORDER BY score DESC
        LIMIT 50
        """

        # Execute query
        db = StarRocksConnection()
        results = await db.execute_query_async(sql)

        # Keep only rows with a truthy internal_ref_code (one lookup per row)
        codes = [code for row in results if (code := row.get("internal_ref_code"))]

        process_time = time.perf_counter() - started
        return {
            "status": "success",
            "count": len(codes),
            "process_time_seconds": round(process_time, 4),
            "codes": codes,
        }

    except Exception as e:
        # logger.exception keeps the traceback; `from e` preserves the cause on the re-raise.
        logger.exception("DB Vector Search Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


@app.post("/test/db-ping")
async def test_db_ping():
    """
    Test the bare DB connection (SELECT 1).

    Returns:
        Dict with ``status`` and the elapsed seconds.

    Raises:
        HTTPException: 500 when the query fails.
    """
    # perf_counter is monotonic, so the measured duration cannot be skewed by wall-clock adjustments.
    started = time.perf_counter()
    try:
        db = StarRocksConnection()
        await db.execute_query_async("SELECT 1")

        process_time = time.perf_counter() - started
        return {"status": "success", "process_time_seconds": round(process_time, 4)}
    except Exception as e:
        # Log like the other endpoints do, so ping failures show up in perf_error.log too.
        logger.exception("DB Ping Error: %s", e)
        raise HTTPException(status_code=500, detail=str(e)) from e


# Legacy endpoint kept for compatibility (optional)
@app.post("/test/graph-mock-chat")
async def test_graph_mock_chat(request: SearchRequest):
    """Alias route: forwards the request to ``api_agent_chat`` and returns its result."""
    chat_result = await api_agent_chat(request)
    return chat_result


@app.get("/")
async def root():
    """Index route: reports that the server is up and running in mock mode."""
    status = {"message": "Performance Test API is running!", "mode": "MOCK (No OpenAI)"}
    return status


# ============================================================
# MAIN
# ============================================================
if __name__ == "__main__":
    # Single source of truth, so the banner cannot drift from what uvicorn is actually given.
    worker_count = 4
    # The Proactor policy is only installed when os.name == "nt" (see section 0 above),
    # so the banner only claims it on Windows.
    loop_label = "ProactorEventLoop (Windows Optimized)" if os.name == "nt" else "uvicorn default"

    print("=" * 60)
    print("🚀 PERFORMANCE TEST SERVER")
    print("=" * 60)
    print("🎭 LLM: MockHighSpeedLLM (No OpenAI)")
    print("🧠 Checkpointer: MemorySaver (No Postgres)")
    print("🔇 Langfuse: Disabled")
    print(f"⚡ Workers: {worker_count}")
    print(f"⚙️  Loop: {loop_label}")
    print("=" * 60)

    # Note: On Windows, to use multiple workers, we must pass the app as an import string.
    # reload MUST be False when using workers > 1.
    uvicorn.run("hehe.api_server_perf:app", host="0.0.0.0", port=8000, workers=worker_count, reload=False)
