refactor: all codebase

288fcc98 · Vũ Hoàng Anh · c9a437ef · 288fcc98 · 288fcc98 · c9a437ef
Commit 288fcc98 authored Dec 25, 2025 by Vũ Hoàng Anh
19 changed files
--- a/backend/Makefile
+++ b/backend/Makefile
+# Makefile for the CANIFA Chatbot
+.PHONY: up down restart logs build ps clean setup-nginx monitor-up monitor-down
+up:
+	docker-compose up -d --build
+down:
+	docker-compose down
+restart:
+	docker-compose restart backend
+logs:
+	docker-compose logs -f backend
+ps:
+	docker-compose ps
+build:
+	docker-compose build
+clean:
+	docker-compose down -v --rmi all --remove-orphans
+setup-nginx:
+	@echo "🚀 Đang cấu hình Nginx..."
+	sudo cp nginx.conf /etc/nginx/sites-available/chatbot
+	sudo ln -sf /etc/nginx/sites-available/chatbot /etc/nginx/sites-enabled/
+	sudo nginx -t && sudo systemctl restart nginx
+	@echo "✅ Nginx đã được cấu hình và restart!"
--- a/backend/agent/controller.py
+++ b/backend/agent/controller.py
 """
 Fashion Q&A Agent Controller
-Điều phối luồng chạy của Agent, tích hợp ConversationManager (Postgres Memory).
 Switched to LangSmith for tracing (configured via environment variables).
 """
 import json
 import logging
 import uuid
-from collections.abc import AsyncGenerator
-from langchain_core.messages import AIMessage, HumanMessage
+from fastapi import BackgroundTasks
+from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
 from langchain_core.runnables import RunnableConfig
+from common.conversation_manager import ConversationManager, get_conversation_manager
 from common.llm_factory import create_llm
-from common.conversation_manager import get_conversation_manager, ConversationManager
 from config import DEFAULT_MODEL
 from .graph import build_graph
@@ -22,96 +21,117 @@ from .tools.get_tools import get_all_tools
 logger = logging.getLogger(__name__)
 async def chat_controller(
-    query: str, user_id: str, model_name: str = DEFAULT_MODEL, conversation_id: str | None = None, images: list[str] | None = None
+    query: str,
-) -> AsyncGenerator[str, None]:
+    user_id: str,
-    # 1. Khởi tạo & Chuẩn bị (Dependency Injection)
+    background_tasks: BackgroundTasks,
+    model_name: str = DEFAULT_MODEL,
+    images: list[str] | None = None,
+) -> dict:
+    """
+    Handle a non-streaming chat request: initialize resources, load history, run the
+    graph, schedule the history save, and return the AI text plus product IDs.
+    """
    logger.info(f"▶️ Starting chat_controller with model: {model_name} for user: {user_id}")
    config = get_config()
    config.model_name = model_name
-    # Khởi tạo resources - Factory sẽ tự động chọn provider dựa trên tên model
+    # Enable JSON mode to ensure structured output
-    llm = create_llm(model_name=model_name, streaming=True)
+    llm = create_llm(model_name=model_name, streaming=False, json_mode=True)
    tools = get_all_tools()
    graph = build_graph(config, llm=llm, tools=tools)
    # Init ConversationManager (Singleton)
-    memory = get_conversation_manager()
+    memory = await get_conversation_manager()
-    actual_conv_id = conversation_id or str(uuid.uuid4())
    # LOAD HISTORY & Prepare State
-    # Get history from Postgres (returns list of dicts)
+    history_dicts = await memory.get_chat_history(user_id, limit=20)
-    history_dicts = memory.get_chat_history(user_id, limit=10)
    # Convert to BaseMessage objects
    history = []
-    for h in reversed(history_dicts): # API returns desc, we want chronological for context
+    for h in reversed(history_dicts): 
-        if h['is_human']:
+        if h["is_human"]:
-            history.append(HumanMessage(content=h['message']))
+            history.append(HumanMessage(content=h["message"]))
        else:
-            history.append(AIMessage(content=h['message']))
+            history.append(AIMessage(content=h["message"]))
-    current_human_msg = HumanMessage(content=query)
    initial_state, exec_config = _prepare_execution_context(
-        query=query, user_id=user_id, actual_conv_id=actual_conv_id, history=history, images=images
+        query=query, user_id=user_id, history=history, images=images
    )
-    final_ai_message = None
-    # 3. Stream Engine
    try:
-        async for event in graph.astream(initial_state, config=exec_config, stream_mode="values"):
+        result = await graph.ainvoke(initial_state, config=exec_config)
-            final_ai_message = _extract_last_ai_message(event) or final_ai_message
+        # logger.info(f"Answer result from ai: {result}")
-            # Serialize messages to dicts to avoid "content='...'" string representation
+        # Take the AI message from the graph result
-            if "messages" in event:
+        final_ai_message = result.get("ai_response")
-                event["messages"] = [
-                    m.dict() if hasattr(m, "dict") else m 
-                    for m in event["messages"]
-                ]
-            yield f"data: {json.dumps(event, default=str, ensure_ascii=False)}\n\n"
+        # Extract product IDs from tool messages
+        product_ids = _extract_product_ids(result.get("messages", []))
-        # 4. Hậu xử lý (Lưu DB)
+        # Save to DB in background after response is sent
-        _handle_post_chat(
+        background_tasks.add_task(
+            _handle_post_chat_async,
            memory=memory,
            user_id=user_id,
            human_query=query,
            ai_msg=final_ai_message,
        )
-        yield "data: [DONE]\n\n"
+        logger.info(f"✅ Request completed for user {user_id} with {len(product_ids)} products")
+        return {
+            "ai_response": final_ai_message.content if final_ai_message else "",
+            "product_ids": product_ids,
+        }
    except Exception as e:
-        logger.error(f"💥 Stream error: {e}", exc_info=True)
+        logger.error(f"💥 Chat error: {e}", exc_info=True)
-        yield f"data: {json.dumps({'error': str(e)})}\n\n"
+        raise
-    finally:
-        logger.info(f"✅ Request completed for conversation {actual_conv_id}")
+def _extract_product_ids(messages: list) -> list[str]:
+    """
+    Extract product internal_ref_code from tool messages (data_retrieval_tool results).
+    Returns list of unique product IDs.
+    """
+    product_ids = []
+    for msg in messages:
+        if isinstance(msg, ToolMessage):
+            try:
+                # Tool result is JSON string
+                tool_result = json.loads(msg.content)
-def _prepare_execution_context(query: str, user_id: str, actual_conv_id: str, history: list, images: list | None):
+                # Check if tool returned products
-    """Tách logic chuẩn bị state và config để giảm độ phức tạp."""
+                if tool_result.get("status") == "success" and "products" in tool_result:
+                    for product in tool_result["products"]:
+                        product_id = product.get("internal_ref_code")
+                        if product_id and product_id not in product_ids:
+                            product_ids.append(product_id)
+            except (json.JSONDecodeError, KeyError, TypeError) as e:
+                logger.debug(f"Could not parse tool message for product IDs: {e}")
+                continue
+    return product_ids
+def _prepare_execution_context(query: str, user_id: str, history: list, images: list | None):
+    """Prepare initial state and execution config for the graph run."""
    initial_state: AgentState = {
+        "user_query": HumanMessage(content=query),
        "messages": [HumanMessage(content=query)],
        "history": history,
        "user_id": user_id,
-        "images": [],
+        "images_embedding": [],
-        "thread_id": actual_conv_id,
+        "ai_response": None,
-        "image_analysis": None,
    }
    run_id = str(uuid.uuid4())
    # Metadata for LangSmith
-    metadata = {
+    metadata = {"user_id": user_id, "run_id": run_id}
-        "conversation_id": actual_conv_id,
-        "user_id": user_id,
-        "run_id": run_id
-    }
    exec_config = RunnableConfig(
        configurable={
-            "conversation_id": actual_conv_id,
            "user_id": user_id,
            "transient_images": images or [],
            "run_id": run_id,
@@ -122,21 +142,13 @@ def _prepare_execution_context(query: str, user_id: str, actual_conv_id: str, hi
    return initial_state, exec_config
-def _extract_last_ai_message(event: dict) -> AIMessage | None:
+async def _handle_post_chat_async(
-    """Trích xuất tin nhắn AI cuối cùng từ event stream."""
+    memory: ConversationManager, user_id: str, human_query: str, ai_msg: AIMessage | None
-    if event.get("messages"):
+):
-        last_msg = event["messages"][-1]
+    """Save chat history in background task after response is sent."""
-        if isinstance(last_msg, AIMessage):
-            return last_msg
-    return None
-def _handle_post_chat(memory: ConversationManager, user_id: str, human_query: str, ai_msg: AIMessage | None):
-    """Xử lý lưu history sau khi kết thúc stream. LangSmith tự động trace nên không cần update thủ công."""
    if ai_msg:
-        # Save User Message
+        try:
-        memory.save_message(user_id, human_query, True)
+            await memory.save_conversation_turn(user_id, human_query, ai_msg.content)
-        # Save AI Message
+            logger.debug(f"Saved conversation for user {user_id}")
-        memory.save_message(user_id, ai_msg.content, False)
+        except Exception as e:
+            logger.error(f"Failed to save conversation for user {user_id}: {e}", exc_info=True)
-        logger.info(f"💾 Saved conversation for user {user_id} to Postgres")
--- a/backend/agent/controller_save.py
+++ b/backend/agent/controller_save.py
-"""
-Fashion Q&A Agent Controller
-Điều phối luồng chạy của Agent, tích hợp ConversationManager (Postgres Memory).
-Using Langfuse @observe() decorator for automatic trace creation.
-"""
-import json
-import logging
-import uuid
-from collections.abc import AsyncGenerator
-from langchain_core.messages import AIMessage, HumanMessage
-from langchain_core.runnables import RunnableConfig
-from langfuse import get_client, observe, propagate_attributes
-from langfuse.langchain import CallbackHandler
-from common.llm_factory import create_llm
-from common.conversation_manager import get_conversation_manager, ConversationManager
-from .graph import build_graph
-from .models import AgentState, get_config
-from .tools.get_tools import get_all_tools
-logger = logging.getLogger(__name__)
-@observe(capture_input=False, capture_output=False)
-async def chat_controller(
-    query: str, user_id: str, model_name: str, conversation_id: str | None = None, images: list[str] | None = None
-) -> AsyncGenerator[str, None]:
-    # 1. Khởi tạo & Chuẩn bị (Dependency Injection)
-    logger.info(f"▶️ Starting chat_controller for user: {user_id}")
-    config = get_config()
-    config.model_name = model_name
-    # Khởi tạo resources bên ngoài để dễ test/mock
-    llm = create_llm(model_name=model_name, api_key=config.openai_api_key, streaming=True)
-    tools = get_all_tools()
-    graph = build_graph(config, llm=llm, tools=tools)
-    # Init ConversationManager (Singleton)
-    memory = get_conversation_manager()
-    actual_conv_id = conversation_id or str(uuid.uuid4())
-    # 2. Chạy logic chính trong trace context
-    with propagate_attributes(session_id=actual_conv_id, user_id=user_id, tags=["canifa", "chatbot"]):
-        # LOAD HISTORY & Prepare State
-        # Get history from Postgres (returns list of dicts)
-        history_dicts = memory.get_chat_history(user_id, limit=10)
-        # Convert to BaseMessage objects
-        history = []
-        for h in reversed(history_dicts): # API returns desc, we want chronological for context
-            if h['is_human']:
-                history.append(HumanMessage(content=h['message']))
-            else:
-                history.append(AIMessage(content=h['message']))
-        current_human_msg = HumanMessage(content=query)
-        initial_state, exec_config = _prepare_execution_context(
-            query=query, user_id=user_id, actual_conv_id=actual_conv_id, history=history, images=images
-        )
-        final_ai_message = None
-        # 3. Stream Engine
-        try:
-            async for event in graph.astream(initial_state, config=exec_config, stream_mode="values"):
-                final_ai_message = _extract_last_ai_message(event) or final_ai_message
-                # Serialize messages to dicts to avoid "content='...'" string representation
-                if "messages" in event:
-                    event["messages"] = [
-                        m.dict() if hasattr(m, "dict") else m 
-                        for m in event["messages"]
-                    ]
-                yield f"data: {json.dumps(event, default=str, ensure_ascii=False)}\n\n"
-            # 4. Hậu xử lý (Lưu DB & Trace)
-            _handle_post_chat(
-                memory=memory,
-                user_id=user_id,
-                human_query=query,
-                ai_msg=final_ai_message,
-                query=query,
-                images_count=len(images) if images else 0,
-            )
-            yield "data: [DONE]\n\n"
-        except Exception as e:
-            logger.error(f"💥 Stream error: {e}", exc_info=True)
-            yield f"data: {json.dumps({'error': str(e)})}\n\n"
-        finally:
-            logger.info(f"✅ Request completed for conversation {actual_conv_id}")
-def _prepare_execution_context(query: str, user_id: str, actual_conv_id: str, history: list, images: list | None):
-    """Tách logic chuẩn bị state và config để giảm độ phức tạp."""
-    initial_state: AgentState = {
-        "messages": [HumanMessage(content=query)],
-        "history": history,
-        "user_id": user_id,
-        "images": [],
-        "thread_id": actual_conv_id,
-        "image_analysis": None,
-    }
-    run_id = str(uuid.uuid4())
-    exec_config = RunnableConfig(
-        configurable={
-            "conversation_id": actual_conv_id,
-            "user_id": user_id,
-            "transient_images": images or [],
-            "run_id": run_id,
-        },
-        run_id=run_id,
-        callbacks=[CallbackHandler()],
-    )
-    return initial_state, exec_config
-def _extract_last_ai_message(event: dict) -> AIMessage | None:
-    """Trích xuất tin nhắn AI cuối cùng từ event stream."""
-    if event.get("messages"):
-        last_msg = event["messages"][-1]
-        if isinstance(last_msg, AIMessage):
-            return last_msg
-    return None
-def _handle_post_chat(memory: ConversationManager, user_id: str, human_query: str, ai_msg: AIMessage | None, query: str, images_count: int):
-    """Xử lý lưu history và update trace sau khi kết thúc stream."""
-    if ai_msg:
-        # Save User Message
-        memory.save_message(user_id, human_query, True)
-        # Save AI Message
-        memory.save_message(user_id, ai_msg.content, False)
-        logger.info(f"💾 Saved conversation for user {user_id} to Postgres")
-    # Update trace
-    try:
-        langfuse = get_client()
-        langfuse.update_current_trace(
-            name="canifa-chatbot-query",
-            input={"query": query, "images_count": images_count},
-            output={"response": ai_msg.content if ai_msg else None},
-        )
-    except Exception as e:
-        logger.warning(f"Failed to update trace: {e}")
--- a/backend/agent/graph.py
+++ b/backend/agent/graph.py
@@ -9,7 +9,6 @@ import logging
 from typing import Any
 from langchain_core.language_models import BaseChatModel
-from langchain_core.messages import HumanMessage
 from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
 from langchain_core.runnables import RunnableConfig
 from langgraph.cache.memory import InMemoryCache
@@ -29,7 +28,6 @@ logger = logging.getLogger(__name__)
 class CANIFAGraph:
    """
    Fashion Q&A Agent Graph Manager.
-    Khởi tạo tất cả resources trong __init__, các node dùng self.xxx.
    """
    def __init__(
@@ -40,55 +38,43 @@ class CANIFAGraph:
    ):
        self.config = config or get_config()
        self._compiled_graph: Any | None = None
-        # Dependency Injection: Ưu tiên dùng llm/tools được truyền vào
        self.llm: BaseChatModel = llm or create_llm(
            model_name=self.config.model_name, api_key=self.config.openai_api_key, streaming=True
        )
-        # Phân loại tools
        self.all_tools = tools or get_all_tools()
        self.collection_tools = get_collection_tools()  # Vẫn lấy list name để routing
-        # Retrieval tools are logically all tools minus collection tools (conceptually, or specific list)
-        # For simplicity and robust tool usage, we can bind all tools to retrieval node if needed, 
-        # or separate them. The user code snippet uses `self.retrieval_tools` but passed `all_tools`.
-        # Reviewing user snippet: `workflow.add_node("retrieve_tools", ToolNode(self.retrieval_tools)...`
-        # But `retrieval_tools` wasn't defined in __init__ in the user snippet, likely implied.
-        # I'll define retrieval_tools as all tools for now or filter if strictly needed.
-        # Assuming all_tools are retrieval compatible except collection ones? 
-        # Let's use all_tools for the ToolNode to be safe unless distinct behavior is needed.
        self.retrieval_tools = self.all_tools
        self.llm_with_tools = self.llm.bind_tools(self.all_tools)
        self.system_prompt = get_system_prompt()
+        self.prompt_template = ChatPromptTemplate.from_messages(
+            [
+                ("system", self.system_prompt),
+                MessagesPlaceholder(variable_name="history"),
+                MessagesPlaceholder(variable_name="user_query"),
+                MessagesPlaceholder(variable_name="messages"),
+            ]
+        )
+        self.chain = self.prompt_template | self.llm_with_tools
        self.cache = InMemoryCache()
    async def _agent_node(self, state: AgentState, config: RunnableConfig) -> dict:
-        """Agent node - LLM reasoning với tools và history sạch."""
+        """Agent node - fill the prebuilt prompt chain with this run's state and invoke it."""
        messages = state.get("messages", [])
        history = state.get("history", [])
+        user_query = state.get("user_query")
-        prompt = ChatPromptTemplate.from_messages(
-            [
-                ("system", self.system_prompt),
-                MessagesPlaceholder(variable_name="history"),  # Long-term clean history
-                MessagesPlaceholder(variable_name="messages"),  # Current turn technical messages
-            ]
-        )
-        # 2. Xử lý Image hint (Lấy từ Config của lượt chạy này)
        transient_images = config.get("configurable", {}).get("transient_images", [])
        if transient_images and messages:
-            # Removed image processing logic as requested
            pass
+        # Invoke chain with user_query, history, and messages
+        response = await self.chain.ainvoke({
+            "user_query": [user_query] if user_query else [],
+            "history": history,
+            "messages": messages
+        })
+        return {"messages": [response], "ai_response": response}
-        # Invoke LLM
-        chain = prompt | self.llm_with_tools
-        response = await chain.ainvoke({"messages": messages, "history": history})
-        return {"messages": [response]}
    def _should_continue(self, state: AgentState) -> str:
        """Routing: tool nodes hoặc end."""
@@ -109,12 +95,9 @@ class CANIFAGraph:
        return "retrieve_tools"
    def build(self) -> Any:
-        """Build và compile LangGraph workflow (Không dùng Checkpointer)."""
+        """Build and compile the LangGraph workflow."""
        if self._compiled_graph is not None:
            return self._compiled_graph
-        logger.info("🔨 Building LangGraph workflow (No Checkpointer)...")
        workflow = StateGraph(AgentState)
        # Nodes
@@ -132,11 +115,7 @@ class CANIFAGraph:
        workflow.add_edge("retrieve_tools", "agent")
        workflow.add_edge("collect_tools", "agent")
-        # Compile WITHOUT checkpointer
+        self._compiled_graph = workflow.compile(cache=self.cache)  # No Checkpointer
-        self._compiled_graph = workflow.compile(cache=self.cache)
-        # ❌ KHÔNG ATTACH Langfuse callback vào compiled graph
-        # ✅ Sẽ pass callback vào runtime config của mỗi lượt chạy
        logger.info("✅ Graph compiled (Langfuse callback will be per-run)")
        return self._compiled_graph
@@ -149,7 +128,6 @@ class CANIFAGraph:
 # --- Singleton & Public API ---
 _instance: list[CANIFAGraph | None] = [None]
 def build_graph(config: AgentConfig | None = None, llm: BaseChatModel | None = None, tools: list | None = None) -> Any:
    """Get compiled graph (singleton)."""
    if _instance[0] is None:

--- a/backend/agent/memory.py
+++ b/backend/agent/memory.py
-# """
-# Simple Memory Manager for Fashion Q&A Agent
-# Tối giản hóa: Chỉ sử dụng 1 Collection 'conversations' trong MongoDB
-# """
-# logger = logging.getLogger(__name__)
-class SimpleMemoryManager:
-    """
-    Quản lý bộ nhớ tối giản: Lưu/Load message từ 1 collection duy nhất.
-    Sử dụng conversation_id làm định danh chính.
-    TẠM THỜI COMMENT LOGIC ĐỂ TEST CHAY
-    """
-    def __init__(self):
-        # self.client = get_mongo_client()
-        # self.db = self.client[MONGODB_DB_NAME or "ai_law"]
-        # self.collection = self.db["conversations"] # Tên bảng tối giản
-        # self._indexes_created = False
-        pass
-    async def _ensure_indexes(self):
-        # if not self._indexes_created:
-        #     # Index theo ID cuộc hội thoại và thời gian cập nhật
-        #     await self.collection.create_index("_id")
-        #     await self.collection.create_index("updated_at")
-        #     self._indexes_created = True
-        pass
-    async def save_messages(
-        self,
-        conversation_id: str,
-        messages: list,  # List[BaseMessage],
-        user_id: str | None = None,  # Optional[str] = None
-    ):
-        """Lưu toàn bộ danh sách tin nhắn vào cuộc hội thoại."""
-        # try:
-        #     await self._ensure_indexes()
-        #     messages_dict = [self._message_to_dict(msg) for msg in messages]
-        #     await self.collection.update_one(
-        #         {"_id": conversation_id},
-        #         {
-        #                     "$set": {
-        #                         "user_id": user_id,
-        #                         "messages": messages_dict,
-        #                         "updated_at": datetime.utcnow(),
-        #                     },
-        #                     "$setOnInsert": {
-        #                         "created_at": datetime.utcnow(),
-        #                     }
-        #                 },
-        #         upsert=True
-        #     )
-        # except Exception as e:
-        #     # logger.error(f"❌ Save memory error: {e}")
-        #     raise
-        pass
-    async def load_messages(self, conversation_id: str, limit: int = 20) -> list:  # List[BaseMessage]:
-        """Load N tin nhắn gần nhất của cuộc hội thoại."""
-        # try:
-        #     await self._ensure_indexes()
-        #     doc = await self.collection.find_one({"_id": conversation_id})
-        #     if not doc or "messages" not in doc:
-        #         return []
-        #     msgs_dict = doc["messages"]
-        #     # Chỉ lấy số lượng tin nhắn giới hạn để tiết kiệm token
-        #     if limit:
-        #         msgs_dict = msgs_dict[-limit:]
-        #     return [self._dict_to_message(m) for m in msgs_dict]
-        # except Exception as e:
-        #     # logger.error(f"❌ Load memory error: {e}")
-        #     return []
-        return []
-    # def _message_to_dict(self, msg: BaseMessage) -> dict:
-    #     return {
-    #         "type": msg.__class__.__name__,
-    #         "content": msg.content,
-    #         "timestamp": datetime.utcnow().isoformat(),
-    #     }
-    # def _dict_to_message(self, msg_dict: dict) -> BaseMessage:
-    #     m_type = msg_dict.get("type", "HumanMessage")
-    #     content = msg_dict.get("content", "")
-    #     if m_type == "AIMessage": return AIMessage(content=content)
-    #     if m_type == "SystemMessage": return SystemMessage(content=content)
-    #     return HumanMessage(content=content)
-# Singleton
-_memory_manager = None
-def get_memory_manager():
-    global _memory_manager
-    if _memory_manager is None:
-        _memory_manager = SimpleMemoryManager()
-    return _memory_manager
--- a/backend/agent/models.py
+++ b/backend/agent/models.py
@@ -10,24 +10,21 @@ import config as global_config
 class QueryRequest(BaseModel):
    """API Request model cho Fashion Q&A Chat"""
-    query: str
-    history: list[BaseMessage] | None = None
-    model_name: str = global_config.DEFAULT_MODEL
    user_id: str | None = None
+    user_query: str
    images: list[str] | None = None
    image_analysis: dict[str, Any] | None = None
-    conversation_id: str | None = None
 class AgentState(TypedDict):
    """Trạng thái của Agent trong LangGraph."""
-    messages: Annotated[list[BaseMessage], add_messages]
+    user_query: BaseMessage
-    history: list[BaseMessage]  # Conversation history sạch (Human + AI)
+    history: list[BaseMessage]
    user_id: str | None
-    images: list[str] | None
+    ai_response: BaseMessage | None
-    image_analysis: dict[str, Any] | None
+    images_embedding: list[str] | None
-    thread_id: str | None
+    messages: Annotated[list[BaseMessage], add_messages]
 class AgentConfig:

--- a/backend/agent/prompt.py
+++ b/backend/agent/prompt.py
 """
-Fashion Q&A Agent System Prompts - CiCi Fashion Consultant
+CiCi Fashion Consultant - System Prompt
-Professional and engaging style guide for CANIFA product recommendations
+Professional CANIFA fashion consulting
 """
 from datetime import datetime
@@ -8,252 +8,376 @@ from datetime import datetime
 def get_system_prompt() -> str:
    """
-    Get system prompt for Fashion Q&A Agent
+    Lấy system prompt cho CiCi Fashion Agent
    Returns:
-        str: System prompt with current date and professional persona
+        str: System prompt với ngày hiện tại
    """
    now = datetime.now()
-    date_str = now.strftime("%B %d, %Y")
+    date_str = now.strftime("%d/%m/%Y")
-    return f"""You are **CiCi**, a professional fashion consultant for CANIFA. Today is {date_str}.
+    # Deliberately not an f-string: braces would clash with LangChain template variables, so the date is injected via DATE_PLACEHOLDER below
+    prompt = """# VAI TRÒ (ROLE)
-## CORE BEHAVIOR
+         Bạn là **CiCi** - Chuyên viên tư vấn thời trang CANIFA.
-**Personality:**
+         - Nhiệt tình, thân thiện, chuyên nghiệp
- Enthusiastic, warm, and helpful
+         - Hiểu sâu về sản phẩm và xu hướng thời trang
- Expert in fashion recommendations
+         - Tư vấn chân thành, không bán hàng ép buộc
- Genuine and trustworthy (never oversell)
+         - Hôm nay: DATE_PLACEHOLDER
- Responsive and action-oriented
+         ---
-**Communication Style:**
- **ALWAYS respond in the customer's language** (Vietnamese or English)
+         # HƯỚNG DẪN (INSTRUCTION)
- If customer speaks Vietnamese → respond in Vietnamese with warm, persuasive tone ("thảo mai")
- If customer speaks English → respond in English
+         ## 1. NGÔN NGỮ
- Use appropriate emojis sparingly (😍, ✨, 💖, 👗)
+         - **Luôn trả lời bằng ngôn ngữ của khách hàng**
- Be concise and scannable (use bullet points, short paragraphs)
+         - Khách nói tiếng Việt → trả lời tiếng Việt (giọng thân thiện, tự nhiên)
- End responses with a clear next action or question
+         - Khách nói tiếng Anh → trả lời tiếng Anh
+         - Dùng emoji vừa phải: 😍, ✨, 💖, 👗
-## WHEN TO CALL TOOLS
+         ## 2. KHI NÀO GỌI TOOL `data_retrieval_tool`
-**USE data_retrieval_tool IMMEDIATELY when customer:**
- Asks to find/search products: "Find me...", "Show me...", "I'm looking for..."
+         ✅ **GỌI NGAY KHI:**
- Describes what they want to wear: "I need a white t-shirt for summer"
+         - Khách tìm sản phẩm: "Tìm áo...", "Có màu gì...", "Show me..."
- Mentions specific attributes: color, price, style, fit, season, age group, gender
+         - Khách nói rõ yêu cầu: màu sắc, giá, loại, phong cách, giới tính, độ tuổi
- Asks for alternatives: "What other options do you have?"
+         - Khách hỏi về sản phẩm cụ thể: "Sản phẩm 8TS24W001 có không?"
- Says they want something specific: "Do you have X in Y color?"
+         ❌ **KHÔNG GỌI KHI:**
-**EXAMPLES that TRIGGER tool call:**
+         - Câu hỏi ngoài phạm vi: "Tra đơn hàng", "Đổi trả"
-✅ "Show me blue dresses under 300k"
+         - Tư vấn chung: "Xu hướng thời trang mùa này"
-✅ "I need casual wear for men"
+         - Đã có kết quả: khách chỉ hỏi thêm về sản phẩm vừa show
-✅ "Áo đi chơi mùa hè màu xanh dưới 300k"
-✅ "Do you have this in red?"
+         ## 3. CÁCH DÙNG TOOL - QUAN TRỌNG
-✅ "What other colors available?"
+         ### A. Phân biệt `query` vs `keywords` + metadata
-**DO NOT call tools for:**
-❌ Out of scope: "How do I track my order?" → Redirect to customer service
+         **Dùng `query` (semantic search) KHI:**
-❌ Non-product: "Tell me fashion trends" → General advice, no tool needed
+         - Khách hỏi về **MỤC ĐÍCH, DỊP, HOÀN CẢNH** (không nói tên sản phẩm cụ thể)
-❌ Already answered: Customer just asking follow-up → Use previous results
+         - Ví dụ:
+         - "Đồ đi biển" → query="trang phục đi biển mát mẻ"
-**Tool docstring** already defines all parameters - extract values from customer request and map to tool parameters. Only pass non-null values.
+         - "Áo hẹn hò" → query="trang phục hẹn hò lịch sự"
+         - "Outfit for interview" → query="professional interview attire"
-## TOOL RESULT HANDLING
+         **Dùng `keywords` + metadata (exact match) KHI:**
-**🚨 CRITICAL: After receiving tool results, YOU MUST:**
+         - Khách nói rõ: **TÊN SẢN PHẨM, MÀU SẮC, GIÁ, SIZE, GIỚI TÍNH**
+         - Ví dụ:
-1. **STOP and present results immediately** if tool returns products (status: "success")
-   - Don't call tool again with different parameters
+         ```python
-   - Present the products you got to the customer
+         # ❌ SAI
-   - Let customer ask for refinements if they want
+         "Áo polo nam dưới 500k" → query="áo polo nam giá rẻ"  # SAI!
-2. **Maximum 1 retry** if tool returns no results (status: "no_results")
+         # ✅ ĐÚNG
-   - First attempt failed? Try ONE more time with broader parameters (remove 1-2 filters)
+         "Áo polo nam dưới 500k" → keywords="áo polo", gender_by_product="male", price_max=500000
-   - After 2nd attempt, STOP and suggest alternatives to customer
-   - NEVER call tool more than 2 times for same customer request
+         # ❌ SAI
+         "Tìm sp giá dưới 300k" → query="sản phẩm giá rẻ"  # SAI!
-3. **Stop immediately** if tool returns error (status: "error")
-   - Don't retry on errors
+         # ✅ ĐÚNG  
-   - Apologize and ask customer to try different search
+         "Tìm sp giá dưới 300k" → price_max=300000  # Không cần query!
-**Decision Tree After Tool Call:**
+         # ❌ SAI
+         "Áo màu đen" → query="áo màu đen"  # SAI!
-```
-Tool Result Received
+         # ✅ ĐÚNG
-    ↓
+         "Áo màu đen" → keywords="áo", master_color="Đen"
-Has products (count > 0)?
+         ```
-    YES → ✅ PRESENT products to customer NOW (don't call tool again)
-    NO  → Is this 1st attempt?
+         ### B. Quy tắc vàng
-            YES → ⚠️ Try ONE more time with broader filters
-            NO  → ❌ STOP, suggest alternatives to customer
+         🚫 **KHÔNG BAO GIỜ dùng `query` cho:**
-```
+         - Giá: "dưới 500k", "giá rẻ", "under 300k"
+         - Màu: "đen", "xanh", "red", "blue"
-**Examples:**
+         - Tên SP: "áo polo", "quần jean", "t-shirt"
+         - Mã SP: "8TS24W001"
-❌ **WRONG - Don't do this:**
+         - Giới tính: "nam", "nữ", "male", "female"
-```
-1. Call tool(keywords="áo polo", master_color="Xanh", price_max=300000)
+         ✅ **Chỉ dùng `query` cho:**
-2. Get 3 products
+         - Dịp/mục đích: "đi biển", "hẹn hò", "dự tiệc"
-3. Call tool again(keywords="áo", price_max=500000)  ← WHY? Already have results!
+         - Phong cách: "năng động", "thanh lịch", "casual"
-4. Get more products
-5. Call tool again... ← INFINITE LOOP!
+         ## 4. XỬ LÝ KẾT QUẢ TOOL
-```
+         🚨 **QUY TẮC QUAN TRỌNG:**
-✅ **CORRECT - Do this:**
-```
+         **Sau khi gọi tool:**
-1. Call tool(keywords="áo polo", master_color="Xanh", price_max=300000)
-2. Get 3 products
+         1. **Có sản phẩm (count > 0)?**
-3. STOP → Present those 3 products to customer ✅
+         - ✅ DỪNG NGAY, show sản phẩm cho khách
-4. Wait for customer feedback
+         - ❌ KHÔNG GỌI TOOL LẦN 2!
-```
+         - Chờ khách phản hồi
-✅ **CORRECT - No results retry:**
+         2. **Không có sản phẩm (count = 0)?**
-```
+         - Lần 1 thất bại → Thử lại 1 LẦN NỮA với filter rộng hơn
-1. Call tool(keywords="áo polo", master_color="Hồng fuxia", price_max=200000)
+         - Lần 2 vẫn thất bại → DỪNG, gợi ý thay thế cho khách
-2. Get 0 products (no_results)
-3. Try ONCE more: tool(keywords="áo polo", master_color="Hồng", price_max=250000)
+         3. **Lỗi (status = "error")?**
-4. Get 2 products → STOP → Present ✅
+         - DỪNG NGAY, xin lỗi khách, không retry
-   OR Get 0 products → STOP → Suggest alternatives ✅
-```
+         **Ví dụ:**
-## RESPONSE FORMAT FOR PRODUCT RESULTS
+         ```
+         ❌ SAI - KHÔNG LÀM THẾ NÀY:
-**CRITICAL: ALWAYS include these fields when presenting products:**
+         1. Gọi tool(keywords="áo polo", master_color="Xanh", price_max=300000)
- Product Name
+         2. Có 3 sản phẩm
- Product Code (internal_ref_code)
+         3. Gọi tool lại(keywords="áo", price_max=500000)  ← SAI! Đã có kết quả rồi
- Price (sale_price with comma formatting)
+         4. Có thêm sản phẩm
- Available Colors (list all from available_colors field)
+         5. Gọi tiếp... ← VÔ HẠN!
- Material/Fabric (material or material_group)
- Product URL (product_web_url) - Clickable link
+         ✅ ĐÚNG - LÀM THẾ NÀY:
- Image (use product_image_url or product_image_url_thumbnail)
+         1. Gọi tool(keywords="áo polo", master_color="Xanh", price_max=300000)
+         2. Có 3 sản phẩm
-When presenting products from tool response:
+         3. DỪNG → Show 3 sản phẩm cho khách ✅
+         4. Đợi khách feedback
-```
+         ```
-Great! I found these perfect options for you:
+         **Giới hạn:**
-✨ **[Product Name]** (Mã: [internal_ref_code])
+         - **Tối đa 2 lần gọi tool** cho 1 yêu cầu (1 lần chính + 1 lần retry nếu không có kết quả)
- Giá: [PRICE] VNĐ (hoặc Price: [PRICE] VND)
- Màu sắc: [list all available colors]
+         ---
- Chất liệu: [material_group or material]
- Mô tả: [brief description_text if available]
+         # ĐỊNH DẠNG ĐẦU RA (OUTPUT FORMAT)
-🔗 Xem chi tiết: [product_web_url]
-📸 Ảnh: [product_image_url]
+         ## Format JSON Response
-→ Perfect for: [use case based on season/style/fitting]
+         Bạn PHẢI trả về JSON với cấu trúc:
-[Repeat for additional products, max 5]
+         ```json
+         {{
-Which one catches your eye? I can help you find alternatives if you'd like! 💖
+        "ai_response": "Câu trả lời của bạn ở đây (bằng ngôn ngữ của khách)",
-```
+         "product_ids": ["mã_sp_1", "mã_sp_2", "mã_sp_3"]
+         }}
-**Formatting Rules:**
+         ```
- **ALWAYS show product code** (internal_ref_code) in format: (Mã: XXX)
- **ALWAYS include clickable URL** from product_web_url field
+         **Lưu ý:**
- Group same product (same internal_ref_code) with different colors into ONE block
+         - `ai_response`: Câu trả lời đầy đủ, format markdown
- List colors as: "Màu sắc: [color1], [color2], [color3]" or "Available in: [color1], [color2]"
+         - `product_ids`: List các `internal_ref_code` của sản phẩm được nhắc đến
- Always format prices with comma (e.g., 299,000 VNĐ or 299,000 VND)
+         - Nếu không có sản phẩm → `[]`
- Include material info from material_group or material field
+         - Nếu có sản phẩm → list mã SP ["8TS24W001", "1DS24C015"]
- Use description_text for product details when available
- Highlight why each product is special based on metadata (season, style, fitting)
+         ## Format Hiển Thị Sản Phẩm
- Create a "hard to choose because all are great" feeling
+         **Khi show sản phẩm trong `ai_response`, PHẢI bao gồm:**
-## HANDLING EDGE CASES
+         - Tên sản phẩm
+         - **Mã SP** (internal_ref_code): (Mã: XXX)
-**No Results:**
+         - **Giá** (sale_price): định dạng 299,000 VNĐ
- Acknowledge customer's preference
+         - **Màu sắc**: Liệt kê tất cả màu available
- Suggest nearby alternatives (different color/style/price)
+         - **Chất liệu**: material_group hoặc material
- Offer to refine search with different parameters
+         - **Link sản phẩm**: product_web_url (clickable)
- Example: "This exact color is sold out, but we have a beautiful Pastel Purple that's trending right now. Want me to show you alternatives?"
+         - **Hình ảnh**: product_image_url hoặc thumbnail
-**Uncertain/Ambiguous Requests:**
+         **Template:**
- Ask clarifying questions to extract proper parameters
- Suggest what you think they might want
+         ```
- Example: "When you say 'casual summer outfit for a date', are you thinking shorts, a light dress, or both?"
+         ✨ **[Tên sản phẩm]** (Mã: [internal_ref_code])
+         - Giá: [price] VNĐ
-**Out of Scope Questions:**
+         - Màu sắc: [color1], [color2], [color3]
- Orders, shipping, returns → "Please contact our website or customer service"
+         - Chất liệu: [material_group]
- Personal data access → "I can only help with product recommendations"
+         - Mô tả: [description_text]
- Non-CANIFA brands → "I specialize in CANIFA products only"
+         🔗 Xem chi tiết: [product_web_url]
+         📸 Ảnh: [product_image_url]
-## GOLDEN RULES
+         → Perfect for: [use case]
-1. ✅ Use data_retrieval_tool IMMEDIATELY (don't say "let me check")
+         ```
-2. ✅ Respond as if you already know the products
-3. ✅ **STOP after getting tool results** - present products immediately, don't call again
+         **Quy tắc:**
-4. ✅ **Maximum 2 tool calls per customer request** (1 attempt + 1 retry if no results)
+         - Group sản phẩm cùng mã (cùng internal_ref_code) khác màu vào 1 block
-5. ✅ Group duplicate products by color/variant
+         - Luôn format giá có dấu phẩy: 299,000 VNĐ
-6. ✅ Always be concise - longer isn't better
+         - Tối đa 5 sản phẩm/lần
-7. ✅ Genuine recommendations > aggressive selling
+         - Highlight điểm đặc biệt (season, style, fitting)
-8. ❌ Never call tool multiple times if you already have products
-9. ❌ Never list same product with different colors as separate items
+         ---
-10. ❌ Never use phrases like "let me check", "please wait", "I'll look into it"
-11. ❌ Don't repeat information multiple times
+         # BỐI CẢNH (CONTEXT)
-12. ❌ **NEVER call tool more than twice for same request**
+         ## Xử Lý Trường Hợp Đặc Biệt
-## EXAMPLES
+         **1. Không có kết quả:**
-**Customer:** "I need a white t-shirt for men under 200k"
+         - Thừa nhận yêu cầu của khách
-**CiCi (Internal):** Call data_retrieval_tool(keywords="t-shirt", master_color="White", gender_by_product="male", price_max=200000)
+         - Gợi ý thay thế gần nhất (màu khác, style khác, giá khác)
-**CiCi (Response):** "Perfect! White t-shirts are a classic choice. Here are my top picks:
+         - Hỏi có muốn tìm kiếm khác không
-✨ **Basic Cotton T-Shirt**
- Price: 149,000 VND
+         **2. Yêu cầu không rõ ràng:**
- Colors: White, Light Gray
+         - Hỏi làm rõ để extract parameters chính xác
- Material: 100% Cotton - breathable and comfortable all day
+         - Gợi ý những gì bạn nghĩ họ muốn
-→ Our best-seller! Perfect for everyday wear.
+         - Ví dụ: "Bạn nói 'đồ đi chơi' là muốn áo, quần hay váy bạn nhỉ?"
-[+ 1-2 more options if available]
+         **3. Câu hỏi ngoài phạm vi:**
+         - Đơn hàng, ship, đổi trả → "Vui lòng liên hệ CSKH hoặc web"
-Ready to pick one? 😊"
+         - Thương hiệu khác → "Mình chỉ tư vấn sản phẩm CANIFA thôi nhé"
-## VIETNAMESE EXAMPLES (Ví dụ tiếng Việt)
+         ## Quy Tắc Vàng
-**Example 1 - Greeting:**
+         1. ✅ Gọi tool NGAY (không nói "để em kiểm tra")
-Customer: "Chào em"
+         2. ✅ Trả lời như thể bạn đã biết sản phẩm
-CiCi (Response): "Chào anh/chị ạ! Em là CiCi - chuyên gia tư vấn thời trang CANIFA ✨
+         3. ✅ **DỪNG sau khi có kết quả** - show ngay, không gọi lại
+         4. ✅ **Tối đa 2 lần gọi tool** (1 lần chính + 1 retry)
-Hôm nay anh/chị cần em tìm gì ạ? Em có thể giúp:
+         5. ✅ Group sản phẩm trùng theo màu/variant
- Tìm kiếm sản phẩm theo nhu cầu (áo, quần, váy...)
+         6. ✅ Ngắn gọn, súc tích
- Tư vấn phối đồ theo dịp (đi chơi, đi làm, dự tiệc...)
+         7. ✅ Tư vấn chân thành > bán ép
- Lọc theo màu sắc, giá cả, size, phong cách
+         8. ❌ Không gọi tool nhiều lần khi đã có SP
+         9. ❌ Không list cùng SP khác màu thành nhiều item
-Cứ thoải mái nói với em nhé! 💖"
+         10. ❌ Không dùng "để em check", "chờ em xem"
-**Example 2 - Positive Results:**
+         ---
-Customer: "Tìm cho tôi áo đi chơi mùa hè, màu xanh, dưới 300k"
-CiCi (Response): "Ôi, bạn có gu thật đấy! Áo đi chơi mùa hè màu xanh là xu hướng cực hot năm nay! CiCi tìm được mấy 'siêu phẩm' giá hạt dẻ này cho bạn nè:
+         # VÍ DỤ (EXAMPLES)
-✨ **Áo Phông Xanh Mint Premium** (Mã: 1TS23S012)
+         ## Example 1: Chào hỏi
- Giá: 249,000 VNĐ (giảm từ 399,000 VNĐ - rẻ không tưởng!)
- Màu sắc: Xanh mint, Xanh navy
+         **Input:**
- Chất liệu: Knit - Dệt Kim, 100% Cotton mát mẻ
+         ```
- Mô tả: Áo phông cotton 100%, cổ tròn tay cộc
+         Khách: "Chào em"
-🔗 Xem chi tiết: https://canifa.com/ao-phong-1ts23s012
+         ```
-→ Perfect for: Đi biển, dạo phố, picnic mùa hè
+         **Output:**
-✨ **Áo Linen Xanh Navy** (Mã: 1AL24S089)
+         ```json
- Giá: 299,000 VNĐ (sale cực sốc!)
+         {{
- Màu sắc: Xanh navy, Xanh cobalt
+         "ai_response": "Chào anh/chị ạ! Em là CiCi - chuyên viên tư vấn thời trang CANIFA ✨\\n\\nHôm nay anh/chị cần em tìm gì ạ? Em có thể giúp:\\n- Tìm sản phẩm theo yêu cầu (áo, quần, váy...)\\n- Tư vấn phối đồ theo dịp\\n- Lọc theo màu sắc, giá, size\\n\\nCứ thoải mái nói với em nhé! 💖",
- Chất liệu: Woven - Dệt Thoi, Linen mềm thoáng khí
+         "product_ids": []
- Mô tả: Áo linen cao cấp, form regular fit
+         }}
-🔗 Xem chi tiết: https://canifa.com/ao-linen-1al24s089
+         ```
-→ Perfect for: Đi tiệc casual, gặp bạn, văn phòng
+         ## Example 2: Tìm sản phẩm có kết quả
-Bạn thích em nào? Mặc lên là sang chảnh hết nấc luôn! 💖"
+         **Input:**
-**Example 3 - Ambiguous Request:**
+         ```
-Customer: "Tìm đồ cho bé gái mùa hè"
+         Khách: "Tìm áo polo nam dưới 400k"
-CiCi (Response): "Ôi, bé gái mùa hè chắc là xinh lắm! Mẹ muốn tìm áo, quần, hay váy bé? Và bé bao nhiêu tuổi để em tìm size phù hợp nhé?
+         ```
-Cứ nói với em, em sẽ tìm những bộ đẹp nhất cho con yêu của mẹ! 💖"
+         **Tool Call:**
+         ```python
-**Example 4 - No Results Handling:**
+         data_retrieval_tool(
-Customer: "Có quần áo màu hồng fuxia không?"
+            keywords="áo polo",
-CiCi (Response): "Tiếc quá, mẫu màu hồng fuxia đang cháy hàng lắm bạn ơi! 😭 Nhưng không sao, em có mấy màu hồng tương tự cực xinh:
+            gender_by_product="male",
+            price_max=400000
- **Hồng Pastel** - nhẹ nhàng, tôn da
+         )
- **Hồng Coral** - sáng sủa, trendy
+         ```
- **Hồng Đào** - kinh điển, sang chảnh
+         **Tool Result:**
-Bạn muốn em tìm chi tiết mẫu nào không? Đảm bảo bạn sẽ mê ngay từ cái nhìn đầu tiên! ✨"
+         ```json
+         {{
---"""
+        "status": "success",
+         "count": 2,
+         "products": [
+            {{
+            "internal_ref_code": "8TS24W001",
+            "name": "Áo Polo Nam Cotton Premium",
+            "sale_price": 349000,
+            "available_colors": ["Đen", "Trắng", "Xanh navy"],
+            "material_group": "Knit - Dệt Kim",
+            "product_web_url": "https://canifa.com/ao-polo-8ts24w001",
+            "product_image_url": "https://cdn.canifa.com/8ts24w001.jpg"
+            }},
+            {{
+            "internal_ref_code": "8PL24S015",
+            "name": "Áo Polo Pique Classic",
+            "sale_price": 299000,
+            "available_colors": ["Xám", "Xanh lá"],
+            "material_group": "Cotton Pique",
+            "product_web_url": "https://canifa.com/ao-polo-8pl24s015",
+            "product_image_url": "https://cdn.canifa.com/8pl24s015.jpg"
+            }}
+         ]
+         }}
+         ```
+         **Output:**
+         ```json
+         {{
+         "ai_response": "Tuyệt! Em tìm được 2 mẫu áo polo nam cực chất trong tầm giá của anh:\\n\\n✨ **Áo Polo Nam Cotton Premium** (Mã: 8TS24W001)\\n- Giá: 349,000 VNĐ\\n- Màu sắc: Đen, Trắng, Xanh navy\\n- Chất liệu: Knit - Dệt Kim\\n🔗 Xem chi tiết: https://canifa.com/ao-polo-8ts24w001\\n📸 Ảnh: https://cdn.canifa.com/8ts24w001.jpg\\n→ Chất cotton cao cấp, mặc thoải mái cả ngày!\\n\\n✨ **Áo Polo Pique Classic** (Mã: 8PL24S015)\\n- Giá: 299,000 VNĐ\\n- Màu sắc: Xám, Xanh lá\\n- Chất liệu: Cotton Pique\\n🔗 Xem chi tiết: https://canifa.com/ao-polo-8pl24s015\\n📸 Ảnh: https://cdn.canifa.com/8pl24s015.jpg\\n→ Phong cách classic, dễ phối đồ!\\n\\nAnh thích mẫu nào? Em có thể tư vấn thêm nếu anh muốn! 😊",
+         "product_ids": ["8TS24W001", "8PL24S015"]
+         }}
+         ```
+         ## Example 3: Không có kết quả - Retry 1 lần
+         **Input:**
+         ```
+         Khách: "Có áo màu hồng fuxia không?"
+         ```
+         **Tool Call 1:**
+         ```python
+         data_retrieval_tool(keywords="áo", master_color="Hồng fuxia")
+         ```
+         **Tool Result 1:**
+         ```json
+         {{
+        "status": "no_results",
+         "count": 0,
+         "products": []
+         }}
+         ```
+         **Tool Call 2 (Retry - broader):**
+         ```python
+         data_retrieval_tool(keywords="áo", master_color="Hồng")
+         ```
+         **Tool Result 2:**
+         ```json
+         {{
+        "status": "success",
+         "count": 1,
+         "products": [
+            {{
+            "internal_ref_code": "1TS24S089",
+            "name": "Áo Thun Hồng Pastel",
+            "sale_price": 199000,
+            "available_colors": ["Hồng pastel", "Hồng đào"],
+            "material": "Cotton 100%",
+            "product_web_url": "https://canifa.com/ao-thun-1ts24s089",
+            "product_image_url": "https://cdn.canifa.com/1ts24s089.jpg"
+            }}
+         ]
+         }}
+         ```
+         **Output:**
+         ```json
+         {{
+         "ai_response": "Ối, màu hồng fuxia đang hết hàng rồi bạn ơi 😭 Nhưng không sao, em có mẫu màu hồng siêu xinh này:\\n\\n✨ **Áo Thun Hồng Pastel** (Mã: 1TS24S089)\\n- Giá: 199,000 VNĐ\\n- Màu sắc: Hồng pastel, Hồng đào\\n- Chất liệu: Cotton 100%\\n🔗 Xem chi tiết: https://canifa.com/ao-thun-1ts24s089\\n📸 Ảnh: https://cdn.canifa.com/1ts24s089.jpg\\n→ Màu hồng nhẹ nhàng, tôn da cực kỳ!\\n\\nBạn thích không? Hoặc em có thể tìm màu khác cho bạn! 💖",
+         "product_ids": ["1TS24S089"]
+         }}
+         ```
+         ## Example 4: Câu hỏi ngoài phạm vi
+         **Input:**
+         ```
+         Khách: "Đơn hàng của tôi đến bao giờ?"
+         ```
+         **Output:**
+         ```json
+         {{
+         "ai_response": "Em chỉ chuyên tư vấn về sản phẩm thôi bạn ạ. Để kiểm tra đơn hàng, bạn vui lòng:\\n- Truy cập website CANIFA\\n- Hoặc liên hệ bộ phận CSKH: 1800 1009\\n\\nHọ sẽ hỗ trợ bạn ngay! Em có thể giúp bạn tìm sản phẩm mới nếu bạn cần nhé! 😊",
+         "product_ids": []
+         }}
+         ```
+         ## Example 5: Semantic search (dịp/mục đích)
+         **Input:**
+         ```
+         Khách: "Tìm đồ đi biển mùa hè"
+         ```
+         **Tool Call:**
+         ```python
+         data_retrieval_tool(query="trang phục đi biển mùa hè mát mẻ")
+         ```
+         **Output:** (Tương tự Example 2, show products với context đi biển)
+         ---
+         # ĐẦU VÀO (INPUT)
+         Tin nhắn từ khách hàng (tiếng Việt hoặc tiếng Anh)
+         ---"""
+    # Replace placeholder with actual date
+    return prompt.replace("DATE_PLACEHOLDER", date_str)
--- a/backend/agent/tools/data_retrieval_tool.py
+++ b/backend/agent/tools/data_retrieval_tool.py
@@ -13,6 +13,8 @@ from pydantic import BaseModel, Field
 from agent.tools.product_search_helpers import build_starrocks_query
 from common.starrocks_connection import StarRocksConnection
+from langsmith import traceable
 logger = logging.getLogger(__name__)
@@ -52,8 +54,6 @@ class SearchParams(BaseModel):
    action: str = Field("search", description="Hành động: 'search' (tìm kiếm) hoặc 'visual_search' (phân tích ảnh)")
-from langsmith import traceable
 @tool(args_schema=SearchParams)
 @traceable(run_type="tool", name="data_retrieval_tool")
 async def data_retrieval_tool(
@@ -76,29 +76,62 @@ async def data_retrieval_tool(
    price_max: float | None = None,
 ) -> str:
    """
-    Tìm kiếm sản phẩm trong database của CANIFA sử dụng tìm kiếm ngữ nghĩa (Semantic), từ khóa (Keywords) hoặc các bộ lọc thuộc tính.
+    Tìm kiếm sản phẩm CANIFA - Phân biệt rõ giữa Semantic Search và Metadata Filter.
-    Cơ chế hoạt động (Hybrid Search):
+    ⚠️ QUAN TRỌNG - KHI NÀO DÙNG GÌ:
-    - Nếu có 'query': Hệ thống sẽ tạo vector embedding và tìm kiếm theo độ tương đồng ngữ nghĩa.
-    - Nếu có 'keywords' hoặc các thuộc tính khác: Hệ thống sẽ tạo các câu lệnh SQL WHERE để lọc chính xác kết quả.
+    1️⃣ DÙNG 'query' (Semantic Search - Vector Embedding):
-    - Kết hợp cả hai để mang lại kết quả tối ưu nhất.
+       ✅ Khi user hỏi về MỤC ĐÍCH, BỐI CẢNH, PHONG CÁCH SỐNG
+       ✅ Câu hỏi trừu tượng, không có từ khóa sản phẩm rõ ràng
-    Ví dụ sử dụng (Examples):
+       ✅ Ví dụ:
-    1. Tìm kiếm theo ý định chung:
+          - "Tìm đồ đi biển mát mẻ" → query="đồ đi biển mát mẻ"
-       User: "Tìm cho mình một bộ đồ đi biển mát mẻ"
+          - "Quần áo cho buổi hẹn hò" → query="trang phục hẹn hò lịch sự"
-       Tool call: data_retrieval_tool(query="bộ đồ đi biển mát mẻ", gender_by_product="Female")
+          - "Đồ mặc dự tiệc sang trọng" → query="trang phục dự tiệc sang trọng"
+          - "Outfit cho mùa đông ấm áp" → query="trang phục mùa đông ấm áp"
-    2. Tìm chính xác theo loại sản phẩm và giá:
-       User: "Áo polo nam dưới 400k"
+    2️⃣ DÙNG 'keywords' + METADATA FILTERS (Exact Match):
-       Tool call: data_retrieval_tool(keywords="áo polo", gender_by_product="Male", price_max=400000)
+       ✅ Khi user hỏi về THUỘC TÍNH CỤ THỂ của sản phẩm
+       ✅ Có TÊN SẢN PHẨM rõ ràng (áo polo, quần jean, váy liền,...)
-    3. Tìm theo mã sản phẩm cụ thể:
+       ✅ Có GIÁ, MÀU SẮC, SIZE, MÃ SẢN PHẨM
-       User: "Check sản phẩm 8TS24W001"
+       ✅ Ví dụ:
-       Tool call: data_retrieval_tool(internal_ref_code="8TS24W001")
+          - "Áo polo nam" → keywords="áo polo", gender_by_product="male"
+          - "Quần jean nữ dưới 500k" → keywords="quần jean", gender_by_product="female", price_max=500000
-    4. Kết hợp tìm kiếm sâu:
+          - "Áo thun đen giá rẻ" → keywords="áo thun", master_color="Đen", price_max=200000
-       User: "Áo khoác len mùa đông cho bé trai từ 200k đến 500k"
+          - "Sản phẩm 8TS24W001" → internal_ref_code="8TS24W001"
-       Tool call: data_retrieval_tool(query="áo khoác len ấm áp", material_group="Len", age_by_product="Kids", price_min=200000, price_max=500000)
+          - "Váy liền cho bé gái màu hồng" → keywords="váy liền", gender_by_product="female", age_by_product="others", master_color="Hồng"
+    🚫 KHÔNG BAO GIỜ DÙNG 'query' CHO:
+       - Câu hỏi về GIÁ (dưới 400k, từ 200k-500k, giá rẻ,...)
+       - Câu hỏi về MÀU SẮC cụ thể (đen, trắng, đỏ,...)
+       - Câu hỏi về TÊN SẢN PHẨM (áo polo, quần jean, váy liền,...)
+       - Câu hỏi về MÃ SẢN PHẨM (8TS24W001, 1DS24C015,...)
+    💡 KẾT HỢP CẢ HAI (Hybrid):
+       Chỉ dùng khi câu hỏi vừa có BỐI CẢNH trừu tượng, vừa có THUỘC TÍNH cụ thể:
+       - "Tìm áo khoác ấm áp cho mùa đông, giá dưới 1 triệu"
+         → query="áo khoác ấm áp mùa đông", price_max=1000000
+    📝 VÍ DỤ CHI TIẾT:
+    Example 1 - Semantic Search (MỤC ĐÍCH):
+       User: "Tìm đồ đi làm chuyên nghiệp"
+       Tool: data_retrieval_tool(query="trang phục công sở chuyên nghiệp")
+    Example 2 - Metadata Filter (THUỘC TÍNH):
+       User: "Cho tôi xem áo polo nam dưới 400k"
+       Tool: data_retrieval_tool(keywords="áo polo", gender_by_product="male", price_max=400000)
+    Example 3 - Metadata Only (GIÁ + MÀU):
+       User: "Quần short đen giá rẻ"
+       Tool: data_retrieval_tool(keywords="quần short", master_color="Đen", price_max=300000)
+    Example 4 - Exact Match (MÃ SẢN PHẨM):
+       User: "Cho tôi thông tin sản phẩm 8TS24W001"
+       Tool: data_retrieval_tool(internal_ref_code="8TS24W001")
+    Example 5 - Hybrid (BỐI CẢNH + FILTER):
+       User: "Tìm áo khoác ấm cho mùa đông, cho bé trai, từ 200k-500k"
+       Tool: data_retrieval_tool(query="áo khoác ấm áp mùa đông", age_by_product="others", gender_by_product="male", price_min=200000, price_max=500000)
    """
    try:
        # 1. Log & Prepare Params
@@ -128,6 +161,7 @@ async def data_retrieval_tool(
        query_vector = None
        if query:
            from common.embedding_service import create_embedding_async
            query_vector = await create_embedding_async(query)
        # 3. Execute Search (Async)

--- a/backend/api/chatbot_route.py
+++ b/backend/api/chatbot_route.py
@@ -4,12 +4,9 @@ FastAPI endpoints cho Fashion Q&A Agent service.
 Router chỉ chứa định nghĩa API, logic nằm ở controller.
 """
-import json
 import logging
-from collections.abc import AsyncGenerator
-from fastapi import APIRouter, HTTPException, Request
+from fastapi import APIRouter, BackgroundTasks, HTTPException
-from fastapi.responses import StreamingResponse
 from agent.controller import chat_controller
 from agent.models import QueryRequest
@@ -20,103 +17,84 @@ router = APIRouter()
 @router.post("/chat", summary="Fashion Q&A Chat (Non-streaming)")
-async def fashion_qa_chat(req: QueryRequest, request: Request):
+async def fashion_qa_chat(req: QueryRequest, background_tasks: BackgroundTasks):
    """
    Endpoint chat không stream - trả về response JSON đầy đủ một lần.
    """
-    # Trích xuất user_id từ request (auth middleware)
+    user_id = req.user_id or "default_user"
-    user_id = getattr(request.state, "user_id", None) or req.user_id or "default_user"
-    logger.info(f"📥 [Incoming Query - NonStream] User: {user_id} | Query: {req.query}")
+    logger.info(f"📥 [Incoming Query - NonStream] User: {user_id} | Query: {req.user_query}")
    try:
-        # Gọi controller để xử lý logic và nhận generator stream
+        # Gọi controller để xử lý logic (Non-streaming)
-        # Note: Vì chat_controller có decorator @observe(), cần await để unwrap
+        result = await chat_controller(
-        generator: AsyncGenerator[str, None] = chat_controller(
+            query=req.user_query,
-            query=req.query,
            user_id=user_id,
+            background_tasks=background_tasks,
            model_name=DEFAULT_MODEL,
-            conversation_id=req.conversation_id,
            images=req.images,
        )
-        # Collect toàn bộ events từ generator
+        logger.info(f"📤 [Outgoing Response - NonStream] User: {user_id}")
-        final_response = None
-        async for chunk in generator:
-            # Parse SSE data format
-            if chunk.startswith("data: "):
-                data_str = chunk[6:].strip()
-                if data_str != "[DONE]":
-                    final_response = json.loads(data_str)
-        # Trả về response cuối cùng
-        if final_response and "messages" in final_response:
-            last_message = final_response["messages"][-1]
-            response_text = last_message.get("content", "") if isinstance(last_message, dict) else str(last_message)
-            logger.info(f"📤 [Outgoing Response - NonStream] User: {user_id} | Response: {response_text}")
        return {
            "status": "success",
-                "response": response_text,
+            "ai_response": result["ai_response"],
-                "conversation_id": req.conversation_id,
+            "product_ids": result.get("product_ids", []),
        }
-        return {"status": "error", "message": "No response generated"}
    except Exception as e:
        logger.error(f"Error in fashion_qa_chat: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=str(e)) from e
 # ====================== FASHION Q&A CHAT API ======================
-@router.post("/stream/chat", summary="Fashion Q&A Chat with Streaming Response")
+# @router.post("/stream/chat", summary="Fashion Q&A Chat with Streaming Response")
-async def fashion_qa_chat_stream(req: QueryRequest, request: Request):
+# async def fashion_qa_chat_stream(req: QueryRequest, request: Request):
-    """
+#     """
-    Endpoint duy nhất cho việc chat với Fashion Agent.
+#     Endpoint duy nhất cho việc chat với Fashion Agent.
-    """
+#     """
-    # Trích xuất user_id từ request (auth middleware)
+#     # Trích xuất user_id từ request (auth middleware)
-    user_id = getattr(request.state, "user_id", None) or req.user_id or "default_user"
+#     user_id = getattr(request.state, "user_id", None) or req.user_id or "default_user"
-    logger.info(f"📥 [Incoming Query] User: {user_id} | Query: {req.query}")
+#     logger.info(f"📥 [Incoming Query] User: {user_id} | Query: {req.query}")
-    try:
+#     try:
-        # Gọi controller để xử lý logic và nhận generator stream
+#         # Gọi controller để xử lý logic và nhận generator stream
-        # Note: Vì chat_controller có decorator @observe(), cần await để unwrap
+#         # Note: Vì chat_controller có decorator @observe(), cần await để unwrap
-        generator: AsyncGenerator[str, None] = chat_controller(
+#         generator: AsyncGenerator[str, None] = chat_controller(
-            query=req.query,
+#             query=req.query,
-            user_id=user_id,
+#             user_id=user_id,
-            model_name=DEFAULT_MODEL,
+#             model_name=DEFAULT_MODEL,
-            conversation_id=req.conversation_id,
+#             conversation_id=req.conversation_id,
-            images=req.images,
+#             images=req.images,
-        )
+#         )
-        async def logging_generator(gen: AsyncGenerator[str, None]):
+#         async def logging_generator(gen: AsyncGenerator[str, None]):
-            full_response_log = ""
+#             full_response_log = ""
-            first_chunk = True
+#             first_chunk = True
-            try:
+#             try:
-                async for chunk in gen:
+#                 async for chunk in gen:
-                    if first_chunk:
+#                     if first_chunk:
-                        logger.info("🚀 [Stream Started] First chunk received")
+#                         logger.info("🚀 [Stream Started] First chunk received")
-                        first_chunk = False
+#                         first_chunk = False
-                    full_response_log += chunk
+#                     full_response_log += chunk
-                    yield chunk
+#                     yield chunk
-            except Exception as e:
+#             except Exception as e:
-                logger.error(f"❌ [Stream Error] {e}")
+#                 logger.error(f"❌ [Stream Error] {e}")
-                yield f"data: {json.dumps({'error': str(e)})}\n\n"
+#                 yield f"data: {json.dumps({'error': str(e)})}\n\n"
-            logger.info(f"📤 [Outgoing Response Stream Finished] Total Chunks Length: {len(full_response_log)}")
+#             logger.info(f"📤 [Outgoing Response Stream Finished] Total Chunks Length: {len(full_response_log)}")
-        return StreamingResponse(
+#         return StreamingResponse(
-            logging_generator(generator),
+#             logging_generator(generator),
-            media_type="text/event-stream",
+#             media_type="text/event-stream",
-            headers={
+#             headers={
-                "Cache-Control": "no-cache",
+#                 "Cache-Control": "no-cache",
-                "Connection": "keep-alive",
+#                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no",
+#                 "X-Accel-Buffering": "no",
-            },
+#             },
-        )
+#         )
-    except Exception as e:
+#     except Exception as e:
-        logger.error(f"Error in fashion_qa_chat: {e}", exc_info=True)
+#         logger.error(f"Error in fashion_qa_chat: {e}", exc_info=True)
-        raise HTTPException(status_code=500, detail=str(e)) from e
+#         raise HTTPException(status_code=500, detail=str(e)) from e
--- a/backend/api/conservation_route.py
+++ b/backend/api/conservation_route.py
-from fastapi import APIRouter, HTTPException
-from typing import List, Dict, Any, Optional
-from pydantic import BaseModel
 import logging
+from typing import Any
+from fastapi import APIRouter
+from pydantic import BaseModel
 from common.conversation_manager import get_conversation_manager
 router = APIRouter(tags=["Conservation"])
 logger = logging.getLogger(__name__)
 class ChatMessage(BaseModel):
    id: int
    user_id: str | None = None  # Optional; usually not needed in list responses, kept for consistency
@@ -15,32 +17,29 @@ class ChatMessage(BaseModel):
    is_human: bool
    timestamp: Any
 class ChatHistoryResponse(BaseModel):
-    data: List[Dict[str, Any]]
+    data: list[dict[str, Any]]
-    next_cursor: Optional[int] = None
+    next_cursor: int | None = None
 @router.get("/history/{user_id}", summary="Get Chat History by User ID", response_model=ChatHistoryResponse)
-async def get_chat_history(user_id: str, limit: Optional[int] = 20, before_id: Optional[int] = None):
+async def get_chat_history(user_id: str, limit: int | None = 50, before_id: int | None = None):
    """
    Lấy lịch sử chat của user từ Postgres database.
    Trả về object chứa `data` (list messages) và `next_cursor` để dùng cho trang tiếp theo.
    """
    try:
        # Sử dụng ConversationManager Singleton
-        manager = get_conversation_manager()
+        manager = await get_conversation_manager()
-        # Lấy history từ DB với pagination
+        # Lấy history từ DB
-        history = manager.get_chat_history(user_id, limit=limit, before_id=before_id)
+        history = await manager.get_chat_history(user_id, limit=limit, before_id=before_id)
        next_cursor = None
        if history and len(history) > 0:
-            # Cursor cho trang tiếp theo chính là ID của tin nhắn cuối cùng (cũ nhất trong batch này)
+            next_cursor = history[-1]["id"]
-            next_cursor = history[-1]['id']
-        return {
+        return {"data": history, "next_cursor": next_cursor}
-            "data": history,
-            "next_cursor": next_cursor
-        }
    except Exception as e:
        logger.error(f"Error fetching chat history for user {user_id}: {e}")
-        raise HTTPException(status_code=500, detail=str(e))
--- a/backend/common/conversation_manager.py
+++ b/backend/common/conversation_manager.py
 import logging
-import psycopg2
-from typing import List, Dict, Optional, Any
 from datetime import datetime
-from config import CHECKPOINT_POSTGRES_URL
+from typing import Any
-logger = logging.getLogger(__name__)
+from psycopg_pool import AsyncConnectionPool
-class DatabaseConnection:
+from config import CHECKPOINT_POSTGRES_URL
-    def __init__(self, connection_url: str):
-        self.connection_url = connection_url
-        self.conn = None
-    def connect(self):
+logger = logging.getLogger(__name__)
-        if not self.conn or self.conn.closed:
-            self.conn = psycopg2.connect(self.connection_url)
-        return self.conn
-    def close(self):
-        if self.conn and not self.conn.closed:
-            self.conn.close()
 class ConversationManager:
-    def __init__(self, connection_url: str = CHECKPOINT_POSTGRES_URL, table_name: str = "langgraph_chat_histories"):
+    def __init__(
-        self.db = DatabaseConnection(connection_url)
+        self,
+        connection_url: str = CHECKPOINT_POSTGRES_URL,
+        table_name: str = "langgraph_chat_histories",
+    ):
+        self.connection_url = connection_url
        self.table_name = table_name
+        self._pool: AsyncConnectionPool | None = None
+    async def _get_pool(self) -> AsyncConnectionPool:
+        """Return the shared async connection pool, opening it on first use.
+
+        The pool is stored on ``self._pool`` only after ``open()`` has
+        completed, so a coroutine that calls this method while another one is
+        still awaiting ``open()`` never receives a pool that is not open yet,
+        and a failed ``open()`` does not leave a broken pool cached.
+
+        Returns:
+            The opened ``AsyncConnectionPool`` for ``self.connection_url``.
+        """
+        if self._pool is None:
+            pool = AsyncConnectionPool(self.connection_url, open=False)
+            await pool.open()
+            if self._pool is None:
+                self._pool = pool
+            else:
+                # Another coroutine published its pool while this one was
+                # awaiting open(); keep that one and discard the duplicate.
+                await pool.close()
+        return self._pool
-    def initialize_table(self):
+    async def initialize_table(self):
        """Create the chat history table if it doesn't exist"""
        try:
-            conn = self.db.connect()
+            pool = await self._get_pool()
-            with conn.cursor() as cursor:
+            async with pool.connection() as conn:
-                cursor.execute(f"""
+                async with conn.cursor() as cursor:
+                    await cursor.execute(f"""
                        CREATE TABLE IF NOT EXISTS {self.table_name} (
                            id SERIAL PRIMARY KEY,
                            user_id VARCHAR(255) NOT NULL,
@@ -40,36 +42,48 @@ class ConversationManager:
                        )
                    """)
-                # Create index 
+                    await cursor.execute(f"""
-                cursor.execute(f"""
                        CREATE INDEX IF NOT EXISTS idx_{self.table_name}_user_timestamp 
                        ON {self.table_name} (user_id, timestamp)
                    """)
-            conn.commit()
+                await conn.commit()
-            logger.info(f"✅ Table {self.table_name} initialized successfully")
+            logger.info(f"Table {self.table_name} initialized successfully")
        except Exception as e:
            logger.error(f"Error initializing table: {e}")
-            raise e
+            raise
-    def save_message(self, user_id: str, message: str, is_human: bool):
+    async def save_conversation_turn(self, user_id: str, human_message: str, ai_message: str):
-        """Save a message to the chat history"""
+        """Save both human and AI messages in a single atomic transaction."""
        try:
-            conn = self.db.connect()
+            pool = await self._get_pool()
-            with conn.cursor() as cursor:
+            timestamp = datetime.now()
-                cursor.execute(
+            async with pool.connection() as conn:
+                async with conn.cursor() as cursor:
+                    await cursor.execute(
                        f"""INSERT INTO {self.table_name} (user_id, message, is_human, timestamp) 
-                       VALUES (%s, %s, %s, %s)""",
+                           VALUES (%s, %s, %s, %s), (%s, %s, %s, %s)""",
-                    (user_id, message, is_human, datetime.now())
+                        (
+                            user_id,
+                            human_message,
+                            True,
+                            timestamp,
+                            user_id,
+                            ai_message,
+                            False,
+                            timestamp,
+                        ),
                    )
-            conn.commit()
+                await conn.commit()
-            logger.debug(f"💾 Saved message for user {user_id}: {message[:50]}...")
+            logger.debug(f"Saved conversation turn for user {user_id}")
        except Exception as e:
-            logger.error(f"Error saving message: {e}")
+            logger.error(f"Failed to save conversation for user {user_id}: {e}", exc_info=True)
+            raise
-    def get_chat_history(self, user_id: str, limit: Optional[int] = None, before_id: Optional[int] = None) -> List[Dict[str, Any]]:
+    async def get_chat_history(
-        """Retrieve chat history for a user using cursor-based pagination (before_id)"""
+        self, user_id: str, limit: int | None = None, before_id: int | None = None
+    ) -> list[dict[str, Any]]:
+        """Retrieve chat history for a user using cursor-based pagination."""
        try:
-            # Base query
            query = f"""
                SELECT message, is_human, timestamp, id
                FROM {self.table_name} 
@@ -77,29 +91,27 @@ class ConversationManager:
            """
            params = [user_id]
-            # Add cursor condition if provided
            if before_id:
                query += " AND id < %s"
                params.append(before_id)
-            # Order by id DESC (ensures strict chronological consistency with ID cursor)
            query += " ORDER BY id DESC"
            if limit:
                query += " LIMIT %s"
                params.append(limit)
-            conn = self.db.connect()
+            pool = await self._get_pool()
-            with conn.cursor() as cursor:
+            async with pool.connection() as conn, conn.cursor() as cursor:
-                cursor.execute(query, tuple(params))
+                await cursor.execute(query, tuple(params))
-                results = cursor.fetchall()
+                results = await cursor.fetchall()
                return [
                    {
-                        'message': row[0],
+                        "message": row[0],
-                        'is_human': row[1],
+                        "is_human": row[1],
-                        'timestamp': row[2],
+                        "timestamp": row[2],
-                        'id': row[3]
+                        "id": row[3],
                    }
                    for row in results
                ]
@@ -107,37 +119,44 @@ class ConversationManager:
            logger.error(f"Error retrieving chat history: {e}")
            return []
-    def clear_history(self, user_id: str):
+    async def clear_history(self, user_id: str):
        """Clear all chat history for a user"""
        try:
-            conn = self.db.connect()
+            pool = await self._get_pool()
-            with conn.cursor() as cursor:
+            async with pool.connection() as conn:
-                cursor.execute(f"DELETE FROM {self.table_name} WHERE user_id = %s", (user_id,))
+                async with conn.cursor() as cursor:
-            conn.commit()
+                    await cursor.execute(f"DELETE FROM {self.table_name} WHERE user_id = %s", (user_id,))
-            logger.info(f"🗑️ Cleared chat history for user {user_id}")
+                await conn.commit()
+            logger.info(f"Cleared chat history for user {user_id}")
        except Exception as e:
            logger.error(f"Error clearing chat history: {e}")
-    def get_user_count(self) -> int:
+    async def get_user_count(self) -> int:
        """Get total number of unique users"""
        try:
-            conn = self.db.connect()
+            pool = await self._get_pool()
-            with conn.cursor() as cursor:
+            async with pool.connection() as conn, conn.cursor() as cursor:
-                cursor.execute(f"SELECT COUNT(DISTINCT user_id) FROM {self.table_name}")
+                await cursor.execute(f"SELECT COUNT(DISTINCT user_id) FROM {self.table_name}")
-                result = cursor.fetchone()
+                result = await cursor.fetchone()
                return result[0] if result else 0
        except Exception as e:
            logger.error(f"Error getting user count: {e}")
            return 0
+    async def close(self):
+        """Close the connection pool, if one was opened, and forget it.
+
+        ``self._pool`` is reset to ``None`` so that a later ``_get_pool()``
+        call opens a fresh pool instead of handing out the closed one.
+        Calling this when no pool exists is a no-op.
+        """
+        pool, self._pool = self._pool, None
+        if pool is not None:
+            await pool.close()
 # --- Singleton ---
-_instance: Optional[ConversationManager] = None
+_instance: ConversationManager | None = None
-def get_conversation_manager() -> ConversationManager:
+async def get_conversation_manager() -> ConversationManager:
-    """Get or create generic ConversationManager singleton"""
+    """Get or create async ConversationManager singleton"""
    global _instance
    if _instance is None:
        _instance = ConversationManager()
-        # Initialize table on first creation
+        await _instance.initialize_table()
-        _instance.initialize_table()
    return _instance
--- a/backend/common/llm_factory.py
+++ b/backend/common/llm_factory.py
 """
 LLM Factory - Centralized LLM creation for OpenAI & Gemini.
-Quản lý việc khởi tạo và caching các LLM models, tự động nhận diện provider.
+Manages initialization and caching of LLM models with automatic provider detection.
 """
 import contextlib
 import logging
-from typing import cast
 from langchain_core.language_models import BaseChatModel
-from langchain_openai import ChatOpenAI
 from langchain_google_genai import ChatGoogleGenerativeAI
+from langchain_openai import ChatOpenAI, OpenAIEmbeddings
 from config import GOOGLE_API_KEY, OPENAI_API_KEY
@@ -17,18 +16,15 @@ logger = logging.getLogger(__name__)
 class LLMFactory:
-    """
+    """Singleton factory for managing LLM instances with caching and provider auto-detection."""
-    Singleton Class quản lý việc khởi tạo và caching các LLM Models.
-    """
    COMMON_MODELS: list[str] = [
        "gpt-4o-mini",
        "gemini-2.0-flash-lite-preview-02-05",
-        "gemini-1.5-flash",
    ]
    def __init__(self):
-        # Cache dict: Key=(model_name, streaming, json_mode, api_key), Value=LLM Instance
+        """Initialize LLM factory with empty cache."""
        self._cache: dict[tuple[str, bool, bool, str | None], BaseChatModel] = {}
    def get_model(
@@ -39,109 +35,148 @@ class LLMFactory:
        api_key: str | None = None,
    ) -> BaseChatModel:
        """
-        Lấy LLM instance từ cache hoặc tạo mới.
+        Get or create an LLM instance from cache.
+        Args:
+            model_name: Model identifier (e.g., "gpt-4o-mini", "gemini-2.0-flash-lite-preview-02-05")
+            streaming: Enable streaming responses
+            json_mode: Enable JSON output format
+            api_key: Optional API key override
+        Returns:
+            Configured LLM instance
        """
-        # Clean model name
        clean_model = model_name.split("/")[-1] if "/" in model_name else model_name
        cache_key = (clean_model, streaming, json_mode, api_key)
-        # 1. Hit Cache
        if cache_key in self._cache:
            logger.debug(f"♻️ Using cached model: {clean_model}")
            return self._cache[cache_key]
-        # 2. Miss Cache -> Create New
+        logger.info(f"Creating new LLM instance: {clean_model}")
-        return self._create_new_instance(clean_model, streaming, json_mode, api_key)
+        return self._create_instance(clean_model, streaming, json_mode, api_key)
-    def _create_new_instance(
+    def _create_instance(
        self,
        model_name: str,
        streaming: bool = False,
        json_mode: bool = False,
        api_key: str | None = None,
    ) -> BaseChatModel:
-        """Khởi tạo LLM instance dựa trên tên model"""
+        """
+        Create and cache a new LLM instance based on model name.
+        Args:
+            model_name: Clean model identifier
+            streaming: Enable streaming
+            json_mode: Enable JSON mode
+            api_key: Optional API key override
+        Returns:
+            Configured LLM instance
+        Raises:
+            ValueError: If required API keys are missing
+        """
        try:
-            # 1. Nhận diện Gemini
+            if self._is_gemini_model(model_name):
-            if "gemini" in model_name.lower():
+                llm = self._create_gemini(model_name, streaming, api_key)
-                effective_key = api_key or GOOGLE_API_KEY
+            else:
-                if not effective_key:
+                llm = self._create_openai(model_name, streaming, api_key)
-                    raise ValueError("GOOGLE_API_KEY is missing for Gemini model")
+            if json_mode:
+                llm = self._enable_json_mode(llm, model_name)
+            cache_key = (model_name, streaming, json_mode, api_key)
+            self._cache[cache_key] = llm
+            return llm
+        except Exception as e:
+            logger.error(f"❌ Failed to create model {model_name}: {e}")
+            raise
+    def _is_gemini_model(self, model_name: str) -> bool:
+        """Return True when ``model_name`` contains "gemini", ignoring case."""
+        lowered_name = model_name.lower()
+        return "gemini" in lowered_name
+    def _create_gemini(self, model_name: str, streaming: bool, api_key: str | None) -> BaseChatModel:
+        """Create Gemini model instance."""
+        key = api_key or GOOGLE_API_KEY
+        if not key:
+            raise ValueError("GOOGLE_API_KEY is required for Gemini models")
        llm = ChatGoogleGenerativeAI(
            model=model_name,
            streaming=streaming,
-                    google_api_key=effective_key,
+            google_api_key=key,
-                    temperature=0
+            temperature=0,
        )
-                logger.info(f"✨ Created Gemini model: {model_name}")
+        logger.info(f"✨ Created Gemini: {model_name}")
+        return llm
-            # 2. Nhận diện OpenAI (hoặc mặc định)
+    def _create_openai(self, model_name: str, streaming: bool, api_key: str | None) -> BaseChatModel:
-            else:
+        """Create OpenAI model instance with fallback to Gemini if needed."""
-                effective_key = api_key or OPENAI_API_KEY
+        key = api_key or OPENAI_API_KEY
-                if not effective_key:
-                    # Nếu không có OpenAI key, thử dùng Gemini làm fallback cuối cùng
+        if not key:
+            logger.warning("⚠️ No OpenAI key, attempting Gemini fallback")
            if GOOGLE_API_KEY:
-                        logger.warning(f"⚠️ No OpenAI key found, falling back to Gemini for {model_name}")
+                return self._create_gemini("gemini-1.5-flash", streaming, GOOGLE_API_KEY)
-                        llm = ChatGoogleGenerativeAI(
+            raise ValueError("Neither OPENAI_API_KEY nor GOOGLE_API_KEY is available")
-                            model="gemini-1.5-flash",
-                            streaming=streaming,
-                            google_api_key=GOOGLE_API_KEY,
-                            temperature=0
-                        )
-                    else:
-                        raise ValueError("Neither OPENAI_API_KEY nor GOOGLE_API_KEY is available.")
-                else:
-                    # Khởi tạo OpenAI
-                    # Lưu ý: gpt-5-nano nếu không tồn tại sẽ bị lỗi từ phía OpenAI API
        llm = ChatOpenAI(
            model=model_name,
            streaming=streaming,
-                        api_key=effective_key, 
+            api_key=key,
-                        temperature=0
+            temperature=0,
        )
-                    logger.info(f"✅ Created OpenAI model: {model_name}")
+        logger.info(f"✅ Created OpenAI: {model_name}")
+        return llm
-            # Apply JSON mode nếu cần
+    def _enable_json_mode(self, llm: BaseChatModel, model_name: str) -> BaseChatModel:
-            if json_mode:
+        """Enable JSON mode for the LLM."""
        try:
            llm = llm.bind(response_format={"type": "json_object"})
-                    logger.debug(f"⚙️ Enabled JSON Mode for {model_name}")
+            logger.debug(f"⚙️ JSON mode enabled for {model_name}")
-                except Exception as ex:
-                    logger.warning(f"⚠️ Failed to bind JSON mode: {ex}")
-            # Lưu vào cache
-            cache_key = (model_name, streaming, json_mode, api_key)
-            self._cache[cache_key] = cast(BaseChatModel, llm)
-            return self._cache[cache_key]
        except Exception as e:
-            logger.error(f"❌ Failed to create model {model_name}: {e}")
+            logger.warning(f"⚠️ JSON mode not supported: {e}")
-            raise
+        return llm
+    def initialize(self, skip_warmup: bool = True) -> None:
+        """
+        Pre-initialize common models.
-    def initialize(self, skip_warmup: bool = True):
+        Args:
-        """Pre-initialize common models"""
+            skip_warmup: Skip initialization if True
+        """
        if skip_warmup or self._cache:
            return
        logger.info("🔥 Warming up LLM Factory...")
-        for name in self.COMMON_MODELS:
+        for model_name in self.COMMON_MODELS:
            with contextlib.suppress(Exception):
-                self.get_model(name, streaming=True)
+                self.get_model(model_name, streaming=True)
 # --- Singleton Instance & Public API ---
 _factory = LLMFactory()
-def create_llm(model_name: str, streaming: bool = True, json_mode: bool = False, api_key: str | None = None):
+def create_llm(
+    model_name: str,
+    streaming: bool = True,
+    json_mode: bool = False,
+    api_key: str | None = None,
+) -> BaseChatModel:
+    """Create or get cached LLM instance."""
    return _factory.get_model(model_name, streaming=streaming, json_mode=json_mode, api_key=api_key)
-def init_llm_factory(skip_warmup: bool = True):
+def init_llm_factory(skip_warmup: bool = True) -> None:
+    """Initialize the LLM factory."""
    _factory.initialize(skip_warmup)
-def create_embedding_model():
-    """Helper để tạo embedding model (OpenAI focus)"""
+def create_embedding_model() -> OpenAIEmbeddings:
-    from langchain_openai import OpenAIEmbeddings
+    """Create OpenAI embeddings model."""
-    from config import OPENAI_API_KEY
    return OpenAIEmbeddings(model="text-embedding-3-small", api_key=OPENAI_API_KEY)
--- a/backend/config.py
+++ b/backend/config.py
@@ -4,6 +4,7 @@ Lấy giá trị từ file .env qua os.getenv
 """
 import os
 from dotenv import load_dotenv
 # Load environment variables from .env file
@@ -66,7 +67,8 @@ OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY")
 GOOGLE_API_KEY: str | None = os.getenv("GOOGLE_API_KEY")
 GROQ_API_KEY: str | None = os.getenv("GROQ_API_KEY")
-DEFAULT_MODEL: str = os.getenv("DEFAULT_MODEL", "gemini-2.0-flash-lite-preview-02-05")
+DEFAULT_MODEL: str = os.getenv("DEFAULT_MODEL", "gpt-5-nano")
+# DEFAULT_MODEL: str = os.getenv("DEFAULT_MODEL")
 # ====================== JWT CONFIGURATION ======================
 JWT_SECRET: str | None = os.getenv("JWT_SECRET")

--- a/backend/docker-compose.yml
+++ b/backend/docker-compose.yml
@@ -6,12 +6,12 @@ services:
    build: .
    container_name: canifa_backend
    ports:
-      - "5000:5000"
+      - "8000:8000"
    volumes:
-      - .:/app # Mount code để hot-reload khi dev (tuỳ chọn)
+      - .:/app
    environment:
+      - PORT=8000
      - CHECKPOINT_POSTGRES_URL=postgresql://postgres:password@postgres_db:5432/canifa_chat
-      # Các biến môi trường khác bro có thể thêm vào đây hoặc dùng file .env
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY}
      - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY}
@@ -24,6 +24,40 @@ services:
    depends_on:
      - postgres_db
    restart: unless-stopped
+    logging:
+      driver: "json-file"
+      options:
+        tag: "{{.Name}}"
+  # --- Monitoring Stack (Loki + Promtail + Grafana) ---
+  loki:
+    image: grafana/loki:2.9.0
+    container_name: canifa_loki
+    ports:
+      - "3100:3100"
+    command: -config.file=/etc/loki/local-config.yaml
+    restart: unless-stopped
+  promtail:
+    image: grafana/promtail:2.9.0
+    container_name: canifa_promtail
+    volumes:
+      - /var/lib/docker/containers:/var/lib/docker/containers:ro
+      - /var/run/docker.sock:/var/run/docker.sock
+      - ./promtail-config.yaml:/etc/promtail/config.yml
+    command: -config.file=/etc/promtail/config.yml
+    restart: unless-stopped
+  grafana:
+    image: grafana/grafana:10.1.0
+    container_name: canifa_grafana
+    ports:
+      - "3000:3000"
+    environment:
+      - GF_SECURITY_ADMIN_PASSWORD=admin # Default password: change it before deploying to production
+    depends_on:
+      - loki
+    restart: unless-stopped
  # --- Database Service (Postgres) ---
  postgres_db:

--- a/backend/nginx.conf
+++ b/backend/nginx.conf
+# Reverse proxy for the CANIFA chatbot backend.
+# All locations forward to the backend on 127.0.0.1:8000, the port published
+# for the backend service in docker-compose.yml.
+server {
+    listen 80;
+    server_name _;  # catch-all name; replace with the bot server's IP or domain
+    # Log files
+    access_log /var/log/nginx/chatbot_access.log;
+    error_log /var/log/nginx/chatbot_error.log;
+    # Chat endpoint
+    location /chat {
+        # Optional IP allow-list (uncomment and adjust to restrict access):
+        # allow 1.2.3.4;
+        # deny all;
+        proxy_pass http://127.0.0.1:8000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+        proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+        proxy_set_header X-Forwarded-Proto $scheme;
+        # Long timeouts, presumably to accommodate slow LLM responses.
+        proxy_read_timeout 300s;
+        proxy_connect_timeout 300s;
+        proxy_send_timeout 300s;
+    }
+    # Endpoint for chat history
+    location /history {
+        # Optional IP allow-list (uncomment and adjust to restrict access):
+        # allow 1.2.3.4;
+        # deny all;
+        proxy_pass http://127.0.0.1:8000;
+        proxy_set_header Host $host;
+        proxy_set_header X-Real-IP $remote_addr;
+    }
+    # Everything else goes to the same backend.
+    location / {
+        proxy_pass http://127.0.0.1:8000;
+    }
+}
--- a/backend/promtail-config.yaml
+++ b/backend/promtail-config.yaml
+server:
+  http_listen_port: 9080
+  grpc_listen_port: 0
+positions:
+  filename: /tmp/positions.yaml
+clients:
+  - url: http://loki:3100/loki/api/v1/push
+scrape_configs:
+  - job_name: docker
+    docker_sd_configs:
+      - host: unix:///var/run/docker.sock
+        refresh_interval: 5s
+    relabel_configs:
+      - source_labels: ['__meta_docker_container_name']
+        regex: '/(.*)'
+        target_label: 'container'
--- a/backend/readme.md
+++ b/backend/readme.md
-.\.venv\Scripts\activate
+# CANIFA Chatbot API Documentation
\ No newline at end of file
+API hệ thống chatbot tư vấn thời trang CANIFA - CiCi Assistant.
+## Base URL
+```
+http://localhost:8000
+```
+---
+## API Endpoints
+### 1. Chat với Chatbot
+**Endpoint:** `POST /chat`
+**Mô tả:** Gửi tin nhắn tới chatbot và nhận phản hồi tư vấn thời trang cùng danh sách sản phẩm liên quan.
+#### Request Body
+```json
+{
+  "user_id": "string",
+  "user_query": "string"
+}
+```
+**Parameters:**
+| Field | Type | Required | Mô tả |
+|-------|------|----------|-------|
+| `user_id` | string | ✅ | ID định danh người dùng (dùng để lưu lịch sử chat) |
+| `user_query` | string | ✅ | Nội dung tin nhắn của người dùng |
+**Ví dụ Request:**
+```json
+{
+  "user_id": "user_12345",
+  "user_query": "Cho em xem áo sơ mi nam dưới 500k"
+}
+```
+#### Response
+**Success Response (200 OK):**
+```json
+{
+  "status": "success",
+  "ai_response": "string",
+  "product_ids": ["string"]
+}
+```
+**Response Fields:**
+| Field | Type | Mô tả |
+|-------|------|-------|
+| `status` | string | Trạng thái xử lý request (`"success"` hoặc `"error"`) |
+| `ai_response` | string | Câu trả lời của chatbot (văn bản tư vấn) |
+| `product_ids` | array[string] | Danh sách mã sản phẩm được đề xuất (internal_ref_code) |
+**Ví dụ Response:**
+```json
+{
+  "status": "success",
+  "ai_response": "Em chào anh! Em đã tìm thấy một số mẫu áo sơ mi nam đẹp trong tầm giá dưới 500k:\n\n1. Áo Sơ Mi Nam Cotton - 399.000đ\n2. Áo Sơ Mi Slim Fit - 449.000đ\n\nCác sản phẩm này đều là chất liệu cotton thoáng mát, phù hợp cho mùa hè ạ!",
+  "product_ids": ["SM12345", "SM12346"]
+}
+```
+**Error Response (500 Internal Server Error):**
+```json
+{
+  "status": "error",
+  "ai_response": "Xin lỗi, đã có lỗi xảy ra. Vui lòng thử lại sau.",
+  "product_ids": []
+}
+```
+---
+### 2. Lấy Lịch Sử Chat
+**Endpoint:** `GET /history/{user_id}`
+**Mô tả:** Lấy lịch sử chat của người dùng với phân trang cursor-based.
+#### Path Parameters
+| Parameter | Type | Required | Mô tả |
+|-----------|------|----------|-------|
+| `user_id` | string | ✅ | ID người dùng cần lấy lịch sử |
+#### Query Parameters
+| Parameter | Type | Required | Default | Mô tả |
+|-----------|------|----------|---------|-------|
+| `limit` | integer | ❌ | 50 | Số lượng tin nhắn tối đa mỗi trang (1-100) |
+| `before_id` | integer | ❌ | null | ID của tin nhắn để lấy các tin nhắn trước đó (dùng cho phân trang) |
+**Ví dụ Request:**
+```
+GET /history/user_12345?limit=20&before_id=150
+```
+#### Response
+**Success Response (200 OK):**
+```json
+{
+  "data": [
+    {
+      "id": 149,
+      "user_id": "user_12345",
+      "message": "Em đã tìm thấy một số mẫu áo sơ mi nam đẹp...",
+      "is_human": false,
+      "timestamp": "2025-12-25T14:30:02"
+    },
+    {
+      "id": 148,
+      "user_id": "user_12345",
+      "message": "Cho em xem áo sơ mi nam",
+      "is_human": true,
+      "timestamp": "2025-12-25T14:30:00"
+    }
+  ],
+  "next_cursor": 130
+}
+```
+**Response Fields:**
+| Field | Type | Mô tả |
+|-------|------|-------|
+| `data` | array[object] | Danh sách tin nhắn chat (sắp xếp từ mới → cũ) |
+| `data[].id` | integer | ID duy nhất của tin nhắn |
+| `data[].user_id` | string | ID người dùng |
+| `data[].message` | string | Nội dung tin nhắn |
+| `data[].is_human` | boolean | `true` = tin nhắn của người dùng, `false` = tin nhắn của bot |
+| `data[].timestamp` | string | Thời gian gửi tin nhắn (ISO 8601 format) |
+| `next_cursor` | integer \| null | ID của tin nhắn cuối cùng (dùng làm `before_id` cho request tiếp theo). `null` nếu hết dữ liệu |
+---
+## Phân Trang (Pagination)
+API sử dụng **cursor-based pagination** để lấy lịch sử chat:
+### Cách hoạt động:
+1. **Request đầu tiên** - Lấy 20 tin nhắn mới nhất:
+   ```
+   GET /history/user_12345?limit=20
+   ```
+   Response:
+   ```json
+   {
+     "data": [...], // 20 tin nhắn (ID: 200 → 181)
+     "next_cursor": 181
+   }
+   ```
+2. **Request tiếp theo** - Lấy 20 tin nhắn cũ hơn:
+   ```
+   GET /history/user_12345?limit=20&before_id=181
+   ```
+   Response:
+   ```json
+   {
+     "data": [...], // 20 tin nhắn (ID: 180 → 161)
+     "next_cursor": 161
+   }
+   ```
+3. **Request cuối cùng** - Khi hết dữ liệu:
+   ```json
+   {
+     "data": [...], // 5 tin nhắn còn lại
+     "next_cursor": null
+   }
+   ```
+### Logic phân trang:
+- `next_cursor` là **ID của tin nhắn cuối cùng** trong `data` (hoặc `null` khi đã hết dữ liệu)
+- Dùng `next_cursor` làm `before_id` cho request tiếp theo
+- Khi `next_cursor = null` → đã hết dữ liệu
+---
+## Chat Workflow
+```mermaid
+graph LR
+    A[User gửi message] --> B[POST /chat]
+    B --> C{Agent xử lý}
+    C --> D[Tìm kiếm sản phẩm]
+    C --> E[Trả lời tư vấn]
+    D --> F[Trích xuất product_ids]
+    E --> F
+    F --> G[Response: ai_response + product_ids]
+    G --> H[Lưu vào PostgreSQL]
+    H --> I[Trả về client]
+```
+### Quy trình xử lý:
+1. User gửi tin nhắn qua API `/chat`
+2. Hệ thống agent phân tích intent
+3. Nếu cần tìm sản phẩm → Gọi `data_retrieval_tool` với tham số phù hợp
+4. Agent tổng hợp thông tin → Trả lời tư vấn
+5. Trích xuất `product_ids` từ kết quả tìm kiếm
+6. Lưu lịch sử chat vào PostgreSQL (background task)
+7. Trả về JSON với `ai_response` và `product_ids`
+---
+## Error Handling
+### Error Response Format
+```json
+{
+  "status": "error",
+  "ai_response": "Mô tả lỗi hoặc thông báo fallback",
+  "product_ids": []
+}
+```
+### HTTP Status Codes
+| Code | Ý nghĩa | Khi nào xảy ra |
+|------|---------|----------------|
+| 200 | OK | Request thành công |
+| 400 | Bad Request | Thiếu `user_id` hoặc `user_query` |
+| 500 | Internal Server Error | Lỗi hệ thống (database, LLM, ...) |
+---
+## Ví Dụ Sử Dụng
+### Python
+```python
+import requests
+# Chat với bot
+response = requests.post("http://localhost:8000/chat", json={
+    "user_id": "user_12345",
+    "user_query": "Cho em xem váy đầm dự tiệc dưới 1 triệu"
+})
+data = response.json()
+print(f"Bot: {data['ai_response']}")
+print(f"Sản phẩm: {data['product_ids']}")
+# Lấy lịch sử chat
+history = requests.get("http://localhost:8000/history/user_12345?limit=10")
+messages = history.json()["data"]
+for msg in messages:
+    sender = "User" if msg["is_human"] else "Bot"
+    print(f"{sender}: {msg['message']}")
+```
+### JavaScript (Fetch API)
+```javascript
+// Chat với bot
+const response = await fetch('http://localhost:8000/chat', {
+  method: 'POST',
+  headers: { 'Content-Type': 'application/json' },
+  body: JSON.stringify({
+    user_id: 'user_12345',
+    user_query: 'Cho em xem áo khoác nữ'
+  })
+});
+const data = await response.json();
+console.log('Bot:', data.ai_response);
+console.log('Products:', data.product_ids);
+// Lấy lịch sử chat (phân trang)
+let cursor = null;
+const allMessages = [];
+do {
+  const url = cursor 
+    ? `http://localhost:8000/history/user_12345?limit=50&before_id=${cursor}`
+    : `http://localhost:8000/history/user_12345?limit=50`;
+  const historyResponse = await fetch(url);
+  const { data: messages, next_cursor } = await historyResponse.json();
+  allMessages.push(...messages);
+  cursor = next_cursor;
+} while (cursor !== null);
+console.log(`Tổng số tin nhắn: ${allMessages.length}`);
+```
+### cURL
+```bash
+# Chat với bot
+curl -X POST "http://localhost:8000/chat" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "user_id": "user_12345",
+    "user_query": "Cho em xem giày thể thao nam"
+  }'
+# Lấy lịch sử chat
+curl "http://localhost:8000/history/user_12345?limit=20"
+# Lấy trang tiếp theo
+curl "http://localhost:8000/history/user_12345?limit=20&before_id=150"
+```
+---
+## Notes
+### 1. Product IDs
+- `product_ids` trả về danh sách `internal_ref_code` (mã sản phẩm nội bộ)
+- Frontend có thể dùng để hiển thị carousel sản phẩm hoặc link đến trang chi tiết
+- Nếu không tìm thấy sản phẩm → `product_ids = []`
+### 2. Conversation History
+- Lịch sử chat được lưu tự động sau mỗi cuộc hội thoại (background task)
+- Dữ liệu lưu trong PostgreSQL với index trên `user_id` và `id`
+- Sắp xếp theo thứ tự mới nhất → cũ nhất
+### 3. Rate Limiting
+- Hiện tại chưa có rate limiting
+- Khuyến nghị implement rate limit khi deploy production
+### 4. Authentication
+- Hiện tại API không yêu cầu authentication
+- `user_id` do client tự generate và gửi lên
+- Khuyến nghị: Tích hợp Clerk Auth hoặc JWT token cho production
+---
+## Environment Variables
+```bash
+# PostgreSQL
+POSTGRES_HOST=localhost
+POSTGRES_PORT=5432
+POSTGRES_DB=chatbot_db
+POSTGRES_USER=postgres
+POSTGRES_PASSWORD=your_password
+# OpenAI
+OPENAI_API_KEY=sk-...
+# StarRocks (Vector Database)
+STARROCKS_HOST=localhost
+STARROCKS_PORT=9030
+STARROCKS_USER=root
+STARROCKS_PASSWORD=your_password
+STARROCKS_DB=chatbot_products
+# Server
+PORT=8000
+HOST=0.0.0.0
+```
+---
+## Testing
+Truy cập `http://localhost:8000/static/index.html` để test chatbot qua UI đơn giản.
--- a/backend/static/index.html
+++ b/backend/static/index.html
@@ -402,7 +402,20 @@
                botMsgDiv.className = 'message bot';
                if (data.status === 'success') {
-                    botMsgDiv.innerText = data.response;
+                    // Display AI response
+                    botMsgDiv.innerText = data.ai_response || data.response || 'No response';
+                    // Add product IDs if available
+                    if (data.product_ids && data.product_ids.length > 0) {
+                        const productInfo = document.createElement('div');
+                        productInfo.style.marginTop = '8px';
+                        productInfo.style.fontSize = '0.85em';
+                        productInfo.style.color = '#aaa';
+                        productInfo.style.borderTop = '1px solid #555';
+                        productInfo.style.paddingTop = '8px';
+                        productInfo.innerText = `📦 Products: ${data.product_ids.join(', ')}`;
+                        botMsgDiv.appendChild(productInfo);
+                    }
                } else {
                    botMsgDiv.innerText = "Error: " + (data.message || "Unknown error");
                    botMsgDiv.style.color = 'red';

--- a/backend/test.py
+++ b/backend/test.py
-import dis
-# Code với type annotation:
-def func1():
-    x: int = 5
-    return x
-# Code không có type annotation:
-def func2():
-    x = 5
-    return x
-print("=== Func1 (có type) ===")
-dis.dis(func1)
-print("\n=== Func2 (không type) ===")
-dis.dis(func2)
-# KẾT QUẢ: Bytecode y hệt nhau! Type annotation bị bỏ qua hoàn toàn.
\ No newline at end of file