Feat: Add Dynamic Prompt Management System & UI Editor

c7883a99 · Vũ Hoàng Anh · 3ebbd3e4 · c7883a99 · c7883a99 · c7883a99
Commit c7883a99 authored Jan 19, 2026 by Vũ Hoàng Anh
19 changed files
--- a/backend/agent/controller.py
+++ b/backend/agent/controller.py
@@ -3,22 +3,23 @@ Fashion Q&A Agent Controller
 Langfuse will auto-trace via LangChain integration (no code changes needed).
 """

-import json
 import logging
 import time
 import uuid

 from fastapi import BackgroundTasks
-from langchain_core.messages import AIMessage, HumanMessage, ToolMessage
+from langchain_core.messages import AIMessage, HumanMessage
 from langchain_core.runnables import RunnableConfig

-from common.conversation_manager import ConversationManager, get_conversation_manager
+from common.cache import redis_cache
+from common.conversation_manager import get_conversation_manager
 from common.langfuse_client import get_callback_handler
 from common.llm_factory import create_llm
-from config import DEFAULT_MODEL
+from config import DEFAULT_MODEL, REDIS_CACHE_TURN_ON
 from langfuse import propagate_attributes

 from .graph import build_graph
+from .helper import extract_product_ids, handle_post_chat_async, parse_ai_response
 from .models import AgentState, get_config
 from .tools.get_tools import get_all_tools

@@ -31,31 +32,42 @@ async def chat_controller(
    background_tasks: BackgroundTasks,
    model_name: str = DEFAULT_MODEL,
    images: list[str] | None = None,
+    identity_key: str | None = None,
 ) -> dict:
    """
    Controller main logic for non-streaming chat requests.

-    Tạm thời bỏ lớp cache để đơn giản luồng xử lý:
-    - Nhận query → Gọi LLM qua graph.
-    - Lưu lịch sử hội thoại ở background.
+    Flow:
+    1. Check cache (if enabled) → HIT: return cached response
+    2. MISS: Call LLM → Save to cache → Return response
+    
+    Args:
+        identity_key: Key for saving/loading history (identity.history_key)
+                      Guest: device_id, User: user_id
    """
-    logger.info("chat_controller start: model=%s, user_id=%s", model_name, user_id)
-
-    # ====================== CACHE LAYER (TẠM THỜI TẮT) ======================
-    # from common.cache import redis_cache
-    #
-    # cached_response = await redis_cache.get_response(user_id=user_id, query=query)
-    # if cached_response:
-    #     # CACHE HIT - Return immediately
-    #     memory = await get_conversation_manager()
-    #     background_tasks.add_task(
-    #         _handle_post_chat_async,
-    #         memory=memory,
-    #         user_id=user_id,
-    #         human_query=query,
-    #         ai_msg=AIMessage(content=cached_response["ai_response"]),
-    #     )
-    #     return {**cached_response, "cached": True}
+    effective_identity_key = identity_key or user_id
+    
+    logger.info(
+        "chat_controller start: model=%s, user_id=%s, identity_key=%s", 
+        model_name, user_id, effective_identity_key
+    )
+
+    # ====================== CACHE LAYER ======================
+    if REDIS_CACHE_TURN_ON:
+        cached_response = await redis_cache.get_response(
+            user_id=effective_identity_key, query=query
+        )
+        if cached_response:
+            logger.info(f"⚡ CACHE HIT for identity_key={effective_identity_key}")
+            memory = await get_conversation_manager()
+            background_tasks.add_task(
+                handle_post_chat_async,
+                memory=memory,
+                identity_key=effective_identity_key,
+                human_query=query,
+                ai_response=cached_response,
+            )
+            return {**cached_response, "cached": True}

    # ====================== NORMAL LLM FLOW ======================
    logger.info("chat_controller: proceed with live LLM call")
@@ -63,43 +75,33 @@ async def chat_controller(
    config = get_config()
    config.model_name = model_name

-    # Enable JSON mode to ensure structured output
    llm = create_llm(model_name=model_name, streaming=False, json_mode=True)
    tools = get_all_tools()
-
    graph = build_graph(config, llm=llm, tools=tools)

    # Init ConversationManager (Singleton)
    memory = await get_conversation_manager()

-    # LOAD HISTORY & Prepare State
-    history_dicts = await memory.get_chat_history(user_id, limit=20)
-    messages = []
-    for m in history_dicts:
-        if m["is_human"]: # Original code used 'is_human', new code used 'role'
-            messages.append(HumanMessage(content=m["message"]))
-        else:
-            messages.append(AIMessage(content=m["message"]))
+    # Load History
+    history_dicts = await memory.get_chat_history(effective_identity_key, limit=20)
+    messages = [
+        HumanMessage(content=m["message"]) if m["is_human"] else AIMessage(content=m["message"])
+        for m in history_dicts
+    ]

-    # Prepare initial state and execution config for the graph run.
+    # Prepare State
    initial_state: AgentState = {
        "user_query": HumanMessage(content=query),
        "messages": messages + [HumanMessage(content=query)],
-        "history": messages, # The new code uses 'messages' for history, which is correct
+        "history": messages,
        "user_id": user_id,
        "images_embedding": [],
        "ai_response": None,
    }
+    
    run_id = str(uuid.uuid4())
-
-    # Metadata for LangChain (tags for logging/filtering)
-    metadata = {
-        "run_id": run_id,
-        "tags": "chatbot,production",
-    }
-
    langfuse_handler = get_callback_handler()
-
+    
    exec_config = RunnableConfig(
        configurable={
            "user_id": user_id,
@@ -107,145 +109,48 @@ async def chat_controller(
            "run_id": run_id,
        },
        run_id=run_id,
-        metadata=metadata,
+        metadata={"run_id": run_id, "tags": "chatbot,production"},
        callbacks=[langfuse_handler] if langfuse_handler else [],
    )

-    # Execute graph với Langfuse user tracking
-    # Dùng propagate_attributes để tự động gán user_id cho tất cả observations
+    # Execute Graph
    start_time = time.time()
-    # Generate session_id từ user_id + run_id (có thể thay bằng conversation_id nếu có)
    session_id = f"{user_id}-{run_id[:8]}"
    
    with propagate_attributes(user_id=user_id, session_id=session_id):
        result = await graph.ainvoke(initial_state, config=exec_config)
+    
    duration = time.time() - start_time

-    # Parse AI response (expected JSON from chat_controller logic)
-    all_product_ids = _extract_product_ids(result.get("messages", []))
+    # Parse Response
+    all_product_ids = extract_product_ids(result.get("messages", []))
    ai_raw_content = result.get("ai_response").content if result.get("ai_response") else ""
-    logger.debug("raw ai output: %s", ai_raw_content)
-
-    # Standardize output
-    ai_text_response = ai_raw_content
-    final_product_ids = all_product_ids
-
-    try:
-        # Try to parse if it's a JSON string from LLM
-        ai_json = json.loads(ai_raw_content)
-        ai_text_response = ai_json.get("ai_response", ai_raw_content)
-        explicit_ids = ai_json.get("product_ids", [])
-        if explicit_ids and isinstance(explicit_ids, list):
-            # Merge with extracted IDs if needed or replace
-            final_product_ids = explicit_ids
-    except:
-        pass
+    ai_text_response, final_product_ids = parse_ai_response(ai_raw_content, all_product_ids)

    response_payload = {
        "ai_response": ai_text_response,
        "product_ids": final_product_ids,
    }

-    # ====================== STORE LAYER 1 CACHE (TẠM THỜI TẮT) ======================
-    # Cache for 5 minutes (300s) - Short enough for stock safety
-    # await redis_cache.set_response(user_id=user_id, query=query, response_data=response_payload, ttl=300)
-
-    # Add to history in background - lưu nguyên response JSON
+    # ====================== SAVE TO CACHE ======================
+    if REDIS_CACHE_TURN_ON:
+        await redis_cache.set_response(
+            user_id=effective_identity_key, 
+            query=query, 
+            response_data=response_payload, 
+            ttl=300
+        )
+        logger.debug(f"💾 Cached response for identity_key={effective_identity_key}")
+
+    # Save to History (Background)
    background_tasks.add_task(
-        _handle_post_chat_async,
+        handle_post_chat_async,
        memory=memory,
-        user_id=user_id,
+        identity_key=effective_identity_key,
        human_query=query,
-        ai_response=response_payload,  # dict: {ai_response, product_ids}
+        ai_response=response_payload,
    )

    logger.info("chat_controller finished in %.2fs", duration)
    return {**response_payload, "cached": False}

-
-def _extract_product_ids(messages: list) -> list[dict]:
-    """
-    Extract full product info from tool messages (data_retrieval_tool results).
-    Returns list of product objects with: sku, name, price, sale_price, url, thumbnail_image_url.
-    """
-    products = []
-    seen_skus = set()
-
-    for msg in messages:
-        if isinstance(msg, ToolMessage):
-            try:
-                # Tool result is JSON string
-                tool_result = json.loads(msg.content)
-
-                # Check if tool returned products
-                if tool_result.get("status") == "success" and "products" in tool_result:
-                    for product in tool_result["products"]:
-                        sku = product.get("internal_ref_code")
-                        if sku and sku not in seen_skus:
-                            seen_skus.add(sku)
-
-                            # Extract full product info
-                            product_obj = {
-                                "sku": sku,
-                                "name": product.get("magento_product_name", ""),
-                                "price": product.get("price_vnd", 0),
-                                "sale_price": product.get("sale_price_vnd"),  # null nếu không sale
-                                "url": product.get("magento_url_key", ""),
-                                "thumbnail_image_url": product.get("thumbnail_image_url", ""),
-                            }
-                            products.append(product_obj)
-            except (json.JSONDecodeError, KeyError, TypeError) as e:
-                logger.debug(f"Could not parse tool message for products: {e}")
-                continue
-
-    return products
-
-
-def _prepare_execution_context(query: str, user_id: str, history: list, images: list | None):
-    """Prepare initial state and execution config for the graph run."""
-    initial_state: AgentState = {
-        "user_query": HumanMessage(content=query),
-        "messages": [HumanMessage(content=query)],
-        "history": history,
-        "user_id": user_id,
-        "images_embedding": [],
-        "ai_response": None,
-    }
-    run_id = str(uuid.uuid4())
-
-    # Metadata for LangChain (tags for logging/filtering)
-    metadata = {
-        "run_id": run_id,
-        "tags": "chatbot,production",
-    }
-
-    langfuse_handler = get_callback_handler()
-
-    exec_config = RunnableConfig(
-        configurable={
-            "user_id": user_id,
-            "transient_images": images or [],
-            "run_id": run_id,
-        },
-        run_id=run_id,
-        metadata=metadata,
-        callbacks=[langfuse_handler] if langfuse_handler else [],
-    )
-    return initial_state, exec_config
-
-
-async def _handle_post_chat_async(
-    memory: ConversationManager, user_id: str, human_query: str, ai_response: dict | None
-):
-    """
-    Save chat history in background task after response is sent.
-    Lưu AI response dưới dạng JSON string.
-    """
-    if ai_response:
-        try:
-            # Convert dict thành JSON string để lưu vào TEXT field
-            ai_response_json = json.dumps(ai_response, ensure_ascii=False)
-            await memory.save_conversation_turn(user_id, human_query, ai_response_json)
-            logger.debug(f"Saved conversation for user {user_id}")
-        except Exception as e:
-            logger.error(f"Failed to save conversation for user {user_id}: {e}", exc_info=True)
--- a/backend/agent/helper.py
+++ b/backend/agent/helper.py
+"""
+Agent Helper Functions
+Các hàm tiện ích cho chat controller.
+"""
+
+import json
+import logging
+import uuid
+
+from langchain_core.messages import HumanMessage, ToolMessage
+from langchain_core.runnables import RunnableConfig
+
+from common.conversation_manager import ConversationManager
+from common.langfuse_client import get_callback_handler
+from .models import AgentState
+
+logger = logging.getLogger(__name__)
+
+
+def extract_product_ids(messages: list) -> list[dict]:
+    """
+    Collect de-duplicated product records from tool messages.
+
+    Only ``ToolMessage`` items are inspected. Each one's ``content`` is parsed
+    as JSON; payloads whose ``status`` is ``"success"`` contribute one entry
+    per unique ``internal_ref_code`` found under ``products``.
+
+    Args:
+        messages: Messages produced by the graph run; other message types
+            are ignored.
+
+    Returns:
+        Product dicts with keys ``sku``, ``name``, ``price``, ``sale_price``,
+        ``url`` and ``thumbnail_image_url``, in first-seen order.
+    """
+    products = []
+    seen_skus = set()
+
+    for msg in messages:
+        if not isinstance(msg, ToolMessage):
+            continue
+
+        try:
+            tool_result = json.loads(msg.content)
+
+            # Valid JSON is not necessarily an object: a list/string/number
+            # payload has no .get() and must not crash the whole request.
+            if not isinstance(tool_result, dict):
+                continue
+            if tool_result.get("status") != "success":
+                continue
+
+            for product in tool_result.get("products") or []:
+                # Skip malformed entries instead of raising AttributeError.
+                if not isinstance(product, dict):
+                    continue
+
+                sku = product.get("internal_ref_code")
+                if not sku or sku in seen_skus:
+                    continue
+                seen_skus.add(sku)
+
+                products.append(
+                    {
+                        "sku": sku,
+                        "name": product.get("magento_product_name", ""),
+                        "price": product.get("price_vnd", 0),
+                        # None when the product is not on sale
+                        "sale_price": product.get("sale_price_vnd"),
+                        "url": product.get("magento_url_key", ""),
+                        "thumbnail_image_url": product.get("thumbnail_image_url", ""),
+                    }
+                )
+        except (json.JSONDecodeError, KeyError, TypeError) as e:
+            # Best effort: an unparsable tool message only loses its own products.
+            logger.debug(f"Could not parse tool message for products: {e}")
+
+    return products
+
+
+def parse_ai_response(ai_raw_content: str, all_product_ids: list) -> tuple[str, list]:
+    """
+    Split the LLM's raw output into display text and product list.
+
+    When ``ai_raw_content`` is a JSON object, its ``ai_response`` field is
+    used as the text and a non-empty ``product_ids`` list replaces
+    ``all_product_ids``. Anything else (plain text, invalid JSON, or valid
+    JSON that is not an object) is returned unchanged together with
+    ``all_product_ids``.
+
+    Args:
+        ai_raw_content: Raw content of the AI response.
+        all_product_ids: Products already extracted from tool messages, used
+            as the fallback product list.
+
+    Returns:
+        tuple: (ai_text_response, final_product_ids)
+    """
+    try:
+        ai_json = json.loads(ai_raw_content)
+    except (json.JSONDecodeError, TypeError):
+        return ai_raw_content, all_product_ids
+
+    # json.loads also accepts bare strings, numbers and arrays; those have no
+    # .get(), so treat them like unstructured text instead of raising.
+    if not isinstance(ai_json, dict):
+        return ai_raw_content, all_product_ids
+
+    ai_text_response = ai_json.get("ai_response", ai_raw_content)
+    explicit_ids = ai_json.get("product_ids", [])
+    if explicit_ids and isinstance(explicit_ids, list):
+        # Products named explicitly by the LLM take precedence.
+        return ai_text_response, explicit_ids
+
+    return ai_text_response, all_product_ids
+
+
+def prepare_execution_context(query: str, user_id: str, history: list, images: list | None):
+    """
+    Build the starting graph state and the runnable config for one run.
+
+    Args:
+        query: The user's message text.
+        user_id: Identifier stored in the state and in the configurable section.
+        history: Prior messages, stored under the state's ``history`` key.
+        images: Optional images passed through as ``transient_images``;
+            ``None`` becomes an empty list.
+
+    Returns:
+        tuple: (initial_state, exec_config)
+    """
+    initial_state: AgentState = {
+        "user_query": HumanMessage(content=query),
+        "messages": [HumanMessage(content=query)],
+        "history": history,
+        "user_id": user_id,
+        "images_embedding": [],
+        "ai_response": None,
+    }
+
+    # A fresh id per run, shared by the config, its metadata and configurable.
+    run_id = str(uuid.uuid4())
+
+    handler = get_callback_handler()
+    callbacks = [handler] if handler else []
+
+    configurable = {
+        "user_id": user_id,
+        "transient_images": images or [],
+        "run_id": run_id,
+    }
+
+    exec_config = RunnableConfig(
+        configurable=configurable,
+        run_id=run_id,
+        # Metadata for LangChain (tags for logging/filtering)
+        metadata={"run_id": run_id, "tags": "chatbot,production"},
+        callbacks=callbacks,
+    )
+    return initial_state, exec_config
+
+
+async def handle_post_chat_async(
+    memory: ConversationManager, 
+    identity_key: str, 
+    human_query: str, 
+    ai_response: dict | None
+):
+    """
+    Persist one conversation turn; scheduled as a background task by the controller.
+
+    The AI response dict is serialized to a JSON string before being saved.
+    Nothing is saved when ``ai_response`` is empty or ``None``, and any failure
+    while saving is logged rather than raised.
+    """
+    if not ai_response:
+        return
+
+    try:
+        # Serialize the dict to a JSON string so it fits a TEXT field;
+        # ensure_ascii=False keeps non-ASCII characters readable.
+        serialized = json.dumps(ai_response, ensure_ascii=False)
+        await memory.save_conversation_turn(identity_key, human_query, serialized)
+        logger.debug(f"Saved conversation for identity_key {identity_key}")
+    except Exception as e:
+        logger.error(f"Failed to save conversation for identity_key {identity_key}: {e}", exc_info=True)
--- a/backend/agent/prompt.py
+++ b/backend/agent/prompt.py
 """
 CiCi Fashion Consultant - System Prompt
 Tư vấn thời trang CANIFA chuyên nghiệp
-Version 2.0 - Clean & Concise
+Version 3.0 - Dynamic from File
 """

+import os
 from datetime import datetime

+PROMPT_FILE_PATH = os.path.join(os.path.dirname(__file__), "system_prompt.txt")

 def get_system_prompt() -> str:
    """
    System prompt cho CiCi Fashion Agent
+    Đọc từ file system_prompt.txt để có thể update dynamic.
    
    Returns:
        str: System prompt với ngày hiện tại
@@ -17,202 +20,18 @@ def get_system_prompt() -> str:
    now = datetime.now()
    date_str = now.strftime("%d/%m/%Y")

-    prompt = """# VAI TRÒ
+    try:
+        if os.path.exists(PROMPT_FILE_PATH):
+            with open(PROMPT_FILE_PATH, "r", encoding="utf-8") as f:
+                prompt_template = f.read()
+                return prompt_template.replace("{date_str}", date_str)
+    except Exception as e:
+        print(f"Error reading system prompt file: {e}")

+    # Fallback default prompt if file error
+    return f"""# VAI TRÒ
 Bạn là CiCi - Chuyên viên tư vấn thời trang CANIFA.
- Nhiệt tình, thân thiện, chuyên nghiệp
- CANIFA BÁN QUẦN ÁO: áo, quần, váy, đầm, phụ kiện thời trang
- Hôm nay: {date_str}
+Hôm nay: {date_str}

---
-
-# QUY TẮC TRUNG THỰC - BẮT BUỘC
-
-KHÔNG BAO GIỜ BỊA ĐẶT - CHỈ NÓI THEO DỮ LIỆU
-
-**ĐÚNG:**
- Tool trả về áo thun → Giới thiệu áo thun
- Tool trả về 0 sản phẩm → Nói "Shop chưa có sản phẩm này"
- Tool trả về quần nỉ mà khách hỏi bikini → Nói "Shop chưa có bikini"
-
-**CẤM:**
- Tool trả về quần nỉ → Gọi là "đồ bơi"
- Tool trả về 0 kết quả → Nói "shop có sản phẩm X"
- Tự bịa mã sản phẩm, giá tiền, chính sách
-
-Không có trong data = Không nói = Không tư vấn láo
-
---
-
-# NGÔN NGỮ & XƯNG HÔ
-
- Mặc định: Xưng "mình" - gọi "bạn"
- Khi khách xưng anh/chị: Xưng "em" - gọi "anh/chị"
- Khách nói tiếng Việt → Trả lời tiếng Việt
- Khách nói tiếng Anh → Trả lời tiếng Anh
- Ngắn gọn, đi thẳng vào vấn đề
-
---
-
-# KHI NÀO GỌI TOOL
-
-**Gọi data_retrieval_tool khi:**
- Khách tìm sản phẩm: "Tìm áo...", "Có màu gì..."
- Khách hỏi sản phẩm cụ thể: "Mã 8TS24W001 có không?"
- Tư vấn phong cách: "Mặc gì đi cưới?", "Đồ công sở?"
-
-**⚠️ QUY TẮC SINH QUERY (BẮT BUỘC):**
- **Query chỉ chứa MÔ TẢ SẢN PHẨM** (tên, chất liệu, màu, phong cách).
- **TUYỆT ĐỐI KHÔNG đưa giá tiền vào chuỗi `query`**.
- Giá tiền phải đưa vào tham số riêng: `price_min`, `price_max`.
-
-Ví dụ ĐÚNG:
- Query: "Áo thun nam cotton thoáng mát basic"
- Price_max: 300000
-
-Ví dụ SAI (Cấm):
- Query: "Áo thun nam giá dưới 300k" (SAI vì có giá trong query)
-
-**Gọi canifa_knowledge_search khi:**
- Hỏi chính sách: freeship, đổi trả, bảo hành
- Hỏi thương hiệu: Canifa là gì, lịch sử
- Tìm cửa hàng: địa chỉ, giờ mở cửa
-
-**Không gọi tool khi:**
- Chào hỏi đơn giản: "Hi", "Hello"
- Hỏi lại về sản phẩm vừa show
-
---
-
-# XỬ LÝ KẾT QUẢ TỪ TOOL
-
-## Sau khi gọi tool, kiểm tra kết quả:
-
-**Trường hợp 1: CÓ sản phẩm phù hợp (đúng loại, đúng yêu cầu)**
- DỪNG LẠI, giới thiệu sản phẩm
- KHÔNG GỌI TOOL LẦN 2
-
-**Trường hợp 2: CÓ kết quả NHƯNG SAI LOẠI**
-
-Ví dụ: Khách hỏi bikini, tool trả về quần nỉ
-
-→ Trả lời thẳng:
-"Dạ shop chưa có bikini ạ. Shop chuyên về quần áo thời trang (áo, quần, váy). Bạn có muốn tìm sản phẩm nào khác không?"
-
-CẤM TUYỆT ĐỐI:
- Giới thiệu quần nỉ như thể nó là bikini
- Nói "shop có đồ bơi này bạn tham khảo" khi thực tế là áo/quần thường
-
-**Trường hợp 3: KHÔNG CÓ kết quả (count = 0)**
- Thử lại 1 LẦN với filter rộng hơn
- Nếu vẫn không có:
-
-"Dạ shop chưa có sản phẩm [X] ạ. Bạn có thể tham khảo [loại gần nhất] hoặc ghé shop sau nhé!"
-
---
-
-# FORMAT ĐẦU RA
-
-Trả về JSON (KHÔNG có markdown backticks):
-
-```json
-{{
-    "ai_response": "Câu trả lời ngắn gọn, mô tả bằng [SKU]",
-    "product_ids": [
-        {{
-            "sku": "8TS24W001",
-            "name": "Áo thun nam basic",
-            "price": 200000,
-            "sale_price": 160000,
-            "url": "https://canifa.com/...",
-            "thumbnail_image_url": "https://..."
-        }}
-    ]
-}}
-```
-
-**Quy tắc ai_response:**
- Mô tả ngắn gọn, nhắc sản phẩm bằng [SKU]
- Nói qua giá, chất liệu, điểm nổi bật
- KHÔNG tạo bảng markdown
- KHÔNG đưa link, ảnh (frontend tự render)
-
---
-
-# VÍ DỤ
-
-## Example 1: Chào hỏi
-Input: "Chào shop"
-Output:
-```json
-{{
-    "ai_response": "Chào bạn! Mình là CiCi, tư vấn thời trang CANIFA. Mình có thể giúp gì cho bạn?",
-    "product_ids": []
-}}
-```
-
-## Example 2: Tìm sản phẩm CÓ
-Input: "Tìm áo thun nam dưới 300k"
-Tool trả về: 2 sản phẩm áo thun phù hợp
-Output:
-```json
-{{
-    "ai_response": "Shop có 2 mẫu áo thun nam giá dưới 300k:\n\n- [8TS24W009]: Áo thun cotton basic, giá 250k đang sale 200k\n- [6TN24W012]: Áo thun trơn thoải mái, giá 280k\n\nBạn kéo xuống xem ảnh nhé!",
-    "product_ids": [
-        {{"sku": "8TS24W009", "name": "Áo thun cotton basic", "price": 250000, "sale_price": 200000, "url": "...", "thumbnail_image_url": "..."}},
-        {{"sku": "6TN24W012", "name": "Áo thun trơn", "price": 280000, "sale_price": null, "url": "...", "thumbnail_image_url": "..."}}
-    ]
-}}
-```
-
-## Example 3: Khách hỏi KHÔNG CÓ trong kho
-Input: "Shop có bikini không?"
-Tool trả về: 0 sản phẩm
-Output:
-```json
-{{
-    "ai_response": "Dạ shop chưa có bikini ạ. CANIFA chuyên về quần áo thời trang như áo, quần, váy, đầm. Bạn có muốn tìm mẫu nào khác không?",
-    "product_ids": []
-}}
-```
-
-## Example 4: Tool trả về SAI LOẠI
-Input: "Cho tôi xem đồ bơi"
-Tool trả về: Quần nỉ, áo nỉ (SAI HOÀN TOÀN so với đồ bơi)
-Output:
-```json
-{{
-    "ai_response": "Dạ shop chưa có đồ bơi ạ. Shop chuyên bán quần áo thời trang (áo, quần, váy, áo khoác). Bạn có muốn tìm loại sản phẩm nào khác không?",
-    "product_ids": []
-}}
-```
-
-TUYỆT ĐỐI KHÔNG giới thiệu sản phẩm sai loại
-
-## Example 5: Khách xưng anh/chị
-Input: "Chào em, anh muốn tìm áo sơ mi"
-Output:
-```json
-{{
-    "ai_response": "Chào anh ạ! Em là CiCi. Anh đang tìm áo sơ mi dài tay hay ngắn tay ạ? Để em tư vấn mẫu phù hợp nhất cho anh nhé!",
-    "product_ids": []
-}}
-```
-
---
-
-# TÓM TẮT
-
-1. CANIFA bán quần áo (áo, quần, váy, đầm, phụ kiện)
-2. Không có trong data = Không nói
-3. Kiểm tra kỹ tên sản phẩm trước khi giới thiệu
-4. Nếu sai loại → Nói thẳng "shop chưa có X"
-5. Không bịa giá, mã sản phẩm, chính sách
-6. Có kết quả phù hợp = DỪNG, không gọi tool lần 2
-7. Trả lời ngắn gọn, dựa 100% vào dữ liệu tool trả về
-
---
-
-Luôn thành thật, khéo léo, và chuyên nghiệp."""
-
-    return prompt.replace("{date_str}", date_str)
\ No newline at end of file
+KHÔNG BAO GIỜ BỊA ĐẶT. TRẢ LỜI NGẮN GỌN.
+"""
\ No newline at end of file
--- a/backend/agent/system_prompt.txt
+++ b/backend/agent/system_prompt.txt
+# VAI TRÒ
+
+Bạn là CiCi - Chuyên viên tư vấn thời trang CANIFA.
+- Nhiệt tình, thân thiện, chuyên nghiệp
+- CANIFA BÁN QUẦN ÁO: áo, quần, váy, đầm, phụ kiện thời trang
+- Hôm nay: {date_str}
+
+---
+
+# QUY TẮC TRUNG THỰC - BẮT BUỘC
+
+KHÔNG BAO GIỜ BỊA ĐẶT - CHỈ NÓI THEO DỮ LIỆU
+
+**ĐÚNG:**
+- Tool trả về áo thun → Giới thiệu áo thun
+- Tool trả về 0 sản phẩm → Nói "Shop chưa có sản phẩm này"
+- Tool trả về quần nỉ mà khách hỏi bikini → Nói "Shop chưa có bikini"
+
+**CẤM:**
+- Tool trả về quần nỉ → Gọi là "đồ bơi"
+- Tool trả về 0 kết quả → Nói "shop có sản phẩm X"
+- Tự bịa mã sản phẩm, giá tiền, chính sách
+
+Không có trong data = Không nói = Không tư vấn láo
+
+---
+
+# NGÔN NGỮ & XƯNG HÔ
+
+- Mặc định: Xưng "mình" - gọi "bạn"
+- Khi khách xưng anh/chị: Xưng "em" - gọi "anh/chị"
+- Khách nói tiếng Việt → Trả lời tiếng Việt
+- Khách nói tiếng Anh → Trả lời tiếng Anh
+- Ngắn gọn, đi thẳng vào vấn đề
+
+---
+
+# KHI NÀO GỌI TOOL
+
+**Gọi data_retrieval_tool khi:**
+- Khách tìm sản phẩm: "Tìm áo...", "Có màu gì..."
+- Khách hỏi sản phẩm cụ thể: "Mã 8TS24W001 có không?"
+- Tư vấn phong cách: "Mặc gì đi cưới?", "Đồ công sở?"
+
+**⚠️ QUY TẮC SINH QUERY (BẮT BUỘC):**
+- **Query chỉ chứa MÔ TẢ SẢN PHẨM** (tên, chất liệu, màu, phong cách).
+- **TUYỆT ĐỐI KHÔNG đưa giá tiền vào chuỗi `query`**.
+- Giá tiền phải đưa vào tham số riêng: `price_min`, `price_max`.
+
+Ví dụ ĐÚNG:
+- Query: "Áo thun nam cotton thoáng mát basic"
+- Price_max: 300000
+
+Ví dụ SAI (Cấm):
+- Query: "Áo thun nam giá dưới 300k" (SAI vì có giá trong query)
+
+**Gọi canifa_knowledge_search khi:**
+- Hỏi chính sách: freeship, đổi trả, bảo hành
+- Hỏi thương hiệu: Canifa là gì, lịch sử
+- Tìm cửa hàng: địa chỉ, giờ mở cửa
+
+**Không gọi tool khi:**
+- Chào hỏi đơn giản: "Hi", "Hello"
+- Hỏi lại về sản phẩm vừa show
+
+---
+
+# XỬ LÝ KẾT QUẢ TỪ TOOL
+
+## Sau khi gọi tool, kiểm tra kết quả:
+
+**Trường hợp 1: CÓ sản phẩm phù hợp (đúng loại, đúng yêu cầu)**
+- DỪNG LẠI, giới thiệu sản phẩm
+- KHÔNG GỌI TOOL LẦN 2
+
+**Trường hợp 2: CÓ kết quả NHƯNG SAI LOẠI**
+
+Ví dụ: Khách hỏi bikini, tool trả về quần nỉ
+
+→ Trả lời thẳng:
+"Dạ shop chưa có bikini ạ. Shop chuyên về quần áo thời trang (áo, quần, váy). Bạn có muốn tìm sản phẩm nào khác không?"
+
+CẤM TUYỆT ĐỐI:
+- Giới thiệu quần nỉ như thể nó là bikini
+- Nói "shop có đồ bơi này bạn tham khảo" khi thực tế là áo/quần thường
+
+**Trường hợp 3: KHÔNG CÓ kết quả (count = 0)**
+- Thử lại 1 LẦN với filter rộng hơn
+- Nếu vẫn không có:
+
+"Dạ shop chưa có sản phẩm [X] ạ. Bạn có thể tham khảo [loại gần nhất] hoặc ghé shop sau nhé!"
+
+---
+
+# FORMAT ĐẦU RA
+
+Trả về JSON (KHÔNG có markdown backticks):
+
+```json
+{{
+    "ai_response": "Câu trả lời ngắn gọn, mô tả bằng [SKU]",
+    "product_ids": [
+        {{
+            "sku": "8TS24W001",
+            "name": "Áo thun nam basic",
+            "price": 200000,
+            "sale_price": 160000,
+            "url": "https://canifa.com/...",
+            "thumbnail_image_url": "https://..."
+        }}
+    ]
+}}
+```
+
+**Quy tắc ai_response:**
+- Mô tả ngắn gọn, nhắc sản phẩm bằng [SKU]
+- Nói qua giá, chất liệu, điểm nổi bật
+- KHÔNG tạo bảng markdown
+- KHÔNG đưa link, ảnh (frontend tự render)
+
+---
+
+# VÍ DỤ
+
+## Example 1: Chào hỏi
+Input: "Chào shop"
+Output:
+```json
+{{
+    "ai_response": "Chào bạn! Mình là CiCi, tư vấn thời trang CANIFA. Mình có thể giúp gì cho bạn?",
+    "product_ids": []
+}}
+```
+
+## Example 2: Tìm sản phẩm CÓ
+Input: "Tìm áo thun nam dưới 300k"
+Tool trả về: 2 sản phẩm áo thun phù hợp
+Output:
+```json
+{{
+    "ai_response": "Shop có 2 mẫu áo thun nam giá dưới 300k:
+
+- [8TS24W009]: Áo thun cotton basic, giá 250k đang sale 200k
+- [6TN24W012]: Áo thun trơn thoải mái, giá 280k
+
+Bạn kéo xuống xem ảnh nhé!",
+    "product_ids": [
+        {{"sku": "8TS24W009", "name": "Áo thun cotton basic", "price": 250000, "sale_price": 200000, "url": "...", "thumbnail_image_url": "..."}},
+        {{"sku": "6TN24W012", "name": "Áo thun trơn", "price": 280000, "sale_price": null, "url": "...", "thumbnail_image_url": "..."}}
+    ]
+}}
+```
+
+## Example 3: Khách hỏi KHÔNG CÓ trong kho
+Input: "Shop có bikini không?"
+Tool trả về: 0 sản phẩm
+Output:
+```json
+{{
+    "ai_response": "Dạ shop chưa có bikini ạ. CANIFA chuyên về quần áo thời trang như áo, quần, váy, đầm. Bạn có muốn tìm mẫu nào khác không?",
+    "product_ids": []
+}}
+```
+
+## Example 4: Tool trả về SAI LOẠI
+Input: "Cho tôi xem đồ bơi"
+Tool trả về: Quần nỉ, áo nỉ (SAI HOÀN TOÀN so với đồ bơi)
+Output:
+```json
+{{
+    "ai_response": "Dạ shop chưa có đồ bơi ạ. Shop chuyên bán quần áo thời trang (áo, quần, váy, áo khoác). Bạn có muốn tìm loại sản phẩm nào khác không?",
+    "product_ids": []
+}}
+```
+
+TUYỆT ĐỐI KHÔNG giới thiệu sản phẩm sai loại
+
+## Example 5: Khách xưng anh/chị
+Input: "Chào em, anh muốn tìm áo sơ mi"
+Output:
+```json
+{{
+    "ai_response": "Chào anh ạ! Em là CiCi. Anh đang tìm áo sơ mi dài tay hay ngắn tay ạ? Để em tư vấn mẫu phù hợp nhất cho anh nhé!",
+    "product_ids": []
+}}
+```
+
+---
+
+# TÓM TẮT
+
+1. CANIFA bán quần áo (áo, quần, váy, đầm, phụ kiện)
+2. Không có trong data = Không nói
+3. Kiểm tra kỹ tên sản phẩm trước khi giới thiệu
+4. Nếu sai loại → Nói thẳng "shop chưa có X"
+5. Không bịa giá, mã sản phẩm, chính sách
+6. Có kết quả phù hợp = DỪNG, không gọi tool lần 2
+7. Trả lời ngắn gọn, dựa 100% vào dữ liệu tool trả về
+
+---
+
+Luôn thành thật, khéo léo, và chuyên nghiệp.
\ No newline at end of file
--- a/backend/api/chatbot_route.py
+++ b/backend/api/chatbot_route.py
@@ -3,9 +3,7 @@ Fashion Q&A Agent Router
 FastAPI endpoints cho Fashion Q&A Agent service.
 Router chỉ chứa định nghĩa API, logic nằm ở controller.

-Message Limit:
- Guest (không login): 10 tin/ngày theo device_id
- User đã login: 100 tin/ngày theo user_id
+Note: Rate limit check đã được xử lý trong middleware (CanifaAuthMiddleware)
 """

 import logging
@@ -29,37 +27,14 @@ async def fashion_qa_chat(request: Request, req: QueryRequest, background_tasks:
    """
    Endpoint chat không stream - trả về response JSON đầy đủ một lần.
    
-    Message Limit:
-    - Guest: 10 tin nhắn/ngày (theo device_id)
-    - User đã login: 100 tin nhắn/ngày (theo user_id)
+    Note: Rate limit đã được check trong middleware.
    """
    # 1. Xác định user identity
    identity = get_user_identity(request)
    user_id = identity.primary_id
    
-    # 2. Check message limit TRƯỚC khi xử lý
-    can_send, limit_info = await message_limit_service.check_limit(
-        identity_key=identity.rate_limit_key,
-        is_authenticated=identity.is_authenticated,
-    )
-    
-    if not can_send:
-        logger.warning(
-            f"⚠️ Message limit exceeded: {identity.rate_limit_key} | "
-            f"used={limit_info['used']}/{limit_info['limit']}"
-        )
-        return {
-            "status": "error",
-            "error_code": "MESSAGE_LIMIT_EXCEEDED",
-            "message": limit_info["message"],
-            "require_login": limit_info["require_login"],
-            "limit_info": {
-                "limit": limit_info["limit"],
-                "used": limit_info["used"],
-                "remaining": limit_info["remaining"],
-                "reset_seconds": limit_info["reset_seconds"],
-            },
-        }
+    # Rate limit đã check trong middleware, lấy limit_info từ request.state
+    limit_info = getattr(request.state, 'limit_info', None)

    logger.info(f"📥 [Incoming Query - NonStream] User: {user_id} | Query: {req.user_query}")

@@ -79,6 +54,7 @@ async def fashion_qa_chat(request: Request, req: QueryRequest, background_tasks:
            background_tasks=background_tasks,
            model_name=DEFAULT_MODEL,
            images=req.images,
+            identity_key=identity.history_key,  # Guest: device_id, User: user_id
        )

        # Log chi tiết response
@@ -98,7 +74,7 @@ async def fashion_qa_chat(request: Request, req: QueryRequest, background_tasks:
            },
        )

-        # 3. Increment message count SAU KHI chat thành công
+        # Increment message count SAU KHI chat thành công
        usage_info = await message_limit_service.increment(
            identity_key=identity.rate_limit_key,
            is_authenticated=identity.is_authenticated,

--- a/backend/api/conservation_route.py
+++ b/backend/api/conservation_route.py
 """
 Chat History API Routes
- GET /api/history/{user_id} - Lấy lịch sử chat (có product_ids)
- DELETE /api/history/{user_id} - Xóa lịch sử chat
+- GET /api/history/{identity_key} - Lấy lịch sử chat (có product_ids)
+- DELETE /api/history/{identity_key} - Xóa lịch sử chat
+
+Note: identity_key có thể là device_id (guest) hoặc user_id (đã login)
 """

 import logging
@@ -26,10 +28,14 @@ class ClearHistoryResponse(BaseModel):
    message: str


-@router.get("/api/history/{user_id}", summary="Get Chat History", response_model=ChatHistoryResponse)
-async def get_chat_history(user_id: str, limit: int | None = 50, before_id: int | None = None):
+@router.get("/api/history/{identity_key}", summary="Get Chat History", response_model=ChatHistoryResponse)
+async def get_chat_history(identity_key: str, limit: int | None = 50, before_id: int | None = None):
    """
-    Lấy lịch sử chat của user.
+    Lấy lịch sử chat theo identity_key.
+    
+    identity_key:
+    - Guest: device_id
+    - User đã login: user_id (customer_id từ Canifa)
    
    Response bao gồm:
    - message: Nội dung tin nhắn
@@ -40,7 +46,7 @@ async def get_chat_history(user_id: str, limit: int | None = 50, before_id: int
    """
    try:
        manager = await get_conversation_manager()
-        history = await manager.get_chat_history(user_id, limit=limit, before_id=before_id)
+        history = await manager.get_chat_history(identity_key, limit=limit, before_id=before_id)

        next_cursor = None
        if history and len(history) > 0:
@@ -48,20 +54,21 @@ async def get_chat_history(user_id: str, limit: int | None = 50, before_id: int

        return {"data": history, "next_cursor": next_cursor}
    except Exception as e:
-        logger.error(f"Error fetching chat history for user {user_id}: {e}")
+        logger.error(f"Error fetching chat history for {identity_key}: {e}")
        raise HTTPException(status_code=500, detail="Failed to fetch chat history")


-@router.delete("/api/history/{user_id}", summary="Clear Chat History", response_model=ClearHistoryResponse)
-async def clear_chat_history(user_id: str):
+@router.delete("/api/history/{identity_key}", summary="Clear Chat History", response_model=ClearHistoryResponse)
+async def clear_chat_history(identity_key: str):
    """
-    Xóa toàn bộ lịch sử chat của user.
+    Delete the entire chat history stored under identity_key.
+
+    NOTE(review): identity_key is taken straight from the URL path and this
+    handler performs no ownership check of its own - confirm that an upstream
+    guard prevents one caller from clearing another identity's history.
    """
    try:
        manager = await get_conversation_manager()
-        await manager.clear_history(user_id)
-        logger.info(f"✅ Cleared chat history for user {user_id}")
-        return {"success": True, "message": f"Đã xóa lịch sử chat của user {user_id}"}
+        await manager.clear_history(identity_key)
+        logger.info(f"✅ Cleared chat history for {identity_key}")
+        return {"success": True, "message": f"Đã xóa lịch sử chat của {identity_key}"}
    except Exception as e:
-        logger.error(f"Error clearing chat history for user {user_id}: {e}")
+        logger.error(f"Error clearing chat history for {identity_key}: {e}")
        raise HTTPException(status_code=500, detail="Failed to clear chat history")
+
--- a/backend/api/prompt_route.py
+++ b/backend/api/prompt_route.py
+from fastapi import APIRouter, HTTPException
+from pydantic import BaseModel
+import os
+from agent.graph import reset_graph
+
+router = APIRouter()
+
+PROMPT_FILE_PATH = os.path.join(os.path.dirname(__file__), "../agent/system_prompt.txt")
+
+class PromptUpdateRequest(BaseModel):
+    """Request body for replacing the agent's system prompt."""
+
+    # Full new prompt text; the update endpoint writes it verbatim to PROMPT_FILE_PATH.
+    content: str
+
+@router.get("/api/agent/system-prompt")
+async def get_system_prompt_content():
+    """Return the current system prompt text.
+
+    Returns:
+        ``{"status": "success", "content": <text>}`` when the prompt file can
+        be read, or ``{"status": "error", "message": "Prompt file not found"}``
+        when it does not exist.
+
+    Raises:
+        HTTPException: 500 carrying the error text for any other read failure.
+    """
+    try:
+        # Open directly instead of checking os.path.exists() first: a file
+        # removed between the check and the open then still produces the
+        # "not found" payload rather than an unexpected 500.
+        with open(PROMPT_FILE_PATH, "r", encoding="utf-8") as f:
+            content = f.read()
+    except FileNotFoundError:
+        return {"status": "error", "message": "Prompt file not found"}
+    except Exception as e:
+        raise HTTPException(status_code=500, detail=str(e)) from e
+    return {"status": "success", "content": content}
+
+@router.post("/api/agent/system-prompt")
+async def update_system_prompt_content(request: PromptUpdateRequest):
+    """Overwrite the system prompt file and reset the cached graph.
+
+    Args:
+        request: body whose ``content`` replaces the whole prompt file.
+
+    Returns:
+        A success payload once the file is written and ``reset_graph()`` has run.
+
+    Raises:
+        HTTPException: 500 carrying the error text if writing or resetting fails.
+
+    NOTE(review): no authentication or authorization check is visible in this
+    handler - confirm the route is protected elsewhere before exposing it.
+    """
+    try:
+        # Persist the new prompt first ...
+        with open(PROMPT_FILE_PATH, mode="w", encoding="utf-8") as prompt_file:
+            prompt_file.write(request.content)
+
+        # ... then reset the graph singleton to force the prompt to be reloaded.
+        reset_graph()
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=str(exc))
+
+    return {"status": "success", "message": "System prompt updated successfully. Graph reloaded."}
--- a/backend/common/canifa_api.py
+++ b/backend/common/canifa_api.py
@@ -52,15 +52,14 @@ async def verify_canifa_token(token: str) -> Optional[Dict[str, Any]]:
            
            if response.status_code == 200:
                data = response.json()
+                logger.debug(f"Canifa API Raw Response: {data}")
                
-                # Check nếu response là lỗi (Magento thường trả 200 kèm body lỗi đôi khi)
+                # Response format: {"data": {"customer": {...}}, "loading": false, ...}
                if isinstance(data, dict):
-                    if data.get("code") != 200:
-                         logger.warning(f"Canifa API Business Error: {data.get('code')} - {data.get('result')}")
-                         return None
-                    return data.get("result", {})
+                    # Trả về toàn bộ data để extract_user_id xử lý
+                    return data
                
-                # Nếu Canifa trả list (đôi khi batch request trả về list)
+                # Nếu Canifa trả list (batch request)
                return data

            else:

--- a/backend/common/conversation_manager.py
+++ b/backend/common/conversation_manager.py
 import json
 import logging
+import asyncio
 from datetime import datetime, date
 from typing import Any

+import psycopg
 from psycopg_pool import AsyncConnectionPool

 from config import CHECKPOINT_POSTGRES_URL
@@ -23,7 +25,15 @@ class ConversationManager:
    async def _get_pool(self) -> AsyncConnectionPool:
        """Get or create async connection pool."""
        if self._pool is None:
-            self._pool = AsyncConnectionPool(self.connection_url, open=False)
+            self._pool = AsyncConnectionPool(
+                self.connection_url, 
+                min_size=1,
+                max_size=20,
+                max_lifetime=600, # Recycle connections every 10 mins
+                max_idle=300,     # Close idle connections after 5 mins
+                # Built closed; opened explicitly by the awaited open() call below.
+                open=False,
+                # NOTE(review): presumably forwarded to every pooled connection; with
+                # autocommit on, the explicit conn.commit() calls in this class look
+                # redundant - confirm before relying on multi-statement transactions.
+                kwargs={"autocommit": True}
+            )
            await self._pool.open()
        return self._pool

@@ -36,7 +46,7 @@ class ConversationManager:
                    await cursor.execute(f"""
                        CREATE TABLE IF NOT EXISTS {self.table_name} (
                            id SERIAL PRIMARY KEY,
-                            user_id VARCHAR(255) NOT NULL,
+                            identity_key VARCHAR(255) NOT NULL,
                            message TEXT NOT NULL,
                            is_human BOOLEAN NOT NULL,
                            timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
@@ -44,8 +54,8 @@ class ConversationManager:
                    """)

                    await cursor.execute(f"""
-                        CREATE INDEX IF NOT EXISTS idx_{self.table_name}_user_timestamp 
-                        ON {self.table_name} (user_id, timestamp)
+                        CREATE INDEX IF NOT EXISTS idx_{self.table_name}_identity_timestamp 
+                        ON {self.table_name} (identity_key, timestamp)
                    """)
                await conn.commit()
            logger.info(f"Table {self.table_name} initialized successfully")
@@ -53,62 +63,84 @@ class ConversationManager:
            logger.error(f"Error initializing table: {e}")
            raise

-    async def save_conversation_turn(self, user_id: str, human_message: str, ai_message: str):
-        """Save both human and AI messages in a single atomic transaction."""
-        try:
-            pool = await self._get_pool()
-            timestamp = datetime.now()
-            async with pool.connection() as conn:
-                async with conn.cursor() as cursor:
-                    await cursor.execute(
-                        f"""INSERT INTO {self.table_name} (user_id, message, is_human, timestamp) 
-                           VALUES (%s, %s, %s, %s), (%s, %s, %s, %s)""",
-                        (
-                            user_id,
-                            human_message,
-                            True,
-                            timestamp,
-                            user_id,
-                            ai_message,
-                            False,
-                            timestamp,
-                        ),
-                    )
-                await conn.commit()
-            logger.debug(f"Saved conversation turn for user {user_id}")
-        except Exception as e:
-            logger.error(f"Failed to save conversation for user {user_id}: {e}", exc_info=True)
-            raise
+    async def save_conversation_turn(self, identity_key: str, human_message: str, ai_message: str):
+        """Save the human message and the AI reply of one turn, retrying on connection errors.
+
+        Both rows are written by a single multi-row INSERT and share one timestamp.
+
+        Args:
+            identity_key: value stored in the identity_key column of both rows.
+            human_message: text stored with is_human = True.
+            ai_message: text stored with is_human = False.
+
+        Raises:
+            psycopg.OperationalError: re-raised once all max_retries attempts have failed.
+            Exception: any other error is logged and re-raised without retrying.
+        """
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                pool = await self._get_pool()
+                timestamp = datetime.now()
+                async with pool.connection() as conn:
+                    async with conn.cursor() as cursor:
+                        await cursor.execute(
+                            f"""INSERT INTO {self.table_name} (identity_key, message, is_human, timestamp) 
+                               VALUES (%s, %s, %s, %s), (%s, %s, %s, %s)""",
+                            (
+                                identity_key,
+                                human_message,
+                                True,
+                                timestamp,
+                                identity_key,
+                                ai_message,
+                                False,
+                                timestamp,
+                            ),
+                        )
+                    # NOTE(review): _get_pool() creates the pool with
+                    # kwargs={"autocommit": True}; if that setting reaches this
+                    # connection, the commit() below presumably has nothing left
+                    # to commit and only acts as a safeguard should autocommit
+                    # ever be turned off. Both rows go in through one INSERT
+                    # statement, so they are written together either way -
+                    # confirm before splitting the insert into several statements.
+                    await conn.commit()
+                
+                logger.debug(f"Saved conversation turn for identity_key {identity_key}")
+                return # Success
+                
+            except psycopg.OperationalError as e:
+                # Only connection-level failures are retried.
+                # NOTE(review): if the connection dropped after the server had
+                # already applied the INSERT, a retry would store the turn twice -
+                # confirm this is acceptable.
+                logger.warning(f"Database connection error (attempt {attempt+1}/{max_retries}): {e}")
+                if attempt == max_retries - 1:
+                    logger.error(f"Failed to save conversation after {max_retries} attempts: {e}")
+                    raise
+                # Short fixed pause before the next attempt.
+                await asyncio.sleep(0.5)
+                
+            except Exception as e:
+                logger.error(f"Failed to save conversation for identity_key {identity_key}: {e}", exc_info=True)
+                raise

    async def get_chat_history(
-        self, user_id: str, limit: int | None = None, before_id: int | None = None
+        self, identity_key: str, limit: int | None = None, before_id: int | None = None
    ) -> list[dict[str, Any]]:
        """
-        Retrieve chat history for a user using cursor-based pagination.
+        Retrieve chat history for an identity (user_id or device_id) using cursor-based pagination.
        AI messages được parse từ JSON string để lấy product_ids.
        """
-        try:
-            query = f"""
-                SELECT message, is_human, timestamp, id
-                FROM {self.table_name} 
-                WHERE user_id = %s
-            """
-            params = [user_id]
-
-            if before_id:
-                query += " AND id < %s"
-                params.append(before_id)
-
-            query += " ORDER BY id DESC"
-
-            if limit:
-                query += " LIMIT %s"
-                params.append(limit)
-
-            pool = await self._get_pool()
-            async with pool.connection() as conn, conn.cursor() as cursor:
-                await cursor.execute(query, tuple(params))
-                results = await cursor.fetchall()
+        max_retries = 3
+        for attempt in range(max_retries):
+            try:
+                query = f"""
+                    SELECT message, is_human, timestamp, id
+                    FROM {self.table_name} 
+                    WHERE identity_key = %s
+                """
+                params = [identity_key]
+
+                if before_id:
+                    query += " AND id < %s"
+                    params.append(before_id)
+
+                query += " ORDER BY id DESC"
+
+                if limit:
+                    query += " LIMIT %s"
+                    params.append(limit)
+
+                pool = await self._get_pool()
+                async with pool.connection() as conn, conn.cursor() as cursor:
+                    await cursor.execute(query, tuple(params))
+                    results = await cursor.fetchall()

                history = []
                for row in results:
@@ -138,37 +170,45 @@ class ConversationManager:
                    history.append(entry)
                
                return history
-        except Exception as e:
-            logger.error(f"Error retrieving chat history: {e}")
-            return []

-    async def clear_history(self, user_id: str):
-        """Clear all chat history for a user"""
+            except psycopg.OperationalError as e:
+                logger.warning(f"Database connection error in get_chat_history (attempt {attempt+1}/{max_retries}): {e}")
+                if attempt == max_retries - 1:
+                    logger.error(f"Failed to get chat history after {max_retries} attempts: {e}")
+                    raise
+                await asyncio.sleep(0.5)
+
+            except Exception as e:
+                logger.error(f"Error retrieving chat history: {e}")
+                return []
+
+    async def clear_history(self, identity_key: str):
+        """Delete every stored message for one identity.
+
+        Args:
+            identity_key: value matched against the identity_key column.
+
+        Raises:
+            Exception: whatever the pool or driver raised, re-raised after it
+                is logged so callers can report the failure instead of
+                treating a failed delete as success.
+        """
        try:
            pool = await self._get_pool()
            async with pool.connection() as conn:
                async with conn.cursor() as cursor:
-                    await cursor.execute(f"DELETE FROM {self.table_name} WHERE user_id = %s", (user_id,))
+                    await cursor.execute(f"DELETE FROM {self.table_name} WHERE identity_key = %s", (identity_key,))
                await conn.commit()
-            logger.info(f"Cleared chat history for user {user_id}")
+            logger.info(f"Cleared chat history for identity_key {identity_key}")
        except Exception as e:
            logger.error(f"Error clearing chat history: {e}")
+            # Propagate: swallowing here made callers report success for a delete that never happened.
+            raise

    async def get_user_count(self) -> int:
-        """Get total number of unique users"""
+        """Count the distinct identity_key values stored in the table.
+
+        Returns 0 when no row comes back or when the query fails (the error
+        is logged, not raised).
+        """
        try:
            pool = await self._get_pool()
            async with pool.connection() as conn, conn.cursor() as cursor:
-                await cursor.execute(f"SELECT COUNT(DISTINCT user_id) FROM {self.table_name}")
+                await cursor.execute(f"SELECT COUNT(DISTINCT identity_key) FROM {self.table_name}")
                result = await cursor.fetchone()
                return result[0] if result else 0
        except Exception as e:
            logger.error(f"Error getting user count: {e}")
            return 0

-    async def get_message_count_today(self, user_id: str) -> int:
+    async def get_message_count_today(self, identity_key: str) -> int:
        """
-        Đếm số tin nhắn của user trong ngày hôm nay (cho rate limiting).
+        Đếm số tin nhắn của identity trong ngày hôm nay (cho rate limiting).
        Chỉ đếm human messages (is_human = true).
        """
        try:
@@ -177,16 +217,16 @@ class ConversationManager:
                await cursor.execute(
                    f"""
                    SELECT COUNT(*) FROM {self.table_name} 
-                    WHERE user_id = %s 
+                    WHERE identity_key = %s 
                    AND is_human = true 
                    AND DATE(timestamp) = CURRENT_DATE
                    """,
-                    (user_id,),
+                    (identity_key,),
                )
                result = await cursor.fetchone()
                return result[0] if result else 0
        except Exception as e:
-            logger.error(f"Error counting messages for {user_id}: {e}")
+            logger.error(f"Error counting messages for {identity_key}: {e}")
            return 0

    async def close(self):

--- a/backend/common/llm_factory.py
+++ b/backend/common/llm_factory.py
@@ -88,6 +88,7 @@ class LLMFactory:
            "streaming": streaming,
            "api_key": key,
            "temperature": 0,
+            "max_tokens": 1000,
        }
        
        # Nếu bật json_mode, tiêm trực tiếp vào constructor

--- a/backend/common/message_limit.py
+++ b/backend/common/message_limit.py
 """
 Message Limit Service
 Giới hạn số tin nhắn theo ngày:
- Guest (không login): 10 tin/ngày theo device_id
- User đã login: 100 tin/ngày theo user_id
+- Guest (không login): RATE_LIMIT_GUEST tin/ngày theo device_id
+- User đã login: RATE_LIMIT_USER tin/ngày theo user_id

 Lưu trữ: Redis (dùng chung với cache.py)
 """
 from __future__ import annotations

 import logging
-import os
 from datetime import datetime

 from common.cache import redis_cache
+from config import RATE_LIMIT_GUEST, RATE_LIMIT_USER

 logger = logging.getLogger(__name__)


 # =============================================================================
-# CONFIGURATION
+# CONFIGURATION (from config.py)
 # =============================================================================

-GUEST_LIMIT_PER_DAY = int(os.getenv("MESSAGE_LIMIT_GUEST", "3"))  # Tạm set 3 để test
-USER_LIMIT_PER_DAY = int(os.getenv("MESSAGE_LIMIT_USER", "100"))
-
 # Redis key prefix
 MESSAGE_COUNT_PREFIX = "msg_limit:"

@@ -32,6 +29,10 @@ class MessageLimitService:
    Service quản lý giới hạn tin nhắn theo ngày.
    Dùng Redis để lưu trữ, tự động reset mỗi ngày.
    
+    Limits:
+    - Guest (device_id): RATE_LIMIT_GUEST (default: 10)
+    - User (user_id): RATE_LIMIT_USER (default: 100)
+    
    Usage:
        from common.message_limit import message_limit_service
        
@@ -60,18 +61,18 @@ class MessageLimitService:
        if MessageLimitService._initialized:
            return
        
-        # Fallback in-memory storage: { "device_id": {"guest": 0, "user": 0} }
-        self._memory_storage: dict[str, dict[str, int]] = {}
+        # Fallback in-memory storage: { "identity_key": count }
+        self._memory_storage: dict[str, int] = {}
        self._memory_date: str = ""
        
-        # Limits
-        self.guest_limit = 3  # Test limit
-        self.total_limit = 5  # Test limit
+        # Limits from config
+        self.guest_limit = RATE_LIMIT_GUEST  # Default: 10
+        self.user_limit = RATE_LIMIT_USER    # Default: 100
        
        MessageLimitService._initialized = True
        logger.info(
            f"✅ MessageLimitService initialized "
-            f"(Guest Limit: {self.guest_limit}, Total Limit: {self.total_limit})"
+            f"(Guest Limit: {self.guest_limit}, User Limit: {self.user_limit})"
        )
    
    # =========================================================================
@@ -200,19 +201,19 @@ class MessageLimitService:
        
        # 2. Logic Checking
        can_send = True
-        limit_display = self.total_limit
+        limit_display = self.user_limit
        message = ""
        require_login = False
        
-        # Check Total Limit (Hard limit cho device)
-        if total_used >= self.total_limit:
+        # Check User Limit (Hard limit cho identity)
+        if total_used >= self.user_limit:
            can_send = False
            # Thông báo khi hết tổng quota (dù là user hay guest)
            if is_authenticated:
-                message = f"Bạn đã sử dụng hết {self.total_limit} tin nhắn hôm nay. Quay lại vào ngày mai nhé!"
+                message = f"Bạn đã sử dụng hết {self.user_limit} tin nhắn hôm nay. Quay lại vào ngày mai nhé!"
            else:
-                # Guest dùng hết 100 tin (hiếm, vì guest bị chặn ở 10 rồi, trừ khi login rồi logout)
-                message = f"Thiết bị này đã đạt giới hạn {self.total_limit} tin nhắn hôm nay."
+                # Guest dùng hết user_limit tin (hiếm, vì guest bị chặn ở guest_limit rồi)
+                message = f"Thiết bị này đã đạt giới hạn {self.user_limit} tin nhắn hôm nay."
        
        # Check Guest Limit (nếu chưa login và chưa bị chặn bởi total)
        elif not is_authenticated:
@@ -222,19 +223,19 @@ class MessageLimitService:
                require_login = True
                message = (
                    f"Bạn đã dùng hết {self.guest_limit} tin nhắn miễn phí. "
-                    f"Đăng nhập ngay để dùng tiếp (tối đa {self.total_limit} tin/ngày)!"
+                    f"Đăng nhập ngay để dùng tiếp (tối đa {self.user_limit} tin/ngày)!"
                )
        
        # 3. Build Remaining Info
-        # Nếu là guest: remaining = min(guest_remaining, total_remaining)
-        # Thực ra guest chỉ care guest_remaining vì guest < total
+        # Nếu là guest: remaining = min(guest_remaining, user_remaining)
+        # Thực ra guest chỉ care guest_remaining vì guest < user
        if is_authenticated:
-             remaining = max(0, self.total_limit - total_used)
+             remaining = max(0, self.user_limit - total_used)
        else:
-             # Guest bị chặn bởi guest_limit hoặc total_limit (trường hợp login rồi logout)
+             # Guest bị chặn bởi guest_limit hoặc user_limit (trường hợp login rồi logout)
             guest_remaining = max(0, self.guest_limit - guest_used)
-             total_remaining = max(0, self.total_limit - total_used)
-             remaining = min(guest_remaining, total_remaining)
+             user_remaining = max(0, self.user_limit - total_used)
+             remaining = min(guest_remaining, user_remaining)

        info = {
            "limit": limit_display,

--- a/backend/common/middleware.py
+++ b/backend/common/middleware.py
@@ -38,18 +38,24 @@ PUBLIC_PATH_PREFIXES = [


 # =============================================================================
-# AUTH MIDDLEWARE CLASS
+# AUTH + RATE LIMIT MIDDLEWARE CLASS
 # =============================================================================

-class ClerkAuthMiddleware(BaseHTTPMiddleware):
+# Paths that need rate limit check
+RATE_LIMITED_PATHS = [
+    "/api/agent/chat",
+]
+
+class CanifaAuthMiddleware(BaseHTTPMiddleware):
    """
-    Clerk Authentication Middleware
+    Canifa Authentication + Rate Limit Middleware
    
    Flow:
-    1. Frontend gửi request với Authorization: Bearer <clerk_token>
-    2. Middleware verify token và extract user_id
-    3. Attach user info vào request.state.user và request.state.user_id
-    4. Routes lấy trực tiếp từ request.state (không cần Depends)
+    1. Frontend gửi request với Authorization: Bearer <canifa_token>
+    2. Middleware verify token với Canifa API → extract customer_id
+    3. Check message rate limit (Guest: 10, User: 100)
+    4. Attach user info vào request.state
+    5. Routes lấy trực tiếp từ request.state
    """

    async def dispatch(self, request: Request, call_next: Callable):
@@ -68,9 +74,12 @@ class ClerkAuthMiddleware(BaseHTTPMiddleware):
        if any(path.startswith(prefix) for prefix in PUBLIC_PATH_PREFIXES):
            return await call_next(request)

-        # ✅ Authentication Process
+        # =====================================================================
+        # STEP 1: AUTHENTICATION (Canifa API)
+        # =====================================================================
        try:
            auth_header = request.headers.get("Authorization")
+            device_id = request.headers.get("device_id", "")

            # ========== DEV MODE: Bypass auth ==========
            dev_user_id = request.headers.get("X-Dev-User-Id")
@@ -79,56 +88,105 @@ class ClerkAuthMiddleware(BaseHTTPMiddleware):
                request.state.user = {"customer_id": dev_user_id}
                request.state.user_id = dev_user_id
                request.state.is_authenticated = True
+                request.state.device_id = device_id or dev_user_id
                return await call_next(request)

            # --- TRƯỜNG HỢP 1: KHÔNG CÓ TOKEN -> GUEST ---
            if not auth_header or not auth_header.startswith("Bearer "):
-                # Guest Mode (Không User ID, Không Auth)
-                # logger.debug(f"ℹ️ Guest access (no token) for {path}")
                request.state.user = None
                request.state.user_id = None
                request.state.is_authenticated = False
-                return await call_next(request)
-
-            # --- TRƯỜNG HỢP 2: CÓ TOKEN -> GỌI CANIFA VERIFY ---
-            token = auth_header.replace("Bearer ", "")
-            
-            # Import Lazy để tránh circular import nếu có
-            from common.canifa_api import verify_canifa_token, extract_user_id_from_canifa_response
-
-            try:
-                # 1. Gọi API Canifa
-                user_data = await verify_canifa_token(token)
+                request.state.device_id = device_id
+            else:
+                # --- TRƯỜNG HỢP 2: CÓ TOKEN -> GỌI CANIFA VERIFY ---
+                token = auth_header.replace("Bearer ", "")
                
-                # 2. Lấy User ID
-                user_id = await extract_user_id_from_canifa_response(user_data)
-                
-                if user_id:
-                    # ✅ VERIFY THÀNH CÔNG -> USER VIP
-                    request.state.user = user_data
-                    request.state.user_id = user_id
-                    request.state.token = token
-                    request.state.is_authenticated = True
-                    logger.debug(f"✅ Auth Success: User {user_id}")
-                else:
-                    # ❌ VERIFY FAILED -> GUEST
-                    logger.warning(f"⚠️ Invalid Canifa Token (No ID found) -> Guest Mode")
+                from common.canifa_api import verify_canifa_token, extract_user_id_from_canifa_response
+
+                try:
+                    user_data = await verify_canifa_token(token)
+                    user_id = await extract_user_id_from_canifa_response(user_data)
+                    
+                    if user_id:
+                        request.state.user = user_data
+                        request.state.user_id = user_id
+                        request.state.token = token
+                        request.state.is_authenticated = True
+                        request.state.device_id = device_id
+                        logger.debug(f"✅ Canifa Auth Success: User {user_id}")
+                    else:
+                        logger.warning(f"⚠️ Invalid Canifa Token -> Guest Mode")
+                        request.state.user = None
+                        request.state.user_id = None
+                        request.state.is_authenticated = False
+                        request.state.device_id = device_id
+                        
+                except Exception as e:
+                    logger.error(f"❌ Canifa Auth Error: {e} -> Guest Mode")
                    request.state.user = None
                    request.state.user_id = None
                    request.state.is_authenticated = False
-                    
-            except Exception as e:
-                logger.error(f"❌ Canifa Auth Error: {e} -> Guest Mode")
-                request.state.user = None
-                request.state.user_id = None
-                request.state.is_authenticated = False
+                    request.state.device_id = device_id

        except Exception as e:
-            logger.error(f"❌ Middleware Unexpected Error: {e}")
-            # Fallback an toàn: Guest mode
+            logger.error(f"❌ Middleware Auth Error: {e}")
            request.state.user = None
            request.state.user_id = None
            request.state.is_authenticated = False
+            request.state.device_id = request.headers.get("device_id", "")
+
+        # =====================================================================
+        # STEP 2: RATE LIMIT CHECK (Chỉ cho các path cần limit)
+        # =====================================================================
+        if path in RATE_LIMITED_PATHS:
+            try:
+                from common.message_limit import message_limit_service
+                from fastapi.responses import JSONResponse
+                
+                # Lấy identity_key làm rate limit key
+                # Guest: device_id → limit 10
+                # User: user_id → limit 100
+                is_authenticated = request.state.is_authenticated
+                if is_authenticated and request.state.user_id:
+                    rate_limit_key = request.state.user_id
+                else:
+                    rate_limit_key = request.state.device_id
+                
+                if rate_limit_key:
+                    can_send, limit_info = await message_limit_service.check_limit(
+                        identity_key=rate_limit_key,
+                        is_authenticated=is_authenticated,
+                    )
+                    
+                    # Lưu limit_info vào request.state để route có thể dùng
+                    request.state.limit_info = limit_info
+                    
+                    if not can_send:
+                        logger.warning(
+                            f"⚠️ Rate Limit Exceeded: {rate_limit_key} | "
+                            f"used={limit_info['used']}/{limit_info['limit']}"
+                        )
+                        return JSONResponse(
+                            status_code=429,
+                            content={
+                                "status": "error",
+                                "error_code": "MESSAGE_LIMIT_EXCEEDED",
+                                "message": limit_info["message"],
+                                "require_login": limit_info["require_login"],
+                                "limit_info": {
+                                    "limit": limit_info["limit"],
+                                    "used": limit_info["used"],
+                                    "remaining": limit_info["remaining"],
+                                    "reset_seconds": limit_info["reset_seconds"],
+                                },
+                            },
+                        )
+                else:
+                    logger.warning(f"⚠️ No identity_key for rate limiting")
+                    
+            except Exception as e:
+                logger.error(f"❌ Rate Limit Check Error: {e}")
+                # Cho phép request tiếp tục nếu lỗi rate limit
            
        return await call_next(request)

@@ -181,7 +239,7 @@ class MiddlewareManager:
        
        Args:
            app: FastAPI application
-            enable_auth: Bật Clerk authentication middleware
+            enable_auth: Bật Canifa authentication middleware
            enable_rate_limit: Bật rate limiting
            enable_cors: Bật CORS middleware
            cors_origins: List origins cho CORS (default: ["*"])
@@ -221,10 +279,10 @@ class MiddlewareManager:
        logger.info(f"✅ CORS middleware enabled (origins: {origins})")
    
    def _setup_auth(self, app: FastAPI) -> None:
-        """Setup Clerk auth middleware."""
-        app.add_middleware(ClerkAuthMiddleware)
+        """Setup Canifa auth middleware."""
+        app.add_middleware(CanifaAuthMiddleware)
        self._auth_enabled = True
-        logger.info("✅ Clerk Auth middleware enabled")
+        logger.info("✅ Canifa Auth middleware enabled")
    
    def _setup_rate_limit(self, app: FastAPI) -> None:
        """Setup rate limiting."""

--- a/backend/common/user_identity.py
+++ b/backend/common/user_identity.py
@@ -48,7 +48,7 @@ class UserIdentity:
    def langfuse_metadata(self) -> dict:
        """Metadata cho Langfuse"""
        return {
-            "device_id": self.device_id,
+            "device_id": self.device_id,    
            "is_authenticated": self.is_authenticated,
        }
    
@@ -61,12 +61,24 @@ class UserIdentity:
    
    @property
    def history_key(self) -> str:
-        """Key để lưu/load chat history (theo device_id)"""
+        """
+        Key used to save/load chat history.
+        - Guest (not logged in): device_id
+        - User (logged in): user_id (customer_id from Canifa)
+        """
+        if self.is_authenticated and self.user_id:
+            return self.user_id
        return self.device_id
    
    @property
    def rate_limit_key(self) -> str:
-        """Key cho rate limiting (luôn theo device_id, limit tùy login status)"""
+        """
+        Key used for rate limiting.
+        - Guest (not logged in): device_id → limit 10
+        - User (logged in): user_id → limit 100
+        """
+        if self.is_authenticated and self.user_id:
+            return self.user_id
        return self.device_id


@@ -97,8 +109,8 @@ def get_user_identity(request: Request) -> UserIdentity:
        user_id = request.state.user_id
        is_authenticated = True
    
-    # 3. Primary ID
-    primary_id = user_id if user_id else device_id
+    # 3. Primary ID - LUÔN LUÔN LÀ device_id
+    primary_id = device_id
    
    identity = UserIdentity(
        primary_id=primary_id,

--- a/backend/config.py
+++ b/backend/config.py
@@ -53,6 +53,8 @@ __all__ = [
    "STARROCKS_PORT",
    "STARROCKS_USER",
    "USE_MONGO_CONVERSATION",
+    "RATE_LIMIT_GUEST",
+    "RATE_LIMIT_USER",
 ]

 # ====================== SUPABASE CONFIGURATION ======================
@@ -134,3 +136,8 @@ OTEL_EXPORTER_JAEGER_AGENT_PORT = os.getenv("OTEL_EXPORTER_JAEGER_AGENT_PORT")
 OTEL_SERVICE_NAME = os.getenv("OTEL_SERVICE_NAME")
 OTEL_TRACES_EXPORTER = os.getenv("OTEL_TRACES_EXPORTER")
 OTEL_EXPORTER_JAEGER_AGENT_SPLIT_OVERSIZED_BATCHES = os.getenv("OTEL_EXPORTER_JAEGER_AGENT_SPLIT_OVERSIZED_BATCHES")
+
+RATE_LIMIT_GUEST: int = int(os.getenv("RATE_LIMIT_GUEST", "10"))
+RATE_LIMIT_USER: int = int(os.getenv("RATE_LIMIT_USER", "100"))
+
+
--- a/backend/server.py
+++ b/backend/server.py
@@ -14,6 +14,7 @@ from fastapi.staticfiles import StaticFiles

 from api.chatbot_route import router as chatbot_router
 from api.conservation_route import router as conservation_router
+from api.prompt_route import router as prompt_router
 from common.cache import redis_cache
 from common.langfuse_client import get_langfuse_client
 from common.middleware import middleware_manager
@@ -57,13 +58,14 @@ async def startup_event():
 middleware_manager.setup(
    app,
    enable_auth=True,        # 👈 Bật lại Auth để test logic Guest/User
-    enable_rate_limit=True,   # 👈 Bật rate limiting
+    enable_rate_limit=False,  # 👈 Tắt slowapi vì đã có business rate limit
    enable_cors=True,         # 👈 Bật CORS
    cors_origins=["*"],       # 👈 Trong production nên limit origins
 )

 app.include_router(conservation_router)
 app.include_router(chatbot_router)
+app.include_router(prompt_router)


 # --- MOCK API FOR LOAD TESTING ---

--- a/backend/static/index.html
+++ b/backend/static/index.html
@@ -186,6 +186,13 @@
            border: 1px solid #552b2b;
        }

+        .message.rate-limit-error {
+            background: linear-gradient(135deg, #3d2d2d 0%, #2d2d3d 100%);
+            border: 1px solid #ff6b6b;
+            padding: 16px;
+            max-width: 350px;
+        }
+
        .timestamp {
            font-size: 0.7em;
            opacity: 0.7;
@@ -424,6 +431,184 @@
        .raw-content {
            display: none;
        }
+
+        /* --- Modern Layout & Animations --- */
+        .main-content {
+            max-width: 1400px;
+            /* Wider container */
+            margin: 0 auto;
+            padding: 20px;
+            height: calc(100vh - 80px);
+            /* Fill remaining height */
+            box-sizing: border-box;
+        }
+
+        .main-layout {
+            display: flex;
+            height: 100%;
+            gap: 0;
+            /* Gap handled by margin in panel for smooth transition */
+            position: relative;
+        }
+
+        /* Chat Container flex fix */
+        .container {
+            flex: 1;
+            display: flex;
+            flex-direction: column;
+            background: #2d2d2d;
+            border-radius: 16px;
+            box-shadow: 0 10px 30px rgba(0, 0, 0, 0.5);
+            border: 1px solid #444;
+            height: 100%;
+            padding: 0;
+            overflow: hidden;
+            transition: all 0.3s ease;
+            z-index: 10;
+        }
+
+        /* Internal padding for chat container */
+        .chat-internal-wrapper {
+            padding: 20px;
+            display: flex;
+            flex-direction: column;
+            height: 100%;
+            box-sizing: border-box;
+        }
+
+        /* PROMPT PANEL - Slide In Style */
+        .prompt-panel {
+            width: 0;
+            opacity: 0;
+            background: #1e1e1e;
+            /* Darker contrast */
+            border-left: 1px solid #444;
+            border-radius: 16px;
+            display: flex;
+            flex-direction: column;
+            padding: 0;
+            /* Padding handled internally to avoid width jump */
+            transition: all 0.4s cubic-bezier(0.16, 1, 0.3, 1);
+            overflow: hidden;
+            margin-left: 0;
+            box-shadow: -5px 0 20px rgba(0, 0, 0, 0.3);
+            white-space: nowrap;
+            /* Prevent content flicker during width change */
+        }
+
+        .prompt-panel.open {
+            width: 500px;
+            /* Wider editor */
+            opacity: 1;
+            margin-left: 20px;
+            padding: 20px;
+        }
+
+        .prompt-header {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            margin-bottom: 20px;
+            border-bottom: 1px solid #333;
+            padding-bottom: 15px;
+        }
+
+        .prompt-header h3 {
+            font-size: 1.2em;
+            color: #4fc3f7;
+            /* Nice blue accent */
+            display: flex;
+            align-items: center;
+            gap: 10px;
+        }
+
+        .prompt-textarea {
+            flex: 1;
+            background: #111;
+            color: #dcdccc;
+            /* Soft code color */
+            border: 1px solid #333;
+            border-radius: 8px;
+            padding: 15px;
+            font-family: 'Consolas', 'Monaco', 'Courier New', monospace;
+            font-size: 14px;
+            line-height: 1.6;
+            resize: none;
+            margin-bottom: 15px;
+            white-space: pre-wrap;
+            /* Wrap code */
+            overflow-y: auto;
+            box-shadow: inset 0 2px 5px rgba(0, 0, 0, 0.5);
+        }
+
+        .prompt-textarea:focus {
+            outline: none;
+            border-color: #667eea;
+            box-shadow: inset 0 2px 5px rgba(0, 0, 0, 0.5), 0 0 0 2px rgba(102, 126, 234, 0.2);
+        }
+
+        .panel-footer {
+            display: flex;
+            justify-content: space-between;
+            align-items: center;
+            padding-top: 10px;
+            border-top: 1px solid #333;
+        }
+
+        .status-text {
+            font-size: 0.8em;
+            color: #666;
+            font-style: italic;
+        }
+
+        /* Buttons Update */
+        .action-btn {
+            padding: 10px 20px;
+            border-radius: 8px;
+            font-weight: 600;
+            font-size: 0.9em;
+            border: none;
+            cursor: pointer;
+            transition: all 0.2s;
+            display: flex;
+            align-items: center;
+            gap: 8px;
+        }
+
+        .btn-reload {
+            background: #333;
+            color: #aaa;
+        }
+
+        .btn-reload:hover {
+            background: #444;
+            color: white;
+        }
+
+        .btn-save {
+            background: linear-gradient(135deg, #43a047 0%, #2e7d32 100%);
+            color: white;
+            box-shadow: 0 4px 6px rgba(0, 0, 0, 0.2);
+        }
+
+        .btn-save:hover {
+            transform: translateY(-2px);
+            box-shadow: 0 6px 12px rgba(0, 0, 0, 0.3);
+        }
+
+        .btn-close-panel {
+            background: transparent;
+            border: none;
+            color: #666;
+            font-size: 1.5rem;
+            cursor: pointer;
+            transition: color 0.2s;
+            line-height: 1;
+        }
+
+        .btn-close-panel:hover {
+            color: #ff6b6b;
+        }
    </style>
 </head>

@@ -437,35 +622,139 @@
    </div>

    <div class="main-content">
-        <div class="container">
-            <div class="header">
-                <h2>🤖 Canifa AI Chat</h2>
-                <div class="config-area">
-                    <input type="text" id="userId" placeholder="Enter User ID" value="" onblur="saveUserId()"
-                        onchange="saveUserId()">
-                    <button onclick="loadHistory(true)">↻ History</button>
-                    <button onclick="clearUI()" style="background: #d32f2f;">✗ Clear UI</button>
+        <div class="main-layout">
+            <!-- Chat Container -->
+            <div class="container">
+                <div class="chat-internal-wrapper">
+                    <div class="header">
+                        <h2>🤖 Canifa AI Chat</h2>
+                        <div class="config-area" style="flex-wrap: wrap;">
+                            <div style="display: flex; gap: 5px; align-items: center;">
+                                <label style="font-size: 0.8em; color: #aaa;">Device ID:</label>
+                                <input type="text" id="deviceId" placeholder="auto-generated" style="width: 150px;"
+                                    onblur="saveConfig()" onchange="saveConfig()">
+                            </div>
+                            <div style="display: flex; gap: 5px; align-items: center;">
+                                <label style="font-size: 0.8em; color: #aaa;">Access Token:</label>
+                                <input type="text" id="accessToken" placeholder="vsf-customer token (optional)"
+                                    style="width: 200px;" onblur="saveConfig()" onchange="saveConfig()">
+                            </div>
+                            <button onclick="loadHistory(true)">↻ History</button>
+                            <button onclick="togglePromptEditor()"
+                                style="background: #e6b800; color: #2d2d2d; font-weight: bold;">📝 Edit Prompt</button>
+                            <button onclick="clearUI()" style="background: #d32f2f;">✗ Clear UI</button>
+                        </div>
+                    </div>
+
+                    <div class="chat-box" id="chatBox">
+                        <div class="load-more" id="loadMoreBtn" style="display: none;">
+                            <button onclick="loadHistory(false)">Load Older Messages ⬆️</button>
+                        </div>
+                        <div id="messagesArea" style="display: flex; flex-direction: column; gap: 15px;"></div>
+                    </div>
+
+                    <div class="typing-indicator" id="typingIndicator">
+                        <span style="font-style: normal;">🤖</span> AI is thinking...
+                    </div>
+
+                    <div class="input-area">
+                        <input type="text" id="userInput" placeholder="Type your message..."
+                            onkeypress="handleKeyPress(event)" autocomplete="off">
+                        <button onclick="sendMessage()" id="sendBtn">➤ Send</button>
+                    </div>
                </div>
            </div>

-            <div class="chat-box" id="chatBox">
-                <div class="load-more" id="loadMoreBtn" style="display: none;">
-                    <button onclick="loadHistory(false)">Load Older Messages ⬆️</button>
+            <!-- Prompt Editor Panel -->
+            <div class="prompt-panel" id="promptPanel">
+                <div class="prompt-header">
+                    <h3>📝 System Prompt</h3>
+                    <button class="btn-close-panel" onclick="togglePromptEditor()">×</button>
                </div>
-                <div id="messagesArea" style="display: flex; flex-direction: column; gap: 15px;"></div>
-            </div>

-            <div class="typing-indicator" id="typingIndicator">AI is typing...</div>
+                <textarea id="systemPromptInput" class="prompt-textarea" placeholder="Loading prompt content..."
+                    spellcheck="false"></textarea>

-            <div class="input-area">
-                <input type="text" id="userInput" placeholder="Type your message..." onkeypress="handleKeyPress(event)"
-                    autocomplete="off">
-                <button onclick="sendMessage()" id="sendBtn">➤ Send</button>
+                <div class="panel-footer">
+                    <span class="status-text" id="promptStatus">Ready to edit</span>
+                    <div style="display: flex; gap: 10px;">
+                        <button class="action-btn btn-reload" onclick="loadSystemPrompt()">↻ Reset</button>
+                        <button class="action-btn btn-save" onclick="saveSystemPrompt()">💾 Save & Apply</button>
+                    </div>
+                </div>
            </div>
        </div>

        <script>
            let messageHistory = []; // Store messages for reference
+            let isPromptPanelOpen = false;
+
+            // Flip the prompt editor panel between open and closed; opening it also reloads the system prompt.
+            function togglePromptEditor() {
+                const promptPanel = document.getElementById('promptPanel');
+                isPromptPanelOpen = !isPromptPanelOpen;
+
+                // classList.toggle(name, force) adds the class when force is true and removes it otherwise.
+                promptPanel.classList.toggle('open', isPromptPanelOpen);
+                if (isPromptPanelOpen) {
+                    loadSystemPrompt();
+                }
+            }
+
+            // Load the system prompt into the editor; errors go to the status label so they can never be saved as the prompt.
+            async function loadSystemPrompt() {
+                const textarea = document.getElementById('systemPromptInput');
+                const statusLabel = document.getElementById('promptStatus');
+                textarea.value = '';  // stay empty while loading: saveSystemPrompt() ignores empty content
+                textarea.disabled = true;
+                try {
+                    const response = await fetch('/api/agent/system-prompt');
+                    const data = await response.json().catch(() => ({}));  // tolerate non-JSON error bodies
+                    if (!response.ok || data.status !== 'success') {
+                        throw new Error(data.message || data.detail || `HTTP ${response.status}`);
+                    }
+                    textarea.value = data.content;
+                    statusLabel.innerText = 'Ready to edit';
+                } catch (error) {
+                    statusLabel.innerText = 'Error loading prompt: ' + error.message;
+                    console.error(error);
+                } finally {
+                    textarea.disabled = false;
+                }
+            }
+
+            // Save the edited system prompt (after confirmation) and report the outcome via the status label and an alert.
+            async function saveSystemPrompt() {
+                const content = document.getElementById('systemPromptInput').value;
+                const statusLabel = document.getElementById('promptStatus');
+                // Never overwrite the stored prompt with an empty or whitespace-only one.
+                if (!content.trim()) return;
+                if (!confirm('Bạn có chắc muốn lưu Prompt mới? Bot sẽ bị reset graph để học prompt mới này.')) return;
+
+                statusLabel.innerText = "Saving...";
+                try {
+                    const response = await fetch('/api/agent/system-prompt', {
+                        method: 'POST',
+                        headers: { 'Content-Type': 'application/json' },
+                        body: JSON.stringify({ content: content })
+                    });
+                    // Error responses may not carry a JSON body; fall back to an empty object.
+                    const data = await response.json().catch(() => ({}));
+                    if (response.ok && data.status === 'success') {
+                        statusLabel.innerText = "Saved!";
+                        alert('✅ Đã lưu Prompt thành công!\nBot đã sẵn sàng với prompt mới.');
+                    } else {
+                        // `detail` may be a string or a structured object; stringify the latter for display.
+                        const detail = typeof data.detail === 'string' ? data.detail : JSON.stringify(data.detail);
+                        statusLabel.innerText = "Error!";
+                        alert('❌ Lỗi: ' + (data.message || detail || `HTTP ${response.status}`));
+                    }
+                } catch (error) {
+                    statusLabel.innerText = "Connection Error";
+                    alert('❌ Lỗi kết nối server');
+                    console.error(error);
+                }
+            }

            function toggleMessageView(messageId) {
                const filteredContent = document.getElementById('filtered-' + messageId);
@@ -491,12 +780,13 @@
            let isTyping = false;

            async function loadHistory(isRefresh) {
-                const userId = document.getElementById('userId').value;
+                const deviceId = document.getElementById('deviceId').value;
+                const accessToken = document.getElementById('accessToken').value.trim();
                const messagesArea = document.getElementById('messagesArea');
                const loadMoreBtn = document.getElementById('loadMoreBtn');

-                if (!userId) {
-                    alert('Please enter a User ID');
+                if (!deviceId) {
+                    alert('Please enter a Device ID');
                    return;
                }

@@ -505,7 +795,10 @@
                    currentCursor = null;
                }

-                const url = `/api/history/${userId}?limit=20${currentCursor ? `&before_id=${currentCursor}` : ''}`;
+                // Use deviceId as identity_key for guest, or call API to get user's history
+                // For now, use deviceId directly (middleware will handle identity resolution)
+                const identityKey = deviceId;
+                const url = `/api/history/${identityKey}?limit=20${currentCursor ? `&before_id=${currentCursor}` : ''}`;

                try {
                    const response = await fetch(url);
@@ -573,7 +866,62 @@
                // Message Bubble
                const div = document.createElement('div');
                div.className = `message ${msg.is_human ? 'user' : 'bot'}`;
-                div.innerText = msg.message;
+
+                // Generate unique message ID for toggle
+                const messageId = 'hist-' + (msg.id || Date.now() + Math.random());
+
+                if (msg.is_human) {
+                    // User message: simple text
+                    div.innerText = msg.message;
+                } else {
+                    // Bot message: add Widget/Raw JSON toggle
+
+                    // FILTERED CONTENT (default visible)
+                    const filteredDiv = document.createElement('div');
+                    filteredDiv.id = 'filtered-' + messageId;
+                    filteredDiv.className = 'filtered-content';
+                    filteredDiv.innerText = msg.message;
+                    div.appendChild(filteredDiv);
+
+                    // RAW CONTENT (hidden by default)
+                    const rawDiv = document.createElement('div');
+                    rawDiv.id = 'raw-' + messageId;
+                    rawDiv.className = 'raw-content';
+                    rawDiv.style.display = 'none';
+
+                    const rawJsonDiv = document.createElement('div');
+                    rawJsonDiv.className = 'raw-json-view';
+                    const pre = document.createElement('pre');
+                    pre.textContent = JSON.stringify({
+                        id: msg.id,
+                        message: msg.message,
+                        product_ids: msg.product_ids || [],
+                        timestamp: msg.timestamp,
+                        is_human: msg.is_human
+                    }, null, 2);
+                    rawJsonDiv.appendChild(pre);
+                    rawDiv.appendChild(rawJsonDiv);
+                    div.appendChild(rawDiv);
+
+                    // Toggle Buttons
+                    const toggleDiv = document.createElement('div');
+                    toggleDiv.className = 'message-view-toggle';
+
+                    const filteredBtn = document.createElement('button');
+                    filteredBtn.id = 'filtered-btn-' + messageId;
+                    filteredBtn.className = 'active';
+                    filteredBtn.innerText = '🎨 Widget';
+                    filteredBtn.onclick = () => toggleMessageView(messageId);
+
+                    const rawBtn = document.createElement('button');
+                    rawBtn.id = 'raw-btn-' + messageId;
+                    rawBtn.innerText = '👁️ Raw JSON';
+                    rawBtn.onclick = () => toggleMessageView(messageId);
+
+                    toggleDiv.appendChild(filteredBtn);
+                    toggleDiv.appendChild(rawBtn);
+                    div.appendChild(toggleDiv);
+                }

                // Timestamp inside bubble
                const time = document.createElement('span');
@@ -581,34 +929,29 @@
                time.innerText = new Date(msg.timestamp).toLocaleTimeString([], { hour: '2-digit', minute: '2-digit' });
                div.appendChild(time);

-                // Debug ID (optional)
-                // const meta = document.createElement('div');
-                // meta.style.fontSize = '9px';
-                // meta.style.opacity = '0.3';
-                // meta.innerText = `id: ${msg.id}`;
-                // div.appendChild(meta);
-
                container.appendChild(div);

                if (position === 'top') {
                    messagesArea.insertBefore(container, messagesArea.firstChild);
                } else {
-                    messagesArea.appendChild(container); // Corrected to append container
+                    messagesArea.appendChild(container);
                }
            }

            async function sendMessage() {
                const input = document.getElementById('userInput');
-                const userIdInput = document.getElementById('userId');
-                const userId = userIdInput.value.trim();
+                const deviceIdInput = document.getElementById('deviceId');
+                const accessTokenInput = document.getElementById('accessToken');
+                const deviceId = deviceIdInput.value.trim();
+                const accessToken = accessTokenInput.value.trim();
                const text = input.value.trim();
                const sendBtn = document.getElementById('sendBtn');
                const typingIndicator = document.getElementById('typingIndicator');
                const chatBox = document.getElementById('chatBox');

-                if (!userId) {
-                    alert('Please enter a User ID first!');
-                    userIdInput.focus();
+                if (!deviceId) {
+                    alert('Please enter a Device ID first!');
+                    deviceIdInput.focus();
                    return;
                }

@@ -629,24 +972,65 @@
                input.value = '';
                chatBox.scrollTop = chatBox.scrollHeight;

-                // Save user ID to localStorage
-                localStorage.setItem('canifa_user_id', userId);
+                // Save config to localStorage
+                saveConfig();

                // Track response time
                const startTime = Date.now();

                try {
-                    // SWITCH TO NON-STREAMING ENDPOINT
+                    // Build headers
+                    const headers = {
+                        'Content-Type': 'application/json',
+                        'device_id': deviceId
+                    };
+
+                    // Add Authorization if access token provided
+                    if (accessToken) {
+                        headers['Authorization'] = 'Bearer ' + accessToken;
+                    }
+
                    const response = await fetch('/api/agent/chat', {
                        method: 'POST',
-                        headers: { 'Content-Type': 'application/json' },
+                        headers: headers,
                        body: JSON.stringify({
-                            user_query: text,
-                            user_id: userId
+                            user_query: text
                        })
                    });

-                    if (!response.ok) throw new Error('Network response was not ok');
+                    // Handle rate limit (429) specifically
+                    if (response.status === 429) {
+                        const errorData = await response.json();
+                        // Backend returns: { message: "...", limit_info: {...} } directly or via exception
+                        const errorMessage = errorData.message ||
+                            errorData.detail?.message ||
+                            errorData.detail?.limit_info?.message ||
+                            'Bạn đã hết lượt chat hôm nay!';
+
+                        // Show simple red error message
+                        const messagesArea = document.getElementById('messagesArea');
+                        const container = document.createElement('div');
+                        container.className = 'message-container bot';
+
+                        const errorDiv = document.createElement('div');
+                        errorDiv.className = 'message bot';
+                        errorDiv.style.cssText = 'background: #3d2d2d; border: 1px solid #ff6b6b; color: #ff6b6b;';
+                        errorDiv.innerText = errorMessage;
+
+                        container.appendChild(errorDiv);
+                        messagesArea.appendChild(container);
+                        chatBox.scrollTop = chatBox.scrollHeight;
+
+                        input.disabled = false;
+                        sendBtn.disabled = false;
+                        typingIndicator.style.display = 'none';
+                        return;
+                    }
+
+                    if (!response.ok) {
+                        const errorData = await response.json().catch(() => ({}));
+                        throw new Error(errorData.detail?.message || errorData.detail || 'Có lỗi xảy ra');
+                    }

                    const data = await response.json();
                    const responseTime = ((Date.now() - startTime) / 1000).toFixed(2);
@@ -772,8 +1156,10 @@
                        rawJsonDiv.className = 'raw-json-view';
                        const pre = document.createElement('pre');
                        pre.textContent = JSON.stringify({
+                            status: data.status,
                            ai_response: data.ai_response,
-                            product_ids: data.product_ids
+                            product_ids: data.product_ids,
+                            limit_info: data.limit_info || null
                        }, null, 2);
                        rawJsonDiv.appendChild(pre);
                        rawDiv.appendChild(rawJsonDiv);
@@ -804,8 +1190,58 @@
                        timeDiv.innerText = `⏱️ ${responseTime}s`;
                        botMsgDiv.appendChild(timeDiv);
                    } else {
-                        botMsgDiv.innerText = "Error: " + (data.message || "Unknown error");
-                        botMsgDiv.style.color = 'red';
+                        // ERROR CASE: Limit exceeded or other errors
+
+                        // FILTERED CONTENT (error message - default visible); server text is HTML-escaped before reaching innerHTML
+                        const escapeHtml = (value) => String(value).replace(/[&<>"']/g, (ch) => `&#${ch.charCodeAt(0)};`);
+                        const filteredDiv = document.createElement('div');
+                        filteredDiv.id = 'filtered-' + messageId;
+                        filteredDiv.className = 'filtered-content';
+                        filteredDiv.style.color = '#ff6b6b';
+                        filteredDiv.innerHTML = `
+                            <div style="font-weight: bold; margin-bottom: 8px;">⚠️ ${escapeHtml(data.error_code || 'ERROR')}</div>
+                            <div>${escapeHtml(data.message || 'Unknown error')}</div>
+                            ${data.require_login ? '<div style="margin-top: 10px; padding: 8px; background: #3d2d2d; border-radius: 6px;">👉 Vui lòng đăng nhập để tiếp tục sử dụng!</div>' : ''}`;
+                        botMsgDiv.appendChild(filteredDiv);
+
+                        // RAW CONTENT (hidden by default)
+                        const rawDiv = document.createElement('div');
+                        rawDiv.id = 'raw-' + messageId;
+                        rawDiv.className = 'raw-content';
+                        rawDiv.style.display = 'none';
+
+                        const rawJsonDiv = document.createElement('div');
+                        rawJsonDiv.className = 'raw-json-view';
+                        const pre = document.createElement('pre');
+                        pre.textContent = JSON.stringify({
+                            status: data.status,
+                            error_code: data.error_code,
+                            message: data.message,
+                            require_login: data.require_login,
+                            limit_info: data.limit_info || null
+                        }, null, 2);
+                        rawJsonDiv.appendChild(pre);
+                        rawDiv.appendChild(rawJsonDiv);
+                        botMsgDiv.appendChild(rawDiv);
+
+                        // Toggle Buttons
+                        const toggleDiv = document.createElement('div');
+                        toggleDiv.className = 'message-view-toggle';
+
+                        const filteredBtn = document.createElement('button');
+                        filteredBtn.id = 'filtered-btn-' + messageId;
+                        filteredBtn.className = 'active';
+                        filteredBtn.innerText = '🎨 Widget';
+                        filteredBtn.onclick = () => toggleMessageView(messageId);
+
+                        const rawBtn = document.createElement('button');
+                        rawBtn.id = 'raw-btn-' + messageId;
+                        rawBtn.innerText = '👁️ Raw JSON';
+                        rawBtn.onclick = () => toggleMessageView(messageId);
+
+                        toggleDiv.appendChild(filteredBtn);
+                        toggleDiv.appendChild(rawBtn);
+                        botMsgDiv.appendChild(toggleDiv);
                    }

                    container.appendChild(botMsgDiv);
@@ -839,26 +1275,60 @@
                document.getElementById('messagesArea').innerHTML = '';
            }

-            // Save user ID to localStorage (called on input change/blur)
-            function saveUserId() {
-                const userIdInput = document.getElementById('userId');
-                const val = userIdInput.value.trim();
-                if (val) {
-                    localStorage.setItem('canifa_user_id', val);
+            // Copy the token typed into #loginTokenInput into #accessToken and persist it via saveConfig(); alert if it is missing or blank
+            function applyLoginToken() {
+                const tokenInput = document.getElementById('loginTokenInput');
+                if (tokenInput && tokenInput.value.trim()) {
+                    document.getElementById('accessToken').value = tokenInput.value.trim();
+                    saveConfig();
+                    alert('✅ Token đã được lưu! Bạn có thể tiếp tục chat.');
                 } else {
-                    // If empty, remove saved id
-                    localStorage.removeItem('canifa_user_id');
+                    alert('Vui lòng nhập Access Token!');
                 }
             }

-            // Load user ID from localStorage on page load and auto-load history
+            // Save config to localStorage (called on input change/blur)
+            function saveConfig() {
+                const deviceId = document.getElementById('deviceId').value.trim();
+                const accessToken = document.getElementById('accessToken').value.trim();
+
+                if (deviceId) {
+                    localStorage.setItem('canifa_device_id', deviceId);
+                }
+                if (accessToken) {
+                    localStorage.setItem('canifa_access_token', accessToken);
+                } else {
+                    localStorage.removeItem('canifa_access_token');
+                }
+            }
+
+            // Generate UUID for device_id
+            function generateUUID() {
+                return 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'.replace(/[xy]/g, function (c) {
+                    const r = Math.random() * 16 | 0;
+                    const v = c === 'x' ? r : (r & 0x3 | 0x8);
+                    return v.toString(16);
+                });
+            }
+
+            // On page load: restore (or create and store) the Device ID, restore the Access Token, then load history
             window.onload = function () {
-                const savedUserId = localStorage.getItem('canifa_user_id');
-                if (savedUserId) {
-                    document.getElementById('userId').value = savedUserId;
-                    // Auto-load history for saved user id
-                    setTimeout(() => loadHistory(true), 50);
+                // Reuse the stored Device ID, else store 'device-' + the first 8 chars of a UUID (NOTE(review): only 8 hex chars kept)
+                let savedDeviceId = localStorage.getItem('canifa_device_id');
+                if (!savedDeviceId) {
+                    savedDeviceId = 'device-' + generateUUID().substring(0, 8);
+                    localStorage.setItem('canifa_device_id', savedDeviceId);
                 }
+                document.getElementById('deviceId').value = savedDeviceId;
+
+                // Fill in the Access Token field only when one was saved (optional)
+                const savedAccessToken = localStorage.getItem('canifa_access_token');
+                if (savedAccessToken) {
+                    document.getElementById('accessToken').value = savedAccessToken;
+                }
+
+                // Call loadHistory(true) after a 50 ms delay, whether or not a token was saved
+                setTimeout(() => loadHistory(true), 50);
             };
        </script>
    </div> <!-- Close main-content -->

--- a/backend/test_canifa_auth.py
+++ b/backend/test_canifa_auth.py
+"""
+Test Canifa API Auth
+"""
+import asyncio
+import httpx
+
+# NOTE(review): this looks like a real session token committed in plain text —
+# confirm whether it must be revoked and read from the environment instead.
+TOKEN = "7ibs17luogynysetg0cbjabmrzl2wvw2"
+# URL the request body below is POSTed to.
+CANIFA_API = "https://canifa.com/v1/magento/customer"
+
+# JSON request body: a two-element list. The first item names the query and
+# carries a field selection string under metadata.fields; the second item is
+# an empty object.
+QUERY_BODY = [
+    {
+        "customer": "customer-custom-query",
+        "metadata": {
+            "fields": "\n customer {\n gender\n customer_id\n phone_number\n date_of_birth\n default_billing\n default_shipping\n email\n firstname\n is_subscribed\n lastname\n middlename\n prefix\n suffix\n taxvat\n addresses {\n city\n country_code\n default_billing\n default_shipping\n extension_attributes {\n attribute_code\n value\n }\n custom_attributes {\n attribute_code\n value\n }\n firstname\n id\n lastname\n postcode\n prefix\n region {\n region_code\n region_id\n region\n }\n street\n suffix\n telephone\n vat_id\n }\n is_subscribed\n }\n "
+        }
+    },
+    {}
+]
+
+async def test_canifa_api():
+    headers = {
+        "accept": "application/json, text/plain, */*",
+        "content-type": "application/json",
+        "Cookie": f"vsf-customer={TOKEN}"
+    }
+    
+    print(f"🔐 Testing Canifa API with token: {TOKEN}")
+    print(f"📡 URL: {CANIFA_API}")
+    print("-" * 50)
+    
+    try:
+        async with httpx.AsyncClient(timeout=10.0) as client:
+            response = await client.post(CANIFA_API, json=QUERY_BODY, headers=headers)
+            
+            print(f"📊 Status Code: {response.status_code}")
+            print(f"📝 Response:")
+            
+            data = response.json()
+            import json
+            print(json.dumps(data, indent=2, ensure_ascii=False))
+            
+            # Try to extract customer_id
+            if isinstance(data, list) and len(data) > 0:
+                first_item = data[0]
+                if isinstance(first_item, dict):
+                    result = first_item.get('result', {})
+                    customer = result.get('customer', {}) if isinstance(result, dict) else None
+                    if customer:
+                        print("\n✅ CUSTOMER INFO:")
+                        print(f"   customer_id: {customer.get('customer_id')}")
+                        print(f"   email: {customer.get('email')}")
+                        print(f"   firstname: {customer.get('firstname')}")
+                        print(f"   lastname: {customer.get('lastname')}")
+                    else:
+                        print("\n⚠️ No customer data in response")
+            elif isinstance(data, dict):
+                result = data.get('result', {})
+                customer = result.get('customer', {}) if isinstance(result, dict) else None
+                if customer:
+                    print("\n✅ CUSTOMER INFO:")
+                    print(f"   customer_id: {customer.get('customer_id')}")
+                    print(f"   email: {customer.get('email')}")
+                else:
+                    print("\n⚠️ No customer data found")
+                    
+    except Exception as e:
+        print(f"❌ Error: {e}")
+
+# Run the check only when this file is executed directly, not when imported.
+if __name__ == "__main__":
+    asyncio.run(test_canifa_api())
--- a/backend/test_prompt_update.py
+++ b/backend/test_prompt_update.py
+import requests
+import json
+
+BASE_URL = "http://localhost:5000"
+API_URL = f"{BASE_URL}/api/agent/system-prompt"
+
+# 1. Get current prompt
+print("1. Getting current prompt...")
+try:
+    response = requests.get(API_URL)
+    if response.status_code == 200:
+        print("✅ Current prompt fetched successfully.")
+        print(f"Preview: {response.json()['content'][:100]}...")
+    else:
+        print(f"❌ Failed to get prompt: {response.status_code} - {response.text}")
+except Exception as e:
+    print(f"❌ Error connecting: {e}")
+
+# 2. Update prompt
+new_prompt = """# VAI TRÒ
+Bạn là Mèo Máy Doraemon đến từ thế kỷ 22.
+Luôn kết thúc câu bằng "meo meo".
+"""
+
+print("\n2. Updating prompt to Doraemon...")
+try:
+    response = requests.post(API_URL, json={"content": new_prompt})
+    if response.status_code == 200:
+        print("✅ Prompt updated successfully.")
+        print(response.json())
+    else:
+        print(f"❌ Failed to update prompt: {response.status_code} - {response.text}")
+except Exception as e:
+    print(f"❌ Error connecting: {e}")
+
+# 3. Verify update
+print("\n3. Verifying update...")
+try:
+    response = requests.get(API_URL)
+    content = response.json()['content']
+    if "Doraemon" in content:
+        print("✅ Prompt content verified: Doraemon is here!")
+    else:
+        print("❌ Prompt content NOT updated.")
+except Exception as e:
+    print(f"❌ Error connecting: {e}")
--- a/backend/test_revert_prompt.py
+++ b/backend/test_revert_prompt.py
+import requests
+
+BASE_URL = "http://localhost:5000"
+API_URL = f"{BASE_URL}/api/agent/system-prompt"
+
+# Prompt text this script posts back to the endpoint. It contains a {date_str}
+# placeholder and doubled braces ({{ }}) around its JSON examples — presumably
+# a str.format-style template rendered elsewhere; TODO confirm.
+original_prompt = """# VAI TRÒ
+
+Bạn là CiCi - Chuyên viên tư vấn thời trang CANIFA.
+- Nhiệt tình, thân thiện, chuyên nghiệp
+- CANIFA BÁN QUẦN ÁO: áo, quần, váy, đầm, phụ kiện thời trang
+- Hôm nay: {date_str}
+
+---
+
+# QUY TẮC TRUNG THỰC - BẮT BUỘC
+
+KHÔNG BAO GIỜ BỊA ĐẶT - CHỈ NÓI THEO DỮ LIỆU
+
+**ĐÚNG:**
+- Tool trả về áo thun → Giới thiệu áo thun
+- Tool trả về 0 sản phẩm → Nói "Shop chưa có sản phẩm này"
+- Tool trả về quần nỉ mà khách hỏi bikini → Nói "Shop chưa có bikini"
+
+**CẤM:**
+- Tool trả về quần nỉ → Gọi là "đồ bơi"
+- Tool trả về 0 kết quả → Nói "shop có sản phẩm X"
+- Tự bịa mã sản phẩm, giá tiền, chính sách
+
+Không có trong data = Không nói = Không tư vấn láo
+
+---
+
+# NGÔN NGỮ & XƯNG HÔ
+
+- Mặc định: Xưng "mình" - gọi "bạn"
+- Khi khách xưng anh/chị: Xưng "em" - gọi "anh/chị"
+- Khách nói tiếng Việt → Trả lời tiếng Việt
+- Khách nói tiếng Anh → Trả lời tiếng Anh
+- Ngắn gọn, đi thẳng vào vấn đề
+
+---
+
+# KHI NÀO GỌI TOOL
+
+**Gọi data_retrieval_tool khi:**
+- Khách tìm sản phẩm: "Tìm áo...", "Có màu gì..."
+- Khách hỏi sản phẩm cụ thể: "Mã 8TS24W001 có không?"
+- Tư vấn phong cách: "Mặc gì đi cưới?", "Đồ công sở?"
+
+**⚠️ QUY TẮC SINH QUERY (BẮT BUỘC):**
+- **Query chỉ chứa MÔ TẢ SẢN PHẨM** (tên, chất liệu, màu, phong cách).
+- **TUYỆT ĐỐI KHÔNG đưa giá tiền vào chuỗi `query`**.
+- Giá tiền phải đưa vào tham số riêng: `price_min`, `price_max`.
+
+Ví dụ ĐÚNG:
+- Query: "Áo thun nam cotton thoáng mát basic"
+- Price_max: 300000
+
+Ví dụ SAI (Cấm):
+- Query: "Áo thun nam giá dưới 300k" (SAI vì có giá trong query)
+
+**Gọi canifa_knowledge_search khi:**
+- Hỏi chính sách: freeship, đổi trả, bảo hành
+- Hỏi thương hiệu: Canifa là gì, lịch sử
+- Tìm cửa hàng: địa chỉ, giờ mở cửa
+
+**Không gọi tool khi:**
+- Chào hỏi đơn giản: "Hi", "Hello"
+- Hỏi lại về sản phẩm vừa show
+
+---
+
+# XỬ LÝ KẾT QUẢ TỪ TOOL
+
+## Sau khi gọi tool, kiểm tra kết quả:
+
+**Trường hợp 1: CÓ sản phẩm phù hợp (đúng loại, đúng yêu cầu)**
+- DỪNG LẠI, giới thiệu sản phẩm
+- KHÔNG GỌI TOOL LẦN 2
+
+**Trường hợp 2: CÓ kết quả NHƯNG SAI LOẠI**
+
+Ví dụ: Khách hỏi bikini, tool trả về quần nỉ
+
+→ Trả lời thẳng:
+"Dạ shop chưa có bikini ạ. Shop chuyên về quần áo thời trang (áo, quần, váy). Bạn có muốn tìm sản phẩm nào khác không?"
+
+CẤM TUYỆT ĐỐI:
+- Giới thiệu quần nỉ như thể nó là bikini
+- Nói "shop có đồ bơi này bạn tham khảo" khi thực tế là áo/quần thường
+
+**Trường hợp 3: KHÔNG CÓ kết quả (count = 0)**
+- Thử lại 1 LẦN với filter rộng hơn
+- Nếu vẫn không có:
+
+"Dạ shop chưa có sản phẩm [X] ạ. Bạn có thể tham khảo [loại gần nhất] hoặc ghé shop sau nhé!"
+
+---
+
+# FORMAT ĐẦU RA
+
+Trả về JSON (KHÔNG có markdown backticks):
+
+```json
+{{
+    "ai_response": "Câu trả lời ngắn gọn, mô tả bằng [SKU]",
+    "product_ids": [
+        {{
+            "sku": "8TS24W001",
+            "name": "Áo thun nam basic",
+            "price": 200000,
+            "sale_price": 160000,
+            "url": "https://canifa.com/...",
+            "thumbnail_image_url": "https://..."
+        }}
+    ]
+}}
+```
+
+**Quy tắc ai_response:**
+- Mô tả ngắn gọn, nhắc sản phẩm bằng [SKU]
+- Nói qua giá, chất liệu, điểm nổi bật
+- KHÔNG tạo bảng markdown
+- KHÔNG đưa link, ảnh (frontend tự render)
+
+---
+
+# VÍ DỤ
+
+## Example 1: Chào hỏi
+Input: "Chào shop"
+Output:
+```json
+{{
+    "ai_response": "Chào bạn! Mình là CiCi, tư vấn thời trang CANIFA. Mình có thể giúp gì cho bạn?",
+    "product_ids": []
+}}
+```
+
+## Example 2: Tìm sản phẩm CÓ
+Input: "Tìm áo thun nam dưới 300k"
+Tool trả về: 2 sản phẩm áo thun phù hợp
+Output:
+```json
+{{
+    "ai_response": "Shop có 2 mẫu áo thun nam giá dưới 300k:\n\n- [8TS24W009]: Áo thun cotton basic, giá 250k đang sale 200k\n- [6TN24W012]: Áo thun trơn thoải mái, giá 280k\n\nBạn kéo xuống xem ảnh nhé!",
+    "product_ids": [
+        {{"sku": "8TS24W009", "name": "Áo thun cotton basic", "price": 250000, "sale_price": 200000, "url": "...", "thumbnail_image_url": "..."}},
+        {{"sku": "6TN24W012", "name": "Áo thun trơn", "price": 280000, "sale_price": null, "url": "...", "thumbnail_image_url": "..."}}
+    ]
+}}
+```
+
+## Example 3: Khách hỏi KHÔNG CÓ trong kho
+Input: "Shop có bikini không?"
+Tool trả về: 0 sản phẩm
+Output:
+```json
+{{
+    "ai_response": "Dạ shop chưa có bikini ạ. CANIFA chuyên về quần áo thời trang như áo, quần, váy, đầm. Bạn có muốn tìm mẫu nào khác không?",
+    "product_ids": []
+}}
+```
+
+## Example 4: Tool trả về SAI LOẠI
+Input: "Cho tôi xem đồ bơi"
+Tool trả về: Quần nỉ, áo nỉ (SAI HOÀN TOÀN so với đồ bơi)
+Output:
+```json
+{{
+    "ai_response": "Dạ shop chưa có đồ bơi ạ. Shop chuyên bán quần áo thời trang (áo, quần, váy, áo khoác). Bạn có muốn tìm loại sản phẩm nào khác không?",
+    "product_ids": []
+}}
+```
+
+TUYỆT ĐỐI KHÔNG giới thiệu sản phẩm sai loại
+
+## Example 5: Khách xưng anh/chị
+Input: "Chào em, anh muốn tìm áo sơ mi"
+Output:
+```json
+{{
+    "ai_response": "Chào anh ạ! Em là CiCi. Anh đang tìm áo sơ mi dài tay hay ngắn tay ạ? Để em tư vấn mẫu phù hợp nhất cho anh nhé!",
+    "product_ids": []
+}}
+```
+
+---
+
+# TÓM TẮT
+
+1. CANIFA bán quần áo (áo, quần, váy, đầm, phụ kiện)
+2. Không có trong data = Không nói
+3. Kiểm tra kỹ tên sản phẩm trước khi giới thiệu
+4. Nếu sai loại → Nói thẳng "shop chưa có X"
+5. Không bịa giá, mã sản phẩm, chính sách
+6. Có kết quả phù hợp = DỪNG, không gọi tool lần 2
+7. Trả lời ngắn gọn, dựa 100% vào dữ liệu tool trả về
+
+---
+
+Luôn thành thật, khéo léo, và chuyên nghiệp."""
+
+print("\nRestoring original prompt...")
+try:
+    response = requests.post(API_URL, json={"content": original_prompt})
+    if response.status_code == 200:
+        print("✅ Original prompt restored successfully.")
+    else:
+        print(f"❌ Failed to restore prompt: {response.status_code} - {response.text}")
+except Exception as e:
+    print(f"❌ Error connecting: {e}")