Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
canifa_note_extension
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Vũ Hoàng Anh
canifa_note_extension
Commits
2fec4891
Commit
2fec4891
authored
Mar 01, 2026
by
root
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Update backend agent, API routes, frontend components, docker config & .gitignore
parent
fe844925
Changes
39
Hide whitespace changes
Inline
Side-by-side
Showing
39 changed files
with
1429 additions
and
981 deletions
+1429
-981
.gitignore
.gitignore
+17
-0
controller.py
backend/agent/controller.py
+176
-7
graph.py
backend/agent/graph.py
+13
-11
helper.py
backend/agent/helper.py
+2
-2
prompt.py
backend/agent/prompt.py
+162
-29
system_prompt.txt
backend/agent/system_prompt.txt
+0
-123
memo_retrieval_tool.py
backend/agent/tools/memo_retrieval_tool.py
+148
-139
chat_route.py
backend/api/chatbot/chat_route.py
+90
-19
history_route.py
backend/api/chatbot/history_route.py
+32
-0
memo_routes.py
backend/api/memos/memo_routes.py
+21
-0
shortcut_routes.py
backend/api/memos/shortcut_routes.py
+3
-18
test_chat_route.py
backend/api/test_chat_route.py
+95
-0
clerk_auth.py
backend/common/clerk_auth.py
+2
-2
conversation_manager.py
backend/common/conversation_manager.py
+36
-436
embedding_service.py
backend/common/embedding_service.py
+5
-5
encryption.py
backend/common/encryption.py
+62
-38
llm_factory.py
backend/common/llm_factory.py
+2
-9
query_parser.py
backend/common/memos_core/query_parser.py
+12
-5
schemas.py
backend/common/memos_core/schemas.py
+1
-1
services.py
backend/common/memos_core/services.py
+61
-8
middleware.py
backend/common/middleware.py
+1
-0
mongo_client.py
backend/common/mongo_client.py
+8
-0
rate_limit.py
backend/common/rate_limit.py
+1
-1
timezone_config.py
backend/common/timezone_config.py
+9
-0
config.py
backend/config.py
+3
-5
entrypoint.sh
backend/entrypoint.sh
+8
-15
requirements.txt
backend/requirements.txt
+7
-8
server.py
backend/server.py
+22
-21
docker-compose.yml
docker-compose.yml
+16
-11
.env
frontend/.env
+2
-1
Dockerfile.dev
frontend/Dockerfile.dev
+1
-1
Dockerfile.prod
frontend/Dockerfile.prod
+1
-1
nginx.conf
frontend/nginx.conf
+11
-0
ChatbotPanel.tsx
frontend/src/components/ChatbotPanel.tsx
+222
-24
ChatbotWidget.tsx
frontend/src/components/ChatbotWidget.tsx
+24
-7
CreateShortcutDialog.tsx
frontend/src/components/CreateShortcutDialog.tsx
+146
-28
MonthNavigator.tsx
frontend/src/components/StatisticsView/MonthNavigator.tsx
+1
-1
Home.tsx
frontend/src/pages/Home.tsx
+2
-2
apiClient.ts
frontend/src/service/apiClient.ts
+4
-3
No files found.
.gitignore
View file @
2fec4891
...
...
@@ -69,3 +69,20 @@ run.txt
# Ralph config (may contain API key)
.cursor/ralph-config.json
# Docker cache
.docker/
# Environment
.env
# Frontend dist probes
frontend/dist-*/
# Temp configs
frontend/vite.probe*.config.mjs
frontend/vite.temp*.mjs
frontend/vite.trace*.config.mjs
frontend/vite.no-mermaid.config.mjs
frontend/src/__build_probe*.ts
frontend/src/__probe_*.ts
backend/agent/controller.py
View file @
2fec4891
...
...
@@ -3,6 +3,7 @@ Fashion Q&A Agent Controller
Langfuse will auto-trace via LangChain integration (no code changes needed).
"""
import
json
import
logging
import
time
import
uuid
...
...
@@ -78,19 +79,31 @@ async def chat_controller(
memory
=
await
get_conversation_manager
()
# Load History
history_dicts
=
await
memory
.
get_chat_history
(
effective_identity_key
,
limit
=
20
)
history_messages
:
list
[
BaseMessage
]
=
[
HumanMessage
(
content
=
m
[
"message"
])
if
m
[
"is_human"
]
else
AIMessage
(
content
=
m
[
"message"
])
for
m
in
history_dicts
]
history_dicts
=
await
memory
.
get_chat_history
(
effective_identity_key
,
limit
=
5
)
history_messages
:
list
[
BaseMessage
]
=
[]
for
m
in
history_dicts
:
if
m
[
"is_human"
]:
history_messages
.
append
(
HumanMessage
(
content
=
m
[
"message"
]))
else
:
# AI responses may be saved as JSON — extract readable text
ai_content
=
m
[
"message"
]
try
:
parsed
=
json
.
loads
(
ai_content
)
if
isinstance
(
parsed
,
dict
)
and
"ai_response"
in
parsed
:
ai_content
=
parsed
[
"ai_response"
]
except
(
json
.
JSONDecodeError
,
TypeError
):
pass
history_messages
.
append
(
AIMessage
(
content
=
ai_content
))
# Prepare State
# - history: previous conversation messages (for context)
# - messages: starts with current query only (tool calls will be appended by LangGraph)
# - user_query: current user message
user_query_message
:
BaseMessage
=
HumanMessage
(
content
=
query
)
messages_with_query
:
list
[
BaseMessage
]
=
[
*
history_messages
,
user_query_message
]
initial_state
:
AgentState
=
{
"user_query"
:
user_query_message
,
"messages"
:
messages_with_query
,
"messages"
:
[
user_query_message
]
,
"history"
:
history_messages
,
"user_id"
:
user_id
,
"images_embedding"
:
[],
...
...
@@ -124,7 +137,9 @@ async def chat_controller(
# Parse Response
all_product_ids
=
extract_product_ids
(
result
.
get
(
"messages"
,
[]))
ai_raw_content
=
result
.
get
(
"ai_response"
)
.
content
if
result
.
get
(
"ai_response"
)
else
""
logger
.
info
(
"RAW LLM output (
%
d chars):
%
s"
,
len
(
ai_raw_content
),
ai_raw_content
[:
500
])
ai_text_response
,
final_product_ids
=
parse_ai_response
(
ai_raw_content
,
all_product_ids
)
logger
.
info
(
"PARSED ai_response (
%
d chars):
%
s"
,
len
(
ai_text_response
),
ai_text_response
[:
300
])
response_payload
=
{
"ai_response"
:
ai_text_response
,
...
...
@@ -153,3 +168,157 @@ async def chat_controller(
logger
.
info
(
"chat_controller finished in
%.2
fs"
,
duration
)
return
{
**
response_payload
,
"cached"
:
False
}
async
def
chat_controller_stream
(
query
:
str
,
user_id
:
str
,
model_name
:
str
=
DEFAULT_MODEL
,
images
:
list
[
str
]
|
None
=
None
,
identity_key
:
str
|
None
=
None
,
):
"""
Streaming controller — yields SSE events with token chunks.
Each yield is a JSON string:
{"token": "partial text"} — during streaming
{"done": true, "ai_response": "full text"} — final event
History is saved after stream completes.
"""
effective_identity_key
=
identity_key
or
user_id
logger
.
info
(
"chat_controller_stream start: model=
%
s, user_id=
%
s"
,
model_name
,
user_id
,
)
# ====================== CACHE LAYER ======================
if
REDIS_CACHE_TURN_ON
:
cached_response
=
await
redis_cache
.
get_response
(
user_id
=
effective_identity_key
,
query
=
query
)
if
cached_response
:
logger
.
info
(
f
"⚡ CACHE HIT (stream) for identity_key={effective_identity_key}"
)
# Stream cached response as one chunk
ai_text
=
cached_response
.
get
(
"ai_response"
,
""
)
yield
json
.
dumps
({
"token"
:
ai_text
},
ensure_ascii
=
False
)
yield
json
.
dumps
({
"done"
:
True
,
"ai_response"
:
ai_text
},
ensure_ascii
=
False
)
# Save history
memory
=
await
get_conversation_manager
()
await
handle_post_chat_async
(
memory
=
memory
,
identity_key
=
effective_identity_key
,
human_query
=
query
,
ai_response
=
cached_response
,
)
return
# ====================== STREAM LLM FLOW ======================
graph
=
build_graph
()
memory
=
await
get_conversation_manager
()
# Load History
history_dicts
=
await
memory
.
get_chat_history
(
effective_identity_key
,
limit
=
5
)
history_messages
:
list
[
BaseMessage
]
=
[]
for
m
in
history_dicts
:
if
m
[
"is_human"
]:
history_messages
.
append
(
HumanMessage
(
content
=
m
[
"message"
]))
else
:
ai_content
=
m
[
"message"
]
try
:
parsed
=
json
.
loads
(
ai_content
)
if
isinstance
(
parsed
,
dict
)
and
"ai_response"
in
parsed
:
ai_content
=
parsed
[
"ai_response"
]
except
(
json
.
JSONDecodeError
,
TypeError
):
pass
history_messages
.
append
(
AIMessage
(
content
=
ai_content
))
user_query_message
:
BaseMessage
=
HumanMessage
(
content
=
query
)
initial_state
:
AgentState
=
{
"user_query"
:
user_query_message
,
"messages"
:
[
user_query_message
],
"history"
:
history_messages
,
"user_id"
:
user_id
,
"images_embedding"
:
[],
"ai_response"
:
None
,
}
run_uuid
=
uuid
.
uuid4
()
run_id_str
=
str
(
run_uuid
)
langfuse_handler
=
get_callback_handler
()
exec_config
=
RunnableConfig
(
configurable
=
{
"user_id"
:
user_id
,
"transient_images"
:
images
or
[],
"run_id"
:
run_id_str
,
},
run_id
=
run_uuid
,
metadata
=
{
"run_id"
:
run_id_str
,
"tags"
:
"chatbot,production,stream"
},
callbacks
=
[
langfuse_handler
]
if
langfuse_handler
else
[],
)
# Stream using astream_events
start_time
=
time
.
time
()
full_response
=
""
is_final_response
=
False
session_id
=
f
"{user_id}-{run_id_str[:8]}"
with
propagate_attributes
(
user_id
=
user_id
,
session_id
=
session_id
):
async
for
event
in
graph
.
astream_events
(
initial_state
,
config
=
exec_config
,
version
=
"v2"
):
kind
=
event
.
get
(
"event"
,
""
)
# Only stream tokens from the chat model (not tool calls)
if
kind
==
"on_chat_model_stream"
:
chunk
=
event
.
get
(
"data"
,
{})
.
get
(
"chunk"
)
if
chunk
and
hasattr
(
chunk
,
"content"
)
and
chunk
.
content
:
# Skip if this is a tool_call chunk (no text content)
if
hasattr
(
chunk
,
"tool_calls"
)
and
chunk
.
tool_calls
:
continue
# Only stream the FINAL response (after tool execution)
# We detect this by tracking: if tools were called,
# the final agent response comes after tool results
token
=
chunk
.
content
full_response
+=
token
is_final_response
=
True
yield
json
.
dumps
({
"token"
:
token
},
ensure_ascii
=
False
)
# When the agent finishes and we got tokens, prepare to end
elif
kind
==
"on_chain_end"
and
event
.
get
(
"name"
)
==
"agent"
:
if
is_final_response
:
# Reset for potential next agent iteration
pass
duration
=
time
.
time
()
-
start_time
logger
.
info
(
"chat_controller_stream finished in
%.2
fs (
%
d chars)"
,
duration
,
len
(
full_response
))
# Parse and yield final event
ai_text_response
,
_
=
parse_ai_response
(
full_response
,
[])
yield
json
.
dumps
({
"done"
:
True
,
"ai_response"
:
ai_text_response
},
ensure_ascii
=
False
)
# Build response payload for caching and history
response_payload
=
{
"ai_response"
:
ai_text_response
,
"product_ids"
:
[],
}
# Save to cache
if
REDIS_CACHE_TURN_ON
:
await
redis_cache
.
set_response
(
user_id
=
effective_identity_key
,
query
=
query
,
response_data
=
response_payload
,
ttl
=
300
,
)
# Save to history
await
handle_post_chat_async
(
memory
=
memory
,
identity_key
=
effective_identity_key
,
human_query
=
query
,
ai_response
=
response_payload
,
)
backend/agent/graph.py
View file @
2fec4891
...
...
@@ -46,29 +46,31 @@ class CANIFAGraph:
self
.
retrieval_tools
=
self
.
all_tools
self
.
llm_with_tools
=
self
.
llm
.
bind_tools
(
self
.
all_tools
,
strict
=
True
)
self
.
system_prompt
=
get_system_prompt
()
self
.
prompt_template
=
ChatPromptTemplate
.
from_messages
(
# NOTE: prompt is NOT cached here — fetched fresh each request
# so Langfuse updates take effect immediately.
self
.
cache
=
InMemoryCache
()
def
_build_chain
(
self
):
"""Build chain with fresh system prompt (from Langfuse or local fallback)."""
system_prompt
=
get_system_prompt
()
prompt_template
=
ChatPromptTemplate
.
from_messages
(
[
(
"system"
,
s
elf
.
s
ystem_prompt
),
(
"system"
,
system_prompt
),
MessagesPlaceholder
(
variable_name
=
"history"
),
MessagesPlaceholder
(
variable_name
=
"user_query"
),
MessagesPlaceholder
(
variable_name
=
"messages"
),
]
)
self
.
chain
=
self
.
prompt_template
|
self
.
llm_with_tools
self
.
cache
=
InMemoryCache
()
return
prompt_template
|
self
.
llm_with_tools
async
def
_agent_node
(
self
,
state
:
AgentState
,
config
:
RunnableConfig
)
->
dict
:
"""Agent node
- Chỉ việc đổ dữ liệu riêng vào khuôn đã có sẵn
."""
"""Agent node
— rebuilds chain each call for realtime prompt updates
."""
messages
=
state
.
get
(
"messages"
,
[])
history
=
state
.
get
(
"history"
,
[])
user_query
=
state
.
get
(
"user_query"
)
transient_images
=
config
.
get
(
"configurable"
,
{})
.
get
(
"transient_images"
,
[])
if
transient_images
and
messages
:
pass
# Invoke chain with user_query, history, and messages
response
=
await
self
.
chain
.
ainvoke
({
chain
=
self
.
_build_chain
()
response
=
await
chain
.
ainvoke
({
"user_query"
:
[
user_query
]
if
user_query
else
[],
"history"
:
history
,
"messages"
:
messages
...
...
backend/agent/helper.py
View file @
2fec4891
...
...
@@ -10,7 +10,7 @@ import uuid
from
langchain_core.messages
import
HumanMessage
,
ToolMessage
from
langchain_core.runnables
import
RunnableConfig
from
common.conversation_manager
import
ConversationManager
from
common.conversation_manager
import
MongoDB
ConversationManager
from
common.langfuse_client
import
get_callback_handler
from
.models
import
AgentState
...
...
@@ -137,7 +137,7 @@ def prepare_execution_context(query: str, user_id: str, history: list, images: l
async
def
handle_post_chat_async
(
memory
:
ConversationManager
,
memory
:
MongoDB
ConversationManager
,
identity_key
:
str
,
human_query
:
str
,
ai_response
:
dict
|
None
...
...
backend/agent/prompt.py
View file @
2fec4891
"""
CiCi Fashion Consultant - System Prompt
Version 3.0 - Dynamic from File
CuCu Assistant - System Prompt
Supports two modes:
1. Langfuse prompt management (realtime, editable from Langfuse dashboard)
2. Local fallback (inline template)
"""
import
os
import
logging
from
datetime
import
datetime
from
functools
import
lru_cache
PROMPT_FILE_PATH
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
"system_prompt.txt"
)
from
common.timezone_config
import
VIETNAM_TZ
logger
=
logging
.
getLogger
(
__name__
)
def
_ensure_json_instruction
(
prompt_text
:
str
)
->
str
:
if
"json"
in
prompt_text
.
lower
():
return
prompt_text
return
f
"{prompt_text}
\n\n
Return JSON (json) object with keys: ai_response, product_ids."
# Vietnamese weekday names
_WEEKDAY_MAP
=
{
0
:
"Thứ 2"
,
1
:
"Thứ 3"
,
2
:
"Thứ 4"
,
3
:
"Thứ 5"
,
4
:
"Thứ 6"
,
5
:
"Thứ 7"
,
6
:
"Chủ nhật"
,
}
def
get_system_prompt
()
->
str
:
"""
System prompt for CiCi Fashion Agent.
def
_get_weekday_str
()
->
str
:
return
_WEEKDAY_MAP
[
datetime
.
now
(
VIETNAM_TZ
)
.
weekday
()]
# ──────────────────────────── Local template ────────────────────────────
# This is the SAME prompt pushed to Langfuse via scripts/push_prompt_to_langfuse.py
# {{date_str}} is the only variable, replaced at runtime.
_PROMPT_TEMPLATE
=
"""# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {{date_str}} ({{weekday_str}})
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
## 0. KHI NÀO GỌI TOOL vs KHÔNG GỌI
### KHÔNG gọi tool (chỉ chào lại):
- Câu CHỈ có lời chào, KHÔNG nhắc gì đến note/ghi chú/chủ đề: "hello", "hi bro", "chào em"
### CÓ gọi tool (ưu tiên tìm kiếm):
- Câu có nhắc đến **bất kỳ từ khóa nào** liên quan note/ghi chú/chủ đề, DÙ CÓ LỜI CHÀO đi kèm:
- "chào em, tao note kafka hôm nào ấy" → GỌI TOOL tìm kafka
- "hello, hôm qua tao note gì" → GỌI TOOL tìm theo ngày
- "ê bro, tìm note về meeting" → GỌI TOOL tìm meeting
**NGUYÊN TẮC: Nếu câu có chứa từ khóa/chủ đề/topic → LUÔN GỌI TOOL, bỏ qua phần chào hỏi.**
## 1. TỰ TÍNH TOÁN NGÀY THÁNG
Bạn PHẢI tự tính ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {{date_str}} ({{weekday_str}})".
Quy ước thứ: Thứ 2 = Monday, Thứ 3 = Tuesday, Thứ 4 = Wednesday, Thứ 5 = Thursday, Thứ 6 = Friday, Thứ 7 = Saturday, Chủ nhật = Sunday.
### CÁC MỐC THỜI GIAN THÔNG DỤNG:
- "Hôm nay" → `start_date` = `end_date` = {{date_str}}
- "Hôm qua" → `start_date` = `end_date` = {{date_str}} - 1 ngày
- "Tuần trước" (không nói ngày cụ thể) → `start_date` = thứ 2 tuần trước, `end_date` = chủ nhật tuần trước
- "Tuần này" → `start_date` = thứ 2 tuần này, `end_date` = {{date_str}}
- "Tháng này" → `start_date` = ngày đầu tháng, `end_date` = {{date_str}}
- "Năm nay" → `start_date` = ngày 1/1, `end_date` = {{date_str}}
### CỰC KỲ QUAN TRỌNG — "THỨ X TUẦN TRƯỚC/NÀY" = ĐÚNG 1 NGÀY:
Khi user nói "thứ X tuần trước" hoặc "thứ X tuần này", tính ra ĐÚNG 1 NGÀY cụ thể:
- `start_date` = `end_date` = ngày đó (YYYY-MM-DD)
- Ví dụ: Nếu hôm nay là 2026-02-25 (Thứ 4), thì:
- "thứ 5 tuần trước" → 2026-02-19 (chỉ 1 ngày!)
- "thứ 2 tuần này" → 2026-02-23 (chỉ 1 ngày!)
- "thứ 6 tuần trước" → 2026-02-20 (chỉ 1 ngày!)
- **KHÔNG ĐƯỢC dùng range cả tuần** — user hỏi đúng 1 ngày thì trả đúng 1 ngày!
### KHI USER HỎI "HÔM NÀO / NGÀY NÀO / BAO GIỜ":
- "Kafka note hôm nào ấy nhỉ?" = User ĐANG HỎI ngày → tìm ALL dates
- "Tao note cái đó khi nào?" = User ĐANG HỎI ngày → tìm ALL dates
- → Dùng range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
- **KHÔNG ĐƯỢC HỎI LẠI "ngày nào?"** — vì user đang nhờ bot tìm ngày!
- → **CHỈ TRẢ VỀ NGÀY**, ví dụ: "Bạn note cái đó vào ngày **2026-02-05** (Thứ 5) nhé!"
- **KHÔNG cần trích dẫn toàn bộ nội dung** khi user hỏi "hôm nào/khi nào" — user chỉ cần biết NGÀY.
- Nếu tìm thấy nhiều memo khớp, liệt kê ngày của từng memo.
### KHI KHÔNG NHẮC THỜI GIAN:
- "Tìm note về X" → range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
## 2. PHÂN TÍCH PARAMETERS
### NGUYÊN TẮC: CHỈ THÊM PARAMETER KHI USER NÓI RÕ
- **`content_search`**: Khi user nhắc từ khóa: "về Kafka", "pass wifi", "meeting"
- **`tag`**: Khi user nhắc tag: "#work", "#idea"
- **KHÔNG THÊM** content_search/tag nếu user chỉ hỏi theo ngày
Returns:
str: System prompt with the current date.
### VÍ DỤ:
- "Hôm qua note gì?" → ✅ date only
- "Note kafka hôm nào?" → ✅ `content_search="kafka"` + range rộng
- "Tìm note #work tuần này" → ✅ `tag="work"` + date tuần này
## 3. KHI NÀO HỎI LẠI USER
- **CHỈ hỏi lại khi THẬT SỰ không có thông tin gì**: "Tìm note", "Tìm cái đó"
- **KHÔNG HỎI LẠI** nếu có bất kỳ keyword nào: "note kafka hôm nào" → đủ rồi, GỌI TOOL
- **KHÔNG BAO GIỜ hỏi lại ngày** nếu user đang hỏi "hôm nào/khi nào" → dùng range rộng
---
# QUY TẮC TRẢ LỜI (CỰC KỲ QUAN TRỌNG)
1. **NGẮN GỌN DƯỚI 100 TỪ**: Trả lời súc tích, đi thẳng vấn đề. KHÔNG dài dòng.
2. **TÓM TẮT NỘI DUNG**: Mỗi memo chỉ hiển thị **tóm tắt ngắn gọn** (tối đa 15 từ), KHÔNG trích dẫn toàn bộ nội dung.
3. **FORMAT**:
- **📝 (YYYY-MM-DD):** [tóm tắt ngắn gọn nội dung]
- Nếu nhiều memo, liệt kê dạng danh sách bullet
4. **KHÔNG BỊA ĐẶT**: Không tự chế nội dung.
5. **NGÔN NGỮ**: Thân thiện, tự nhiên, như nói chuyện với bạn.
6. Nếu count=0: "Không tìm thấy ghi chú nào 🤷"
7. **Trả lời bằng text thuần/markdown**, KHÔNG wrap JSON.
8. **CHỈ HIỂN THỊ ĐẦY ĐỦ** khi user yêu cầu rõ: "cho xem chi tiết", "đọc full nội dung"."""
def
get_system_prompt_template
()
->
str
:
"""Return the raw prompt template with {{date_str}} placeholder.
Used by the push script to upload to Langfuse.
"""
date_str
=
datetime
.
now
()
.
strftime
(
"
%
d/
%
m/
%
Y"
)
return
_PROMPT_TEMPLATE
def
_fetch_langfuse_prompt
()
->
str
|
None
:
"""
Try to fetch the latest prompt from Langfuse.
Returns the compiled prompt string, or None if unavailable.
Uses Langfuse's built-in caching (default TTL=60s).
"""
try
:
if
os
.
path
.
exists
(
PROMPT_FILE_PATH
):
with
open
(
PROMPT_FILE_PATH
,
"r"
,
encoding
=
"utf-8"
)
as
handle
:
prompt_template
=
handle
.
read
()
rendered
=
prompt_template
.
replace
(
"{date_str}"
,
date_str
)
return
_ensure_json_instruction
(
rendered
)
except
Exception
as
exc
:
print
(
f
"Error reading system prompt file: {exc}"
)
fallback
=
f
"""# ROLE
You are CiCi, a CANIFA fashion assistant.
Today: {date_str}
Never fabricate. Keep responses concise.
"""
return
_ensure_json_instruction
(
fallback
)
from
common.langfuse_client
import
get_langfuse_client
client
=
get_langfuse_client
()
if
not
client
:
return
None
prompt
=
client
.
get_prompt
(
name
=
"cucu-system-prompt"
,
label
=
"production"
,
cache_ttl_seconds
=
60
,
# Re-fetch every 60s
)
date_str
=
datetime
.
now
(
VIETNAM_TZ
)
.
strftime
(
"
%
Y-
%
m-
%
d"
)
weekday_str
=
_get_weekday_str
()
compiled
=
prompt
.
compile
(
date_str
=
date_str
,
weekday_str
=
weekday_str
)
logger
.
info
(
"✅ Prompt fetched from Langfuse (version=
%
s)"
,
prompt
.
version
)
return
compiled
except
Exception
as
e
:
logger
.
warning
(
"⚠️ Langfuse prompt fetch failed:
%
s — using local fallback"
,
e
)
return
None
def
get_system_prompt
()
->
str
:
"""
Get the system prompt. Priority:
1. Langfuse prompt management (realtime, editable)
2. Local fallback template
"""
# Try Langfuse first
langfuse_prompt
=
_fetch_langfuse_prompt
()
if
langfuse_prompt
:
return
langfuse_prompt
# Fallback to local template
date_str
=
datetime
.
now
(
VIETNAM_TZ
)
.
strftime
(
"
%
Y-
%
m-
%
d"
)
weekday_str
=
_get_weekday_str
()
prompt
=
_PROMPT_TEMPLATE
.
replace
(
"{{date_str}}"
,
date_str
)
.
replace
(
"{{weekday_str}}"
,
weekday_str
)
logger
.
info
(
"📝 Using local prompt fallback (date=
%
s, weekday=
%
s)"
,
date_str
,
weekday_str
)
return
prompt
backend/agent/system_prompt.txt
deleted
100644 → 0
View file @
fe844925
# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {date_str}
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
Bạn chỉ có 1 tool duy nhất là `memo_retrieval_tool`. Hãy sử dụng nó thông minh.
## 0. KHI NÀO **KHÔNG** ĐƯỢC GỌI TOOL (QUAN TRỌNG)
- Nếu user **chỉ chào hỏi / small talk** (VD: "chào em", "hello", "hi bro", "alo"), hãy:
- Trả lời lại một câu chào ngắn gọn, KHÔNG gọi `memo_retrieval_tool`.
- Chỉ gọi tool khi user hỏi RÕ về **ghi chú / note / ngày / nội dung / tag**.
- Không được "đoán" là user đang hỏi về Kafka, work, v.v. nếu câu hiện tại **chỉ là lời chào**.
- Không dùng **câu hỏi cũ** để tự ý gọi tool cho câu mới nếu câu mới chỉ là lời chào.
## 1. TỰ TÍNH TOÁN NGÀY THÁNG (QUAN TRỌNG)
Người dùng sẽ hỏi ngày tương đối (hôm qua, tuần trước...). Bạn PHẢI tự tính ra ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {date_str}".
- **"Hôm nay note gì?"**
→ `start_date` = {date_str}, `end_date` = {date_str}
- **"Hôm qua note gì?"**
→ `start_date` = {date_str} - 1 ngày
- **"Hôm kia note gì?"**
→ `start_date` = {date_str} - 2 ngày
- **"Tuần trước note gì?"**
→ `start_date` = {date_str} - 7 ngày, `end_date` = {date_str} (hoặc range cụ thể của tuần trước)
- **"Tháng 1 note gì?"**
→ `start_date` = "2026-01-01", `end_date` = "2026-01-31"
## 2. PHÂN TÍCH PARAMETERS
- **`content_search`**: Dùng khi user hỏi về nội dung (VD: "dự án A", "pass wifi", "số điện thoại"). Dùng Regex nên hãy chọn keyword đặc trưng.
- **`tag`**: Dùng khi user nhắc đến tag/chủ đề (VD: "#work", "#idea"). Chỉ điền nếu user KHẲNG ĐỊNH là tag.
### 2.1. XỬ LÝ "TOPIC" THÀNH TAG / CONTENT_SEARCH
- Nếu user nhắc đến **một chủ đề ngắn gọn** (VD: "Kafka", "English", "health") nhưng **không có dấu #**:
- Hãy coi đó là một **topic**.
- Nếu topic là **một từ đơn, không có khoảng trắng** (VD: "Kafka", "work"):
- ƯU TIÊN map thành `tag` (VD: `tag="kafka"` hoặc `tag="work"` — KHÔNG cần dấu `#`).
- Nếu topic là **cụm từ dài** (VD: "Kafka performance", "meeting tuần trước"):
- Map thành `content_search` (VD: `content_search="Kafka performance"`).
- Nếu không chắc đó là tag hay chỉ là từ khóa nội dung:
- Có thể điền **cả hai**:
- `tag="kafka"`
- `content_search="Kafka"`
- Khi đó MongoDB sẽ lọc theo tag (nếu note có tag) và/hoặc nội dung có chứa keyword.
## 3. VÍ DỤ GỌI TOOL
**Case 1: Hỏi theo ngày**
*Input: "Hôm qua tao có note gì không?" (Giả sử hôm nay 2026-01-24)*
→ Bot tính: Hôm qua = 2026-01-23
→ Tool call: `memo_retrieval_tool(start_date="2026-01-23")`
**Case 2: Tìm nội dung + Ngày**
*Input: "Tuần này tao note gì về 'meeting'?" (Hôm nay 2026-01-24)*
→ Bot tính: Tuần này ~ 2026-01-19 đến 2026-01-25
→ Tool call:
```python
memo_retrieval_tool(
start_date="2026-01-19",
end_date="2026-01-25",
content_search="meeting"
)
```
**Case 3: Tìm theo tag**
*Input: "Tìm mấy cái note #idea tháng trước"*
→ Tool call:
```python
memo_retrieval_tool(
start_date="2025-12-01",
end_date="2025-12-31",
tag="idea"
)
```
**Case 4: Tìm nội dung chung chung (Không rõ ngày)**
*Input: "Tìm lại pass wifi"*
→ Bot tự chọn range rộng hoặc không giới hạn (tùy tool support, ở đây tool bắt buộc start_date thì lấy ngày xa xưa hoặc 1 tháng gần nhất tùy ngữ cảnh, hoặc hỏi lại user. NHƯNG tốt nhất cứ search 1 năm gần đây).
→ Tool call: `memo_retrieval_tool(start_date="2025-01-01", content_search="pass wifi")`
---
# QUY TẮC TRẢ LỜI (RESPONSE)
1. **DỰA TRÊN KẾT QUẢ TOOL**:
- Nếu có memos: Liệt kê ngắn gọn, trích dẫn nội dung chính.
- Nếu `count` = 0: Trả lời "Không tìm thấy ghi chú nào trong khoảng thời gian này/với từ khóa này."
2. **KHÔNG BỊA ĐẶT**: Không tự chế ra nội dung memo không có trong data.
3. **FORMAT MENU**:
- Ghi chú 1 (2026-01-24): [Nội dung tóm tắt]
- Ghi chú 2 (2026-01-23): [Nội dung tóm tắt]
4. **NGÔN NGỮ**: Giao tiếp tự nhiên, thân thiện (bro-style nếu user thích, hoặc lịch sự mặc định).
---
# FORMAT ĐẦU RA (JSON)
Bot trả lời dưới dạng JSON (để Frontend render hoặc parse):
```json
{{
"ai_response": "Đây là các ghi chú mình tìm thấy hôm qua...",
"found_memos": [
{{
"id": "...",
"content": "...",
"created_at": "..."
}}
]
}}
```
*Lưu ý: Nếu tool trả về data, hãy tóm tắt vào `ai_response` và dán raw data vào `found_memos` nếu cần.*
\ No newline at end of file
backend/agent/tools/memo_retrieval_tool.py
View file @
2fec4891
...
...
@@ -16,6 +16,8 @@ import os
from
datetime
import
datetime
,
time
,
timedelta
,
timezone
from
typing
import
Optional
,
Any
from
langchain_core.runnables
import
RunnableConfig
from
langchain_core.tools
import
tool
from
langchain_openai
import
OpenAIEmbeddings
from
pydantic
import
SecretStr
...
...
@@ -26,6 +28,40 @@ from common.mongo_client import mongodb_client, serialize_doc
logger
=
logging
.
getLogger
(
__name__
)
# ---- Cached singletons to avoid re-init per request ----
_embedder
:
OpenAIEmbeddings
|
None
=
None
_cached_embeddings
:
list
[
dict
]
|
None
=
None
_cache_timestamp
:
float
=
0
_CACHE_TTL
=
300
# 5 minutes
def
_get_embedder
()
->
OpenAIEmbeddings
|
None
:
global
_embedder
if
_embedder
is
not
None
:
return
_embedder
api_key
=
os
.
getenv
(
"OPENAI_API_KEY"
)
if
not
api_key
:
return
None
_embedder
=
OpenAIEmbeddings
(
model
=
"text-embedding-3-small"
,
api_key
=
SecretStr
(
api_key
),
)
return
_embedder
async
def
_get_cached_embeddings
()
->
list
[
dict
]:
"""Load embeddings from MongoDB with in-memory cache."""
import
time
as
_time
global
_cached_embeddings
,
_cache_timestamp
now
=
_time
.
time
()
if
_cached_embeddings
is
not
None
and
(
now
-
_cache_timestamp
)
<
_CACHE_TTL
:
return
_cached_embeddings
cursor
=
mongodb_client
.
memo_embeddings
.
find
({})
_cached_embeddings
=
await
cursor
.
to_list
(
length
=
5000
)
_cache_timestamp
=
now
logger
.
info
(
"Refreshed embeddings cache:
%
d docs"
,
len
(
_cached_embeddings
))
return
_cached_embeddings
@
tool
async
def
memo_retrieval_tool
(
...
...
@@ -33,6 +69,8 @@ async def memo_retrieval_tool(
end_date
:
Optional
[
str
]
=
None
,
content_search
:
Optional
[
str
]
=
None
,
tag
:
Optional
[
str
]
=
None
,
*
,
config
:
RunnableConfig
,
)
->
str
:
"""
Truy vấn các memo từ database MongoDB (cuccu_memos).
...
...
@@ -47,156 +85,127 @@ async def memo_retrieval_tool(
JSON string chứa danh sách memo tìm được.
"""
try
:
# Extract user_id from RunnableConfig to filter by creator
user_id
=
(
config
.
get
(
"configurable"
)
or
{})
.
get
(
"user_id"
)
logger
.
info
(
"memo_retrieval_tool started: start=
%
s, end=
%
s, content=
%
s, tag=
%
s"
,
start_date
,
end_date
,
content_search
,
tag
"memo_retrieval_tool started:
user_id=
%
s,
start=
%
s, end=
%
s, content=
%
s, tag=
%
s"
,
user_id
,
start_date
,
end_date
,
content_search
,
tag
)
#
1. Build Date Filter
query
:
dict
[
str
,
Any
]
=
{}
#
──────────────────────────────────────────────
# Step 1: Parse dates
# ──────────────────────────────────────────────
try
:
# Parse start_date key
dt_start
=
datetime
.
strptime
(
start_date
,
"
%
Y-
%
m-
%
d"
)
.
replace
(
tzinfo
=
timezone
.
utc
)
if
end_date
:
# Range query: start_date 00:00:00 -> end_date 23:59:59
dt_end_str
=
end_date
dt_end
=
datetime
.
strptime
(
end_date
,
"
%
Y-
%
m-
%
d"
)
.
replace
(
tzinfo
=
timezone
.
utc
)
else
:
# Single day query: start_date 00:00:00 -> start_date 23:59:59
dt_end_str
=
start_date
dt_end
=
dt_start
# Tạo mốc thời gian đầu ngày và cuối ngày
# Lưu ý: MongoDB lưu UTC. Cần đảm bảo logic query phù hợp.
# Ở đây giả sử start_date nhận vào là Local nhưng backend lưu UTC.
# Tạm thời query range rộng (start 00:00 -> end 23:59:59)
# Helper to get begin of day and end of day
begin_of_start_day
=
datetime
.
combine
(
dt_start
,
time
.
min
)
.
replace
(
tzinfo
=
timezone
.
utc
)
end_of_end_day
=
datetime
.
combine
(
dt_end
,
time
.
max
)
.
replace
(
tzinfo
=
timezone
.
utc
)
query
[
"created_at"
]
=
{
"$gte"
:
begin_of_start_day
,
"$lte"
:
end_of_end_day
}
from
common.timezone_config
import
VIETNAM_TZ
dt_start
=
datetime
.
strptime
(
start_date
,
"
%
Y-
%
m-
%
d"
)
.
replace
(
tzinfo
=
VIETNAM_TZ
)
dt_end
=
(
datetime
.
strptime
(
end_date
,
"
%
Y-
%
m-
%
d"
)
.
replace
(
tzinfo
=
VIETNAM_TZ
)
if
end_date
else
dt_start
)
begin_of_start_day
=
datetime
.
combine
(
dt_start
,
time
.
min
)
.
replace
(
tzinfo
=
VIETNAM_TZ
)
.
astimezone
(
timezone
.
utc
)
end_of_end_day
=
datetime
.
combine
(
dt_end
,
time
.
max
)
.
replace
(
tzinfo
=
VIETNAM_TZ
)
.
astimezone
(
timezone
.
utc
)
except
ValueError
as
e
:
return
json
.
dumps
({
"status"
:
"error"
,
"message"
:
f
"Invalid date format. Use YYYY-MM-DD. Error: {e}"
},
ensure_ascii
=
False
)
# 2. Content Search - Use Vector Search if content_search provided
memo_ids_from_vector_search
=
[]
if
content_search
:
# Use vector search for semantic understanding
try
:
api_key
=
os
.
getenv
(
"OPENAI_API_KEY"
)
if
api_key
:
logger
.
info
(
f
"🔍 Using vector search for: '{content_search}'"
)
embedder
=
OpenAIEmbeddings
(
model
=
"text-embedding-3-small"
,
api_key
=
SecretStr
(
api_key
),
)
# Use async embedding to avoid blocking the event loop
query_embedding
=
await
embedder
.
aembed_query
(
content_search
)
# Search embeddings collection
import
numpy
as
np
cursor_emb
=
mongodb_client
.
memo_embeddings
.
find
({})
emb_docs
=
await
cursor_emb
.
to_list
(
length
=
1000
)
results
=
[]
q
=
np
.
array
(
query_embedding
,
dtype
=
float
)
for
emb_doc
in
emb_docs
:
emb
=
emb_doc
.
get
(
"embedding"
,
[])
if
not
emb
:
continue
v
=
np
.
array
(
emb
,
dtype
=
float
)
if
v
.
shape
!=
q
.
shape
:
continue
# Cosine similarity
denom
=
np
.
linalg
.
norm
(
q
)
*
np
.
linalg
.
norm
(
v
)
sim
=
float
(
np
.
dot
(
q
,
v
)
/
denom
)
if
denom
!=
0
else
0.0
if
sim
>
0.5
:
# Threshold for relevance
results
.
append
({
"memo_id"
:
emb_doc
.
get
(
"memo_id"
),
"score"
:
sim
,
})
# Sort by similarity score
results
.
sort
(
key
=
lambda
r
:
r
[
"score"
],
reverse
=
True
)
memo_ids_from_vector_search
=
[
r
[
"memo_id"
]
for
r
in
results
[:
20
]]
logger
.
info
(
f
"✅ Vector search found {len(memo_ids_from_vector_search)} memos"
)
else
:
logger
.
warning
(
"⚠️ OPENAI_API_KEY not set, falling back to regex search"
)
# Fallback to regex if no API key
query
[
"content"
]
=
{
"$regex"
:
content_search
,
"$options"
:
"i"
}
except
Exception
as
e
:
logger
.
warning
(
f
"⚠️ Vector search failed: {e}, falling back to regex"
)
# Fallback to regex on error
query
[
"content"
]
=
{
"$regex"
:
content_search
,
"$options"
:
"i"
}
else
:
# No content search, use regex fallback if needed
pass
# 3. Tag Filter
# ──────────────────────────────────────────────
# Step 2: Query MongoDB FIRST (date + tag filter)
# → Narrow down memos before any semantic search
# ──────────────────────────────────────────────
base_query
:
dict
[
str
,
Any
]
=
{
"created_at"
:
{
"$gte"
:
begin_of_start_day
,
"$lte"
:
end_of_end_day
,
}
}
# Filter by creator_id — only return current user's memos
if
user_id
:
base_query
[
"creator_id"
]
=
user_id
if
tag
:
# Tags trong mongo lưu là array ["tag1", "tag2"]
# Query exact match trong array
query
[
"payload.tags"
]
=
tag
# 4. Execute Query
if
memo_ids_from_vector_search
:
# Use vector search results + date filter
# Convert string IDs to ObjectId
object_ids
:
list
[
ObjectId
]
=
[]
for
mid
in
memo_ids_from_vector_search
:
try
:
if
ObjectId
.
is_valid
(
mid
):
object_ids
.
append
(
ObjectId
(
mid
))
except
Exception
:
# pragma: no cover - defensive
continue
if
not
object_ids
:
# Vector search said there were matches but all IDs were invalid.
# To avoid returning a broad set of memos by date/tag only,
# short‑circuit and return an empty result set.
logger
.
warning
(
"Vector search returned only invalid memo IDs. "
"Returning empty result instead of broad date/tag query."
)
empty_payload
=
{
"status"
:
"success"
,
"query"
:
{
"start_date"
:
start_date
,
"end_date"
:
end_date
or
start_date
,
"content_search"
:
content_search
,
"tag"
:
tag
,
},
"count"
:
0
,
"memos"
:
[],
}
return
json
.
dumps
(
empty_payload
,
ensure_ascii
=
False
)
query
[
"_id"
]
=
{
"$in"
:
object_ids
}
logger
.
info
(
"MongoDB Query (with vector search):
%
s"
,
query
)
base_query
[
"payload.tags"
]
=
tag
# If NO content_search → just run date/tag query directly
if
not
content_search
:
logger
.
info
(
"MongoDB Query (date-only):
%
s"
,
base_query
)
cursor
=
mongodb_client
.
memos
.
find
(
base_query
)
.
sort
(
"created_at"
,
-
1
)
.
limit
(
20
)
docs
=
await
cursor
.
to_list
(
length
=
20
)
# If content_search with short keywords (≤2 words) → regex on MongoDB
elif
len
(
content_search
.
strip
()
.
split
())
<=
2
or
not
_get_embedder
():
if
not
_get_embedder
():
logger
.
warning
(
"⚠️ OPENAI_API_KEY not set, using regex search"
)
logger
.
info
(
f
"🔍 Using regex search for: '{content_search}'"
)
base_query
[
"content"
]
=
{
"$regex"
:
content_search
,
"$options"
:
"i"
}
logger
.
info
(
"MongoDB Query (date+regex):
%
s"
,
base_query
)
cursor
=
mongodb_client
.
memos
.
find
(
base_query
)
.
sort
(
"created_at"
,
-
1
)
.
limit
(
20
)
docs
=
await
cursor
.
to_list
(
length
=
20
)
# If content_search with longer queries → date query FIRST, then vector re-rank
else
:
logger
.
info
(
"MongoDB Query:
%
s"
,
query
)
cursor
=
mongodb_client
.
memos
.
find
(
query
)
.
sort
(
"created_at"
,
-
1
)
.
limit
(
20
)
docs
=
await
cursor
.
to_list
(
length
=
20
)
# 5. Format Result
memos
=
[]
for
doc
in
docs
:
memos
.
append
(
serialize_doc
(
doc
))
# Step 2a: Get all memos in date range (+ tag) from MongoDB
logger
.
info
(
"MongoDB Pre-filter Query:
%
s"
,
base_query
)
cursor
=
mongodb_client
.
memos
.
find
(
base_query
)
.
sort
(
"created_at"
,
-
1
)
.
limit
(
100
)
candidate_docs
=
await
cursor
.
to_list
(
length
=
100
)
logger
.
info
(
f
"📋 Pre-filter found {len(candidate_docs)} memos in date range"
)
if
not
candidate_docs
:
docs
=
[]
else
:
# Step 2b: Get embeddings ONLY for these candidate memos
candidate_ids
=
[
str
(
doc
[
"_id"
])
for
doc
in
candidate_docs
]
all_embeddings
=
await
_get_cached_embeddings
()
# Filter embeddings to only candidates
candidate_emb_map
=
{}
for
emb_doc
in
all_embeddings
:
mid
=
str
(
emb_doc
.
get
(
"memo_id"
,
""
))
if
mid
in
candidate_ids
and
emb_doc
.
get
(
"embedding"
):
candidate_emb_map
[
mid
]
=
emb_doc
[
"embedding"
]
logger
.
info
(
f
"🧠 Found {len(candidate_emb_map)} embeddings for candidates"
)
if
not
candidate_emb_map
:
# No embeddings for these memos → fallback to regex
logger
.
info
(
"No embeddings found, falling back to regex"
)
base_query
[
"content"
]
=
{
"$regex"
:
content_search
,
"$options"
:
"i"
}
cursor
=
mongodb_client
.
memos
.
find
(
base_query
)
.
sort
(
"created_at"
,
-
1
)
.
limit
(
20
)
docs
=
await
cursor
.
to_list
(
length
=
20
)
else
:
# Step 2c: Semantic re-rank using vector similarity
try
:
embedder
=
_get_embedder
()
query_embedding
=
await
embedder
.
aembed_query
(
content_search
)
import
numpy
as
np
q
=
np
.
array
(
query_embedding
,
dtype
=
float
)
scored
:
list
[
tuple
[
str
,
float
]]
=
[]
for
mid
,
emb
in
candidate_emb_map
.
items
():
v
=
np
.
array
(
emb
,
dtype
=
float
)
if
v
.
shape
!=
q
.
shape
:
continue
denom
=
np
.
linalg
.
norm
(
q
)
*
np
.
linalg
.
norm
(
v
)
sim
=
float
(
np
.
dot
(
q
,
v
)
/
denom
)
if
denom
!=
0
else
0.0
if
sim
>
0.4
:
scored
.
append
((
mid
,
sim
))
scored
.
sort
(
key
=
lambda
x
:
x
[
1
],
reverse
=
True
)
top_ids
=
{
mid
for
mid
,
_
in
scored
[:
20
]}
logger
.
info
(
f
"✅ Semantic re-rank: {len(top_ids)} memos above threshold"
)
# Re-order candidate_docs by semantic score
docs
=
[
d
for
d
in
candidate_docs
if
str
(
d
[
"_id"
])
in
top_ids
]
except
Exception
as
e
:
logger
.
warning
(
f
"⚠️ Vector search failed: {e}, falling back to regex"
)
base_query
[
"content"
]
=
{
"$regex"
:
content_search
,
"$options"
:
"i"
}
cursor
=
mongodb_client
.
memos
.
find
(
base_query
)
.
sort
(
"created_at"
,
-
1
)
.
limit
(
20
)
docs
=
await
cursor
.
to_list
(
length
=
20
)
# ──────────────────────────────────────────────
# Step 3: Format Result
# ──────────────────────────────────────────────
memos
=
[
serialize_doc
(
doc
)
for
doc
in
docs
]
return
json
.
dumps
(
{
...
...
@@ -205,7 +214,7 @@ async def memo_retrieval_tool(
"start_date"
:
start_date
,
"end_date"
:
end_date
or
start_date
,
"content_search"
:
content_search
,
"tag"
:
tag
"tag"
:
tag
,
},
"count"
:
len
(
memos
),
"memos"
:
memos
,
...
...
backend/api/chatbot/chat_route.py
View file @
2fec4891
"""
Chatbot API Route
-----------------
`POST /api/agent/chat` - chính là endpoint chat cho CiCi Assistant.
Logic xử lý nằm ở `agent.controller.chat_controller`.
`POST /api/agent/chat` - endpoint chat cho CuCu Assistant (non-streaming).
`POST /api/agent/chat/stream` - endpoint chat SSE streaming.
Logic xử lý nằm ở `agent.controller`.
"""
import
logging
from
dataclasses
import
dataclass
from
typing
import
AsyncGenerator
from
fastapi
import
APIRouter
,
BackgroundTasks
,
HTTPException
,
Request
from
fastapi.responses
import
StreamingResponse
from
opentelemetry
import
trace
from
agent.controller
import
chat_controller
from
agent.models
import
QueryRequest
# Lazy imports - defer heavy AI modules to first use
# from agent.controller import chat_controller, chat_controller_stream
# from agent.models import QueryRequest
from
common.message_limit
import
message_limit_service
from
config
import
DEFAULT_MODEL
...
...
@@ -20,6 +24,26 @@ logger = logging.getLogger(__name__)
tracer
=
trace
.
get_tracer
(
__name__
)
router
=
APIRouter
()
# Cache for lazy-loaded modules
_agent_modules
=
{}
def
_get_agent_controller
():
"""Lazy-load agent.controller to defer LangChain/LangGraph import."""
if
"controller"
not
in
_agent_modules
:
from
agent.controller
import
chat_controller
,
chat_controller_stream
_agent_modules
[
"controller"
]
=
chat_controller
_agent_modules
[
"controller_stream"
]
=
chat_controller_stream
return
_agent_modules
[
"controller"
],
_agent_modules
[
"controller_stream"
]
def
_get_query_request_model
():
"""Lazy-load agent.models.QueryRequest."""
if
"QueryRequest"
not
in
_agent_modules
:
from
agent.models
import
QueryRequest
_agent_modules
[
"QueryRequest"
]
=
QueryRequest
return
_agent_modules
[
"QueryRequest"
]
@
dataclass
class
Identity
:
...
...
@@ -41,7 +65,6 @@ def _get_identity(request: Request) -> Identity:
history_key
=
primary_id
rate_limit_key
=
primary_id
else
:
# Guest: dùng device_id, fallback 'anonymous'
primary_id
=
device_id
or
"anonymous"
history_key
=
device_id
or
"anonymous"
rate_limit_key
=
device_id
or
"anonymous"
...
...
@@ -54,38 +77,35 @@ def _get_identity(request: Request) -> Identity:
)
@
router
.
post
(
"/api/agent/chat"
,
summary
=
"Chat with CiCi Assistant"
)
async
def
cici_chat
(
request
:
Request
,
req
:
QueryRequest
,
background_tasks
:
BackgroundTasks
):
"""
Endpoint chat không stream - trả về response JSON đầy đủ một lần.
@
router
.
post
(
"/api/agent/chat"
,
summary
=
"Chat with CuCu Assistant"
)
async
def
cucu_chat
(
request
:
Request
,
background_tasks
:
BackgroundTasks
):
"""Endpoint chat không stream - trả về response JSON đầy đủ một lần."""
# Lazy-load AI modules on first call
QueryRequest
=
_get_query_request_model
()
chat_controller
,
_
=
_get_agent_controller
()
body
=
await
request
.
json
()
req
=
QueryRequest
(
**
body
)
- Tự lấy user/device từ middleware (`get_user_identity`)
- Gọi `chat_controller` để xử lý toàn bộ logic LLM + tools
- Tự tăng counter rate limit sau khi trả lời xong
"""
# 1. Xác định identity
identity
=
_get_identity
(
request
)
user_id
=
identity
.
primary_id
logger
.
info
(
"📥 [Incoming Chat] User=
%
s | Query=
%
s"
,
user_id
,
req
.
user_query
)
# Span cho tracing (optional)
span
=
trace
.
get_current_span
()
span
.
set_attribute
(
"user.id"
,
user_id
)
span
.
set_attribute
(
"chat.user_query"
,
req
.
user_query
)
try
:
# 2. Gọi controller xử lý
result
=
await
chat_controller
(
query
=
req
.
user_query
,
user_id
=
user_id
,
background_tasks
=
background_tasks
,
model_name
=
DEFAULT_MODEL
,
images
=
req
.
images
,
identity_key
=
identity
.
history_key
,
# Guest: device_id, User: user_id
identity_key
=
identity
.
history_key
,
)
# 3. Tăng usage info (rate limit) sau khi thành công
usage_info
=
await
message_limit_service
.
increment
(
identity_key
=
identity
.
rate_limit_key
,
is_authenticated
=
identity
.
is_authenticated
,
...
...
@@ -104,7 +124,58 @@ async def cici_chat(request: Request, req: QueryRequest, background_tasks: Backg
},
}
except
Exception
as
e
:
logger
.
error
(
"Error in c
ici
_chat:
%
s"
,
e
,
exc_info
=
True
)
logger
.
error
(
"Error in c
ucu
_chat:
%
s"
,
e
,
exc_info
=
True
)
raise
HTTPException
(
status_code
=
500
,
detail
=
str
(
e
))
from
e
@
router
.
post
(
"/api/agent/chat/stream"
,
summary
=
"Chat with CuCu Assistant (SSE Streaming)"
)
async
def
cucu_chat_stream
(
request
:
Request
):
"""
Endpoint chat SSE streaming — trả về token-by-token qua Server-Sent Events.
SSE format:
data: {"token": "partial"}
\n\n
— mỗi token chunk
data: {"done": true, "ai_response": "full text"}
\n\n
— kết thúc
"""
# Lazy-load AI modules on first call
QueryRequest
=
_get_query_request_model
()
_
,
chat_controller_stream
=
_get_agent_controller
()
body
=
await
request
.
json
()
req
=
QueryRequest
(
**
body
)
identity
=
_get_identity
(
request
)
user_id
=
identity
.
primary_id
logger
.
info
(
"📥 [Incoming Stream] User=
%
s | Query=
%
s"
,
user_id
,
req
.
user_query
)
async
def
sse_generator
()
->
AsyncGenerator
[
str
,
None
]:
try
:
async
for
chunk_json
in
chat_controller_stream
(
query
=
req
.
user_query
,
user_id
=
user_id
,
model_name
=
DEFAULT_MODEL
,
images
=
req
.
images
,
identity_key
=
identity
.
history_key
,
):
yield
f
"data: {chunk_json}
\n\n
"
# Increment rate limit after stream completes
await
message_limit_service
.
increment
(
identity_key
=
identity
.
rate_limit_key
,
is_authenticated
=
identity
.
is_authenticated
,
)
logger
.
info
(
"📤 [Stream Done] User=
%
s"
,
user_id
)
except
Exception
as
e
:
logger
.
error
(
"Error in stream:
%
s"
,
e
,
exc_info
=
True
)
yield
f
'data: {{"error": "{str(e)}"}}
\n\n
'
return
StreamingResponse
(
sse_generator
(),
media_type
=
"text/event-stream"
,
headers
=
{
"Cache-Control"
:
"no-cache"
,
"Connection"
:
"keep-alive"
,
"X-Accel-Buffering"
:
"no"
,
},
)
backend/api/chatbot/history_route.py
View file @
2fec4891
...
...
@@ -30,6 +30,14 @@ class ClearHistoryResponse(BaseModel):
message
:
str
@router.get("/api/history/me", summary="Get My Chat History", response_model=ChatHistoryResponse)
async def get_my_chat_history(request: Request, limit: int | None = 50, before_id: int | None = None):
    """
    Shortcut: fetch the caller's own chat history (identity resolved from middleware).

    Delegates to ``get_chat_history`` with the sentinel ``identity_key="me"``;
    ``limit`` / ``before_id`` pass through for cursor-based pagination.
    NOTE: this route must stay registered before ``/api/history/{identity_key}``
    so FastAPI does not capture "me" as a path parameter.
    """
    return await get_chat_history(request, identity_key="me", limit=limit, before_id=before_id)
@
router
.
get
(
"/api/history/{identity_key}"
,
summary
=
"Get Chat History"
,
response_model
=
ChatHistoryResponse
)
async
def
get_chat_history
(
request
:
Request
,
identity_key
:
str
,
limit
:
int
|
None
=
50
,
before_id
:
int
|
None
=
None
):
"""
...
...
@@ -59,6 +67,30 @@ async def get_chat_history(request: Request, identity_key: str, limit: int | Non
raise
HTTPException
(
status_code
=
500
,
detail
=
"Failed to fetch chat history"
)
@router.delete("/api/history/me", summary="Clear My Chat History", response_model=ClearHistoryResponse)
async def clear_my_chat_history(request: Request):
    """
    Shortcut: clear the caller's own chat history (identity resolved from middleware).

    Key resolution mirrors the chat route: authenticated users are keyed by
    ``user_id``; guests fall back to ``device_id`` or ``"anonymous"``.

    Raises:
        HTTPException(500): if the underlying store fails for any reason.
    """
    try:
        # Identity fields are populated on request.state by the auth middleware.
        user_id = getattr(request.state, "user_id", None)
        device_id = getattr(request.state, "device_id", "") or ""
        is_authenticated = bool(getattr(request.state, "is_authenticated", False))
        if is_authenticated and user_id:
            resolved_key = str(user_id)
        else:
            resolved_key = device_id or "anonymous"
        manager = await get_conversation_manager()
        await manager.clear_history(resolved_key)
        logger.info("✅ Cleared chat history for %s", resolved_key)
        # Fix: plain literal (was an f-string with no placeholders).
        return {"success": True, "message": "Đã xóa lịch sử chat"}
    except Exception as e:
        logger.error("Error clearing chat history: %s", e, exc_info=True)
        # Fix: chain the cause for debuggability, consistent with chat_route.
        raise HTTPException(status_code=500, detail="Failed to clear chat history") from e
@
router
.
delete
(
"/api/history/{identity_key}"
,
summary
=
"Clear Chat History"
,
response_model
=
ClearHistoryResponse
)
async
def
clear_chat_history
(
identity_key
:
str
):
"""
...
...
backend/api/memos/memo_routes.py
View file @
2fec4891
...
...
@@ -55,12 +55,25 @@ async def list_memos(
filter_str
=
filter
,
)
# Parse creator_id from filter string (e.g. "creator_id == user_xxx")
creator_id
=
None
if
filter
:
logger
.
debug
(
"List memos GET with filter=
%
r"
,
filter
)
pattern_creator
=
r"creator_id\s*==\s*([a-zA-Z0-9_\-\.]+)"
match_creator
=
re
.
search
(
pattern_creator
,
filter
)
if
match_creator
:
creator_id
=
match_creator
.
group
(
1
)
# Parse pinned from filter string (e.g. "... && pinned")
pinned
=
None
if
filter
and
re
.
search
(
r'\bpinned\b(?!\s*==\s*false)'
,
filter
):
pinned
=
True
return
await
memo_service
.
list_memos
(
user_id
=
user_id
,
creator_id
=
creator_id
,
tag
=
tag
,
pinned
=
pinned
,
row_status
=
row_status
,
start_date
=
dt_start
,
end_date
=
dt_end
...
...
@@ -118,10 +131,18 @@ async def create_memo_or_list_memos(
if
match_creator
:
creator_id
=
match_creator
.
group
(
1
)
# Parse pinned filter — frontend sends "pinned" in filter string for bookmarks
pinned
=
None
if
raw_filter
and
isinstance
(
raw_filter
,
str
):
# Match standalone "pinned" (not "pinned == false")
if
re
.
search
(
r'\bpinned\b(?!\s*==\s*false)'
,
raw_filter
):
pinned
=
True
return
await
memo_service
.
list_memos
(
user_id
=
user_id
,
creator_id
=
creator_id
,
tag
=
tag
,
pinned
=
pinned
,
start_date
=
start_date
,
end_date
=
end_date
)
...
...
backend/api/memos/shortcut_routes.py
View file @
2fec4891
...
...
@@ -4,13 +4,12 @@ Shortcut service routes for Memos-style backend.
from
typing
import
List
from
fastapi
import
APIRouter
,
Body
,
Depends
,
HTTPException
from
fastapi
import
APIRouter
,
Depends
,
HTTPException
from
common.memos_core.schemas
import
(
ShortcutCreate
,
ShortcutUpdate
,
ShortcutResponse
,
ListShortcutsResponse
,
)
from
common.memos_core.services
import
get_shortcut_service
...
...
@@ -26,20 +25,6 @@ async def list_shortcuts(shortcut_service=Depends(get_shortcut_service)):
raise
HTTPException
(
status_code
=
500
,
detail
=
str
(
exc
))
from
exc
@
router
.
post
(
""
,
summary
=
"List shortcuts (Connect compatibility)"
,
response_model
=
ListShortcutsResponse
)
async
def
list_shortcuts_connect_compat
(
payload
:
dict
=
Body
(
default_factory
=
dict
),
# noqa: B008
shortcut_service
=
Depends
(
get_shortcut_service
),
):
# Connect RPC ListShortcuts is proxied as POST /api/v1/shortcuts in dev.
# Ignore payload (parent, pagination...) and return empty list for now.
try
:
_
=
payload
shortcuts
=
await
shortcut_service
.
list_shortcuts
()
return
ListShortcutsResponse
(
shortcuts
=
shortcuts
)
except
Exception
as
exc
:
# pragma: no cover
raise
HTTPException
(
status_code
=
500
,
detail
=
str
(
exc
))
from
exc
@
router
.
post
(
""
,
summary
=
"Create shortcut"
,
response_model
=
ShortcutResponse
)
async
def
create_shortcut
(
...
...
@@ -54,7 +39,7 @@ async def create_shortcut(
@
router
.
patch
(
"/{shortcut_id}"
,
summary
=
"Update shortcut"
,
response_model
=
ShortcutResponse
)
async
def
update_shortcut
(
shortcut_id
:
int
,
shortcut_id
:
str
,
payload
:
ShortcutUpdate
,
shortcut_service
=
Depends
(
get_shortcut_service
),
):
...
...
@@ -66,7 +51,7 @@ async def update_shortcut(
@
router
.
delete
(
"/{shortcut_id}"
,
summary
=
"Delete shortcut"
)
async
def
delete_shortcut
(
shortcut_id
:
int
,
shortcut_id
:
str
,
shortcut_service
=
Depends
(
get_shortcut_service
),
):
try
:
...
...
backend/api/test_chat_route.py
0 → 100644
View file @
2fec4891
"""
Test Chat Route - NO AUTH REQUIRED
------------------------------------
`POST /api/test/chat` - endpoint thô để kiểm tra AI API key còn sống không.
Không dùng graph, không dùng tools, không dùng history.
Gọi thẳng OpenAI / Gemini / Groq tùy DEFAULT_MODEL.
Dùng để debug: nếu endpoint này work → vấn đề ở auth/middleware.
nếu không work → vấn đề API key hoặc model.
"""
import
logging
import
time
from
fastapi
import
APIRouter
from
pydantic
import
BaseModel
from
config
import
DEFAULT_MODEL
,
GOOGLE_API_KEY
,
GROQ_API_KEY
,
OPENAI_API_KEY
logger
=
logging
.
getLogger
(
__name__
)
router
=
APIRouter
(
tags
=
[
"test"
])
class TestChatRequest(BaseModel):
    # Request body for the no-auth debug chat endpoint.
    message: str
    model: str | None = None  # optional override of DEFAULT_MODEL for this single request
class TestChatResponse(BaseModel):
    # Result of one direct LLM probe (always returned with HTTP 200).
    status: str       # "ok" or "error"
    model_used: str   # model actually invoked (request override or DEFAULT_MODEL)
    response: str     # LLM text on success, error description on failure
    latency_ms: int   # wall-clock latency of the LLM call in milliseconds
@router.post(
    "/api/test/chat",
    response_model=TestChatResponse,
    summary="[NO AUTH] Test AI API key trực tiếp",
    description=(
        "Endpoint debug - gọi thẳng LLM không qua graph/tools/auth. "
        "Dùng để kiểm tra API key còn sống và model đang hoạt động."
    ),
)
async def test_chat(req: TestChatRequest) -> TestChatResponse:
    """Debug endpoint: call the configured LLM directly, bypassing graph/tools/auth.

    On failure this deliberately returns an ``status="error"`` payload with
    HTTP 200 (instead of raising) so callers can always inspect the error
    message and latency.
    """
    chosen_model = req.model or DEFAULT_MODEL
    logger.info("[TEST CHAT] model=%s | msg=%s", chosen_model, req.message)

    started = time.monotonic()

    def _elapsed_ms() -> int:
        # Wall-clock milliseconds since the request started.
        return int((time.monotonic() - started) * 1000)

    try:
        answer = await _call_llm(model_name=chosen_model, message=req.message)
    except Exception as exc:  # broad on purpose: this is a diagnostics endpoint
        logger.error("[TEST CHAT] LLM error: %s", exc, exc_info=True)
        return TestChatResponse(
            status="error",
            model_used=chosen_model,
            response=f"LỖI: {type(exc).__name__}: {exc}",
            latency_ms=_elapsed_ms(),
        )

    elapsed = _elapsed_ms()
    logger.info("[TEST CHAT] OK latency=%dms", elapsed)
    return TestChatResponse(
        status="ok",
        model_used=chosen_model,
        response=answer,
        latency_ms=elapsed,
    )
async def _call_llm(model_name: str, message: str) -> str:
    """Call the LLM matching ``model_name`` and return its text response.

    Provider is chosen by model-name prefix:
      - ``gemini*``                          → Google Generative AI
      - ``llama*`` / ``mixtral*`` / ``*groq*`` → Groq
      - anything else                        → OpenAI (default)

    Imports are deliberately local so this debug path only pays the import
    cost of the provider it actually uses.
    """

    async def _invoke(llm) -> str:
        # All LangChain chat models share the same ainvoke contract;
        # factored out so the three provider branches don't repeat it.
        result = await llm.ainvoke(message)
        return result.content

    # ── Gemini / Google ──────────────────────────────────────────────────────
    if model_name.startswith("gemini"):
        from langchain_google_genai import ChatGoogleGenerativeAI

        return await _invoke(
            ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
        )

    # ── Groq ─────────────────────────────────────────────────────────────────
    if model_name.startswith(("llama", "mixtral")) or "groq" in model_name:
        from langchain_groq import ChatGroq

        return await _invoke(ChatGroq(model=model_name, groq_api_key=GROQ_API_KEY))

    # ── OpenAI (default) ─────────────────────────────────────────────────────
    from langchain_openai import ChatOpenAI

    return await _invoke(ChatOpenAI(model=model_name, openai_api_key=OPENAI_API_KEY))
backend/common/clerk_auth.py
View file @
2fec4891
...
...
@@ -35,13 +35,13 @@ def verify_clerk_jwt(token: str) -> dict[str, Any]:
signing_key
=
_jwks_client
()
.
get_signing_key_from_jwt
(
token
)
.
key
# Clerk tokens are typically RS256.
# leeway=
60 tolerates up to 60s clock skew between Clerk server and this machine
# leeway=
300 tolerates up to 5min clock skew (VPS clock not NTP-synced)
payload
=
jwt
.
decode
(
token
,
signing_key
,
algorithms
=
[
"RS256"
],
issuer
=
CLERK_ISSUER
,
leeway
=
6
0
,
leeway
=
30
0
,
options
=
{
"verify_aud"
:
False
,
# allow multiple audiences in dev
},
...
...
backend/common/conversation_manager.py
View file @
2fec4891
import
asyncio
import
json
import
logging
import
os
from
datetime
import
datetime
,
date
from
datetime
import
datetime
,
timezone
from
typing
import
Any
import
aiosqlite
import
psycopg
from
psycopg_pool
import
AsyncConnectionPool
from
config
import
CHECKPOINT_POSTGRES_URL
,
SQLITE_DB_PATH
,
USE_SQLITE_HISTORY
from
common.mongo_client
import
mongodb_client
,
COLLECTION_INBOX
,
utc_now
logger
=
logging
.
getLogger
(
__name__
)
class
ConversationManager
:
def
__init__
(
self
,
connection_url
:
str
=
CHECKPOINT_POSTGRES_URL
,
table_name
:
str
=
"langgraph_chat_histories"
,
):
self
.
connection_url
=
connection_url
self
.
table_name
=
table_name
self
.
_pool
:
AsyncConnectionPool
|
None
=
None
async
def
_get_pool
(
self
)
->
AsyncConnectionPool
:
"""Get or create async connection pool."""
if
self
.
_pool
is
None
:
self
.
_pool
=
AsyncConnectionPool
(
self
.
connection_url
,
min_size
=
1
,
max_size
=
20
,
max_lifetime
=
600
,
# Recycle connections every 10 mins
max_idle
=
300
,
# Close idle connections after 5 mins
open
=
False
,
kwargs
=
{
"autocommit"
:
True
}
)
await
self
.
_pool
.
open
()
return
self
.
_pool
async
def
initialize_table
(
self
):
"""Create the chat history table if it doesn't exist"""
try
:
pool
=
await
self
.
_get_pool
()
async
with
pool
.
connection
()
as
conn
:
async
with
conn
.
cursor
()
as
cursor
:
await
cursor
.
execute
(
f
"""
CREATE TABLE IF NOT EXISTS {self.table_name} (
id SERIAL PRIMARY KEY,
identity_key VARCHAR(255) NOT NULL,
message TEXT NOT NULL,
is_human BOOLEAN NOT NULL,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
"""
)
await
cursor
.
execute
(
f
"""
CREATE INDEX IF NOT EXISTS idx_{self.table_name}_identity_timestamp
ON {self.table_name} (identity_key, timestamp)
"""
)
await
conn
.
commit
()
logger
.
info
(
f
"Table {self.table_name} initialized successfully"
)
except
Exception
as
e
:
logger
.
error
(
f
"Error initializing table: {e}"
)
raise
async
def
save_conversation_turn
(
self
,
identity_key
:
str
,
human_message
:
str
,
ai_message
:
str
):
"""Save both human and AI messages in a single atomic transaction with retry logic."""
max_retries
=
3
for
attempt
in
range
(
max_retries
):
try
:
pool
=
await
self
.
_get_pool
()
timestamp
=
datetime
.
now
()
async
with
pool
.
connection
()
as
conn
:
async
with
conn
.
cursor
()
as
cursor
:
await
cursor
.
execute
(
f
"""INSERT INTO {self.table_name} (identity_key, message, is_human, timestamp)
VALUES (
%
s,
%
s,
%
s,
%
s), (
%
s,
%
s,
%
s,
%
s)"""
,
(
identity_key
,
human_message
,
True
,
timestamp
,
identity_key
,
ai_message
,
False
,
timestamp
,
),
)
# With autocommit=True in pool, and context manager, transactions are handled.
# Explicit commit can be safer but might be redundant if autocommit is on.
# Let's keep existing logic but be mindful of autocommit.
# Actually if autocommit=True, we don't need conn.commit().
# But if we want atomic transaction for 2 inserts, we should NOT use autocommit=True for the pool globally,
# OR we start a transaction block.
# But psycopg3 connection `async with pool.connection() as conn` actually starts a transaction by default if autocommit is False.
# Let's revert pool autocommit=True and handle it normally which is safer for atomicity.
await
conn
.
commit
()
logger
.
debug
(
f
"Saved conversation turn for identity_key {identity_key}"
)
return
# Success
except
psycopg
.
OperationalError
as
e
:
logger
.
warning
(
f
"Database connection error (attempt {attempt+1}/{max_retries}): {e}"
)
if
attempt
==
max_retries
-
1
:
logger
.
error
(
f
"Failed to save conversation after {max_retries} attempts: {e}"
)
raise
await
asyncio
.
sleep
(
0.5
)
except
Exception
as
e
:
logger
.
error
(
f
"Failed to save conversation for identity_key {identity_key}: {e}"
,
exc_info
=
True
)
raise
async
def
get_chat_history
(
self
,
identity_key
:
str
,
limit
:
int
|
None
=
None
,
before_id
:
int
|
None
=
None
)
->
list
[
dict
[
str
,
Any
]]:
"""
Retrieve chat history for an identity (user_id or device_id) using cursor-based pagination.
AI messages được parse từ JSON string để lấy product_ids.
"""
max_retries
=
3
for
attempt
in
range
(
max_retries
):
try
:
query
=
f
"""
SELECT message, is_human, timestamp, id
FROM {self.table_name}
WHERE identity_key =
%
s
"""
params
=
[
identity_key
]
if
before_id
:
query
+=
" AND id <
%
s"
params
.
append
(
before_id
)
query
+=
" ORDER BY id DESC"
if
limit
:
query
+=
" LIMIT
%
s"
params
.
append
(
limit
)
pool
=
await
self
.
_get_pool
()
async
with
pool
.
connection
()
as
conn
,
conn
.
cursor
()
as
cursor
:
await
cursor
.
execute
(
query
,
tuple
(
params
))
results
=
await
cursor
.
fetchall
()
history
=
[]
for
row
in
results
:
message_content
=
row
[
0
]
is_human
=
row
[
1
]
entry
=
{
"is_human"
:
is_human
,
"timestamp"
:
row
[
2
],
"id"
:
row
[
3
],
}
if
is_human
:
# User message - text thuần
entry
[
"message"
]
=
message_content
else
:
# AI message - parse JSON để lấy ai_response + product_ids
try
:
parsed
=
json
.
loads
(
message_content
)
entry
[
"message"
]
=
parsed
.
get
(
"ai_response"
,
message_content
)
entry
[
"product_ids"
]
=
parsed
.
get
(
"product_ids"
,
[])
except
(
json
.
JSONDecodeError
,
TypeError
):
# Fallback nếu không phải JSON (data cũ)
entry
[
"message"
]
=
message_content
entry
[
"product_ids"
]
=
[]
history
.
append
(
entry
)
return
history
except
psycopg
.
OperationalError
as
e
:
logger
.
warning
(
f
"Database connection error in get_chat_history (attempt {attempt+1}/{max_retries}): {e}"
)
if
attempt
==
max_retries
-
1
:
logger
.
error
(
f
"Failed to get chat history after {max_retries} attempts: {e}"
)
raise
await
asyncio
.
sleep
(
0.5
)
except
Exception
as
e
:
logger
.
error
(
f
"Error retrieving chat history: {e}"
)
return
[]
async
def
clear_history
(
self
,
identity_key
:
str
):
"""Clear all chat history for an identity"""
try
:
pool
=
await
self
.
_get_pool
()
async
with
pool
.
connection
()
as
conn
:
async
with
conn
.
cursor
()
as
cursor
:
await
cursor
.
execute
(
f
"DELETE FROM {self.table_name} WHERE identity_key =
%
s"
,
(
identity_key
,))
await
conn
.
commit
()
logger
.
info
(
f
"Cleared chat history for identity_key {identity_key}"
)
except
Exception
as
e
:
logger
.
error
(
f
"Error clearing chat history: {e}"
)
async
def
get_user_count
(
self
)
->
int
:
"""Get total number of unique identities"""
try
:
pool
=
await
self
.
_get_pool
()
async
with
pool
.
connection
()
as
conn
,
conn
.
cursor
()
as
cursor
:
await
cursor
.
execute
(
f
"SELECT COUNT(DISTINCT identity_key) FROM {self.table_name}"
)
result
=
await
cursor
.
fetchone
()
return
result
[
0
]
if
result
else
0
except
Exception
as
e
:
logger
.
error
(
f
"Error getting user count: {e}"
)
return
0
async def get_message_count_today(self, identity_key: str) -> int:
    """Count today's human messages for an identity (rate limiting).

    Only rows with is_human = true count toward the limit. Returns 0
    when the query fails so a counting error never blocks the chat.
    """
    try:
        pool = await self._get_pool()
        async with pool.connection() as conn:
            async with conn.cursor() as cursor:
                await cursor.execute(
                    f"""
                    SELECT COUNT(*) FROM {self.table_name}
                    WHERE identity_key = %s
                    AND is_human = true
                    AND DATE(timestamp) = CURRENT_DATE
                    """,
                    (identity_key,),
                )
                row = await cursor.fetchone()
                return row[0] if row else 0
    except Exception as e:
        logger.error(f"Error counting messages for {identity_key}: {e}")
        return 0
async def close(self):
    """Close the connection pool if one was ever created."""
    pool = self._pool
    if pool:
        await pool.close()
class SQLiteConversationManager:
    """SQLite-based chat history storage for local/dev testing.

    Mirrors the Postgres manager's public interface (initialize_table,
    save_conversation_turn, get_chat_history, clear_history,
    get_user_count, get_message_count_today, close) so the two backends
    are interchangeable behind a factory.

    AI messages may be stored as JSON blobs with "ai_response" and
    "product_ids" keys; plain strings are accepted as a fallback for
    older rows.
    """

    def __init__(self, db_path: str = SQLITE_DB_PATH, table_name: str = "langgraph_chat_histories"):
        # Eagerly create the parent directory so the first connect()
        # does not fail on a missing path.
        self.db_path = db_path
        self.table_name = table_name
        db_dir = os.path.dirname(self.db_path)
        if db_dir:
            os.makedirs(db_dir, exist_ok=True)

    async def initialize_table(self):
        """Create table and index if not exists."""
        try:
            async with aiosqlite.connect(self.db_path) as db:
                # is_human is stored as INTEGER (0/1); timestamp as text
                # in SQLite's DATETIME('now') format (UTC by default).
                await db.execute(
                    f"""
                    CREATE TABLE IF NOT EXISTS {self.table_name} (
                        id INTEGER PRIMARY KEY AUTOINCREMENT,
                        identity_key TEXT NOT NULL,
                        message TEXT NOT NULL,
                        is_human INTEGER NOT NULL,
                        timestamp TEXT DEFAULT (DATETIME('now'))
                    )
                    """
                )
                # Composite index backing per-identity, time-ordered queries.
                await db.execute(
                    f"""
                    CREATE INDEX IF NOT EXISTS idx_{self.table_name}_identity_timestamp
                    ON {self.table_name} (identity_key, timestamp)
                    """
                )
                await db.commit()
            logger.info(f"SQLite table {self.table_name} initialized at {self.db_path}")
        except Exception as e:
            logger.error(f"Error initializing SQLite table: {e}", exc_info=True)
            raise

    async def save_conversation_turn(self, identity_key: str, human_message: str, ai_message: str):
        """Save both human and AI messages in a single transaction."""
        # Local wall-clock time; the rate-limit query below compares
        # against DATE('now', 'localtime') so the two stay consistent.
        timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
        try:
            async with aiosqlite.connect(self.db_path) as db:
                # Multi-row VALUES insert (SQLite >= 3.7.11): both halves
                # of the turn land atomically in one statement.
                await db.execute(
                    f"""INSERT INTO {self.table_name} (identity_key, message, is_human, timestamp)
                    VALUES (?, ?, ?, ?), (?, ?, ?, ?)""",
                    (
                        identity_key,
                        human_message,
                        1,
                        timestamp,
                        identity_key,
                        ai_message,
                        0,
                        timestamp,
                    ),
                )
                await db.commit()
            logger.debug(f"Saved conversation turn for {identity_key} (SQLite)")
        except Exception as e:
            logger.error(f"Failed to save conversation for {identity_key} (SQLite): {e}", exc_info=True)
            raise

    async def get_chat_history(self, identity_key: str, limit: int | None = None, before_id: int | None = None) -> list[dict[str, Any]]:
        """Retrieve chat history with optional cursor pagination.

        Rows come back newest-first (ORDER BY id DESC). `before_id` is a
        cursor: only rows with a strictly smaller id are returned.
        NOTE(review): a falsy before_id (0) is treated as "no cursor" —
        acceptable because AUTOINCREMENT ids start at 1.
        Returns [] on any error.
        """
        try:
            query = f"""
                SELECT id, message, is_human, timestamp
                FROM {self.table_name}
                WHERE identity_key = ?
            """
            params: list[Any] = [identity_key]
            if before_id:
                query += " AND id < ?"
                params.append(before_id)
            query += " ORDER BY id DESC"
            if limit:
                query += " LIMIT ?"
                params.append(limit)
            async with aiosqlite.connect(self.db_path) as db:
                db.row_factory = aiosqlite.Row
                async with db.execute(query, tuple(params)) as cursor:
                    rows = await cursor.fetchall()
            history: list[dict[str, Any]] = []
            for row in rows:
                message_content = row["message"]
                is_human = bool(row["is_human"])
                entry = {
                    "is_human": is_human,
                    "timestamp": row["timestamp"],
                    "id": row["id"],
                }
                if is_human:
                    entry["message"] = message_content
                else:
                    # AI rows may be JSON {"ai_response", "product_ids"};
                    # fall back to the raw string for legacy rows.
                    try:
                        parsed = json.loads(message_content)
                        entry["message"] = parsed.get("ai_response", message_content)
                        entry["product_ids"] = parsed.get("product_ids", [])
                    except (json.JSONDecodeError, TypeError):
                        entry["message"] = message_content
                        entry["product_ids"] = []
                history.append(entry)
            return history
        except Exception as e:
            logger.error(f"Error retrieving chat history (SQLite): {e}", exc_info=True)
            return []

    async def clear_history(self, identity_key: str):
        """Clear all chat history for an identity."""
        # Best-effort: errors are logged and swallowed.
        try:
            async with aiosqlite.connect(self.db_path) as db:
                await db.execute(
                    f"DELETE FROM {self.table_name} WHERE identity_key = ?",
                    (identity_key,),
                )
                await db.commit()
            logger.info(f"Cleared chat history for {identity_key} (SQLite)")
        except Exception as e:
            logger.error(f"Error clearing chat history for {identity_key} (SQLite): {e}", exc_info=True)

    async def get_user_count(self) -> int:
        """Get total number of unique identities."""
        # Returns 0 on any error rather than raising.
        try:
            async with aiosqlite.connect(self.db_path) as db:
                async with db.execute(
                    f"SELECT COUNT(DISTINCT identity_key) FROM {self.table_name}"
                ) as cursor:
                    row = await cursor.fetchone()
                    return row[0] if row else 0
        except Exception as e:
            logger.error(f"Error getting user count (SQLite): {e}", exc_info=True)
            return 0

    async def get_message_count_today(self, identity_key: str) -> int:
        """Count user messages today (for potential rate limiting)."""
        # Only human messages (is_human = 1) count toward the limit.
        # 'localtime' matches the local timestamps written by
        # save_conversation_turn above.
        try:
            async with aiosqlite.connect(self.db_path) as db:
                async with db.execute(
                    f"""
                    SELECT COUNT(*) FROM {self.table_name}
                    WHERE identity_key = ?
                    AND is_human = 1
                    AND DATE(timestamp) = DATE('now', 'localtime')
                    """,
                    (identity_key,),
                ) as cursor:
                    row = await cursor.fetchone()
                    return row[0] if row else 0
        except Exception as e:
            logger.error(f"Error counting messages for {identity_key} (SQLite): {e}", exc_info=True)
            return 0

    async def close(self):
        # No persistent connection to close when using aiosqlite per-call
        return
# ... (imports remain)
from
common.mongo_client
import
mongodb_client
,
COLLECTION_INBOX
,
utc_now
# ... (Existing Postgres/SQLite classes remain, or we can remove them if we fully migrate.
# For safety, I'll add MongoDBConversationManager and return it.)
class
MongoDBConversationManager
:
"""MongoDB-based chat history storage."""
...
...
@@ -415,25 +17,12 @@ class MongoDBConversationManager:
async def initialize_table(self):
    """Ensure the indexes used by history queries exist (idempotent).

    NOTE(review): documents use an application-generated integer "id"
    as the pagination cursor (see save_conversation_turn) instead of
    MongoDB's ObjectId, to keep the API's `before_id: int` contract.
    """
    try:
        collection = mongodb_client.db[self.collection_name]
        # Listing: newest-first per identity.
        await collection.create_index(
            [("identity_key", 1), ("timestamp", -1)]
        )
        # Rate-limit counting: identity + is_human over a time range.
        await collection.create_index(
            [("identity_key", 1), ("is_human", 1), ("timestamp", -1)]
        )
        logger.info(f"MongoDB collection {self.collection_name} initialized")
    except Exception as e:
        logger.error(f"Error initializing MongoDB collection: {e}")
...
...
@@ -441,27 +30,26 @@ class MongoDBConversationManager:
async
def
save_conversation_turn
(
self
,
identity_key
:
str
,
human_message
:
str
,
ai_message
:
str
):
"""Save human and AI messages."""
try
:
# Generate IDs based on timestamp (microseconds) to ensure uniqueness and order
ts
=
utc_now
()
base_id
=
int
(
ts
.
timestamp
()
*
1000000
)
base_id
=
int
(
ts
.
timestamp
()
*
1000000
)
docs
=
[
{
"id"
:
base_id
,
"identity_key"
:
identity_key
,
"message"
:
human_message
,
"is_human"
:
True
,
"timestamp"
:
ts
"timestamp"
:
ts
,
},
{
"id"
:
base_id
+
1
,
"id"
:
base_id
+
1
,
"identity_key"
:
identity_key
,
"message"
:
ai_message
,
"is_human"
:
False
,
"timestamp"
:
ts
}
"timestamp"
:
ts
,
}
,
]
await
mongodb_client
.
db
[
self
.
collection_name
]
.
insert_many
(
docs
)
logger
.
debug
(
f
"Saved conversation turn for {identity_key} (MongoDB)"
)
except
Exception
as
e
:
...
...
@@ -474,28 +62,28 @@ class MongoDBConversationManager:
"""Retrieve chat history."""
try
:
query
=
{
"identity_key"
:
identity_key
}
if
before_id
:
query
[
"id"
]
=
{
"$lt"
:
before_id
}
cursor
=
mongodb_client
.
db
[
self
.
collection_name
]
.
find
(
query
)
.
sort
(
"id"
,
-
1
)
if
limit
:
cursor
=
cursor
.
limit
(
limit
)
docs
=
await
cursor
.
to_list
(
length
=
limit
or
100
)
history
=
[]
for
doc
in
docs
:
message_content
=
doc
[
"message"
]
is_human
=
doc
[
"is_human"
]
entry
=
{
"is_human"
:
is_human
,
"timestamp"
:
doc
[
"timestamp"
],
"id"
:
doc
[
"id"
],
}
if
is_human
:
entry
[
"message"
]
=
message_content
else
:
...
...
@@ -506,9 +94,9 @@ class MongoDBConversationManager:
except
(
json
.
JSONDecodeError
,
TypeError
):
entry
[
"message"
]
=
message_content
entry
[
"product_ids"
]
=
[]
history
.
append
(
entry
)
return
history
except
Exception
as
e
:
logger
.
error
(
f
"Error retrieving chat history (MongoDB): {e}"
)
...
...
@@ -529,9 +117,20 @@ class MongoDBConversationManager:
return
0
async def get_message_count_today(self, identity_key: str) -> int:
    """Count human messages sent today (UTC) for rate limiting.

    Replaces the earlier MVP stub that always returned 0 (which left
    the real implementation unreachable). Returns 0 on any error so a
    counting failure never blocks the chat flow.
    """
    try:
        # Midnight UTC today; documents store timezone-aware UTC
        # timestamps written by save_conversation_turn.
        today_start = datetime.now(timezone.utc).replace(
            hour=0, minute=0, second=0, microsecond=0
        )
        count = await mongodb_client.db[self.collection_name].count_documents(
            {
                "identity_key": identity_key,
                "is_human": True,
                "timestamp": {"$gte": today_start},
            }
        )
        return count
    except Exception as e:
        logger.error(f"Error counting messages for {identity_key} (MongoDB): {e}")
        return 0
async def close(self):
    """No-op: nothing is held per-manager to release here."""
    pass
...
...
@@ -540,6 +139,7 @@ class MongoDBConversationManager:
# --- Singleton ---
_instance
:
Any
=
None
async
def
get_conversation_manager
():
"""Return MongoDB manager."""
global
_instance
...
...
backend/common/embedding_service.py
View file @
2fec4891
...
...
@@ -40,9 +40,9 @@ class EmbeddingClientManager:
# If using default key, cache the client
if
not
api_key
:
if
self
.
_client
is
None
:
if
self
.
_client
is
None
:
self
.
_client
=
OpenAI
(
api_key
=
key
)
return
self
.
_client
return
self
.
_client
# For custom keys, create new client (not cached)
return
OpenAI
(
api_key
=
key
)
...
...
@@ -65,9 +65,9 @@ class EmbeddingClientManager:
# If using default key, cache the default client
if
not
api_key
:
if
self
.
_async_client
is
None
:
if
self
.
_async_client
is
None
:
self
.
_async_client
=
AsyncOpenAI
(
api_key
=
key
)
return
self
.
_async_client
return
self
.
_async_client
# For user-specific keys, cache per user_id
if
user_id
and
user_id
in
self
.
_user_clients
:
...
...
@@ -79,7 +79,7 @@ class EmbeddingClientManager:
return
client
logger
=
logging
.
getLogger
(
__name__
)
# NOTE:
# - TẠM THỜI KHÔNG DÙNG REDIS CACHE CHO EMBEDDING để tránh phụ thuộc Redis/aioredis.
...
...
backend/common/encryption.py
View file @
2fec4891
...
...
@@ -17,60 +17,85 @@ logger = logging.getLogger(__name__)
ENCRYPTION_KEY
=
os
.
getenv
(
"ENCRYPTION_KEY"
)
# Fallback: Generate key from a password (NOT recommended for production)
# This is only for development/testing
FALLBACK_PASSWORD
=
os
.
getenv
(
"ENCRYPTION_PASSWORD"
,
"default-dev-password-change-in-production"
)
# Cached Fernet instance (singleton) — avoids re-running PBKDF2 100K iterations per call
_fernet_instance
:
Fernet
|
None
=
None
def
_get_fernet_key
()
->
bytes
:
def
_derive_key_from_password
(
password
:
str
,
salt
:
bytes
)
->
bytes
:
"""Derive a Fernet key from password + salt using PBKDF2."""
kdf
=
PBKDF2HMAC
(
algorithm
=
hashes
.
SHA256
(),
length
=
32
,
salt
=
salt
,
iterations
=
100000
,
)
return
base64
.
urlsafe_b64encode
(
kdf
.
derive
(
password
.
encode
()))
def
_get_fernet
()
->
Fernet
:
"""
Get or generate Fernet encryption key.
Priority: ENCRYPTION_KEY env var > generated from password (dev only)
Get cached Fernet instance.
Priority: ENCRYPTION_KEY env var > password-derived key (dev only).
When using ENCRYPTION_KEY, salt is not needed (key is used directly).
"""
global
_fernet_instance
if
_fernet_instance
is
not
None
:
return
_fernet_instance
if
ENCRYPTION_KEY
:
try
:
# Try to use as-is (should be base64-encoded 32-byte key
)
return
ENCRYPTION_KEY
.
encode
()
_fernet_instance
=
Fernet
(
ENCRYPTION_KEY
.
encode
()
)
return
_fernet_instance
except
Exception
:
# If not valid, try to decode as base64
try
:
return
base64
.
urlsafe_b64decode
(
ENCRYPTION_KEY
)
_fernet_instance
=
Fernet
(
base64
.
urlsafe_b64decode
(
ENCRYPTION_KEY
))
return
_fernet_instance
except
Exception
:
logger
.
warning
(
"Invalid ENCRYPTION_KEY format, using fallback"
)
# Fallback: Generate from password (dev only - NOT secure for production)
# Fallback: Generate from password
with random salt
(dev only - NOT secure for production)
logger
.
warning
(
"⚠️ ENCRYPTION_KEY not set. Using password-based key derivation (NOT secure for production!)"
)
salt
=
b
"cucu_note_salt"
# Fixed salt for dev (should be random in production)
kdf
=
PBKDF2HMAC
(
algorithm
=
hashes
.
SHA256
(),
length
=
32
,
salt
=
salt
,
iterations
=
100000
,
)
key
=
base64
.
urlsafe_b64encode
(
kdf
.
derive
(
FALLBACK_PASSWORD
.
encode
()))
return
key
# Use random salt — stored in a file so decryption works across restarts
salt_file
=
os
.
path
.
join
(
os
.
path
.
dirname
(
__file__
),
".."
,
"data"
,
".encryption_salt"
)
os
.
makedirs
(
os
.
path
.
dirname
(
salt_file
),
exist_ok
=
True
)
if
os
.
path
.
exists
(
salt_file
):
with
open
(
salt_file
,
"rb"
)
as
f
:
salt
=
f
.
read
()
else
:
salt
=
os
.
urandom
(
16
)
with
open
(
salt_file
,
"wb"
)
as
f
:
f
.
write
(
salt
)
logger
.
info
(
"Generated new encryption salt (saved to
%
s)"
,
salt_file
)
key
=
_derive_key_from_password
(
FALLBACK_PASSWORD
,
salt
)
_fernet_instance
=
Fernet
(
key
)
return
_fernet_instance
def
encrypt_api_key
(
api_key
:
str
)
->
str
:
"""
Encrypt an API key using Fernet symmetric encryption.
Args:
api_key: Plain text API key to encrypt
Returns:
Encrypted API key as base64 string
Raises:
ValueError: If api_key is empty
RuntimeError: If encryption fails
"""
if
not
api_key
or
not
api_key
.
strip
():
raise
ValueError
(
"API key cannot be empty"
)
try
:
fernet
=
Fernet
(
_get_fernet_key
()
)
fernet
=
_get_fernet
(
)
encrypted
=
fernet
.
encrypt
(
api_key
.
encode
())
return
encrypted
.
decode
()
except
Exception
as
e
:
...
...
@@ -81,22 +106,22 @@ def encrypt_api_key(api_key: str) -> str:
def
decrypt_api_key
(
encrypted_key
:
str
)
->
str
:
"""
Decrypt an encrypted API key.
Args:
encrypted_key: Encrypted API key (base64 string)
Returns:
Decrypted plain text API key
Raises:
ValueError: If encrypted_key is empty
RuntimeError: If decryption fails (wrong key, corrupted data, etc.)
"""
if
not
encrypted_key
or
not
encrypted_key
.
strip
():
raise
ValueError
(
"Encrypted key cannot be empty"
)
try
:
fernet
=
Fernet
(
_get_fernet_key
()
)
fernet
=
_get_fernet
(
)
decrypted
=
fernet
.
decrypt
(
encrypted_key
.
encode
())
return
decrypted
.
decode
()
except
Exception
as
e
:
...
...
@@ -107,39 +132,38 @@ def decrypt_api_key(encrypted_key: str) -> str:
def mask_api_key(api_key: str) -> str:
    """Return a display-safe masked form of an API key.

    Keeps the first 7 and last 4 characters joined by "...". Empty keys
    or keys shorter than 11 characters collapse to a generic
    placeholder so nothing meaningful can leak.

    Args:
        api_key: API key to mask

    Returns:
        Masked API key (e.g., "sk-abcd...wxyz")
    """
    if not api_key or len(api_key) < 11:
        return "sk-...xxxx"
    head = api_key[:7]
    tail = api_key[-4:]
    return f"{head}...{tail}"
def validate_openai_key_format(api_key: str) -> bool:
    """Lightweight sanity check on an OpenAI-style API key.

    Accepts keys that, after trimming whitespace, start with "sk-" and
    have a plausible length (20–100 characters). This is a format check
    only — it does not verify the key against the OpenAI API.

    Args:
        api_key: API key to validate

    Returns:
        True if the format looks valid, False otherwise
    """
    if not api_key:
        return False
    key = api_key.strip()
    if not key:
        return False
    # OpenAI keys typically start with "sk-" and are ~51 characters;
    # 20..100 is a deliberately loose range.
    return key.startswith("sk-") and 20 <= len(key) <= 100
backend/common/llm_factory.py
View file @
2fec4891
...
...
@@ -88,7 +88,7 @@ class LLMFactory:
"streaming"
:
streaming
,
"api_key"
:
key
,
"temperature"
:
0
,
"max_tokens"
:
1000
,
"max_tokens"
:
4096
,
}
# Nếu bật json_mode, tiêm trực tiếp vào constructor
...
...
@@ -100,14 +100,7 @@ class LLMFactory:
logger
.
info
(
f
"✅ Created OpenAI: {model_name}"
)
return
llm
def
_enable_json_mode
(
self
,
llm
:
BaseChatModel
,
model_name
:
str
)
->
BaseChatModel
:
"""Enable JSON mode for the LLM."""
try
:
llm
=
llm
.
bind
(
response_format
=
{
"type"
:
"json_object"
})
logger
.
debug
(
f
"⚙️ JSON mode enabled for {model_name}"
)
except
Exception
as
e
:
logger
.
warning
(
f
"⚠️ JSON mode not supported: {e}"
)
return
llm
def
initialize
(
self
,
skip_warmup
:
bool
=
True
)
->
None
:
"""
...
...
backend/common/memos_core/query_parser.py
View file @
2fec4891
...
...
@@ -48,23 +48,30 @@ def parse_date_range(
if
match_ts
:
try
:
from
common.timezone_config
import
VIETNAM_TZ
ts_start
=
float
(
match_ts
.
group
(
1
))
ts_end
=
float
(
match_ts
.
group
(
2
))
dt_start
=
datetime
.
fromtimestamp
(
ts_start
,
tz
=
timezone
.
utc
)
dt_end
=
datetime
.
fromtimestamp
(
ts_end
,
tz
=
timezone
.
utc
)
# Frontend sends UTC midnight timestamps, but user is in Vietnam (UTC+7).
# Shift by -7h so "Feb 5" means Feb 5 00:00 VN (= Feb 4 17:00 UTC)
# instead of Feb 5 00:00 UTC (= Feb 5 07:00 VN).
utc_offset_seconds
=
VIETNAM_TZ
.
utcoffset
(
None
)
.
total_seconds
()
dt_start
=
datetime
.
fromtimestamp
(
ts_start
-
utc_offset_seconds
,
tz
=
timezone
.
utc
)
dt_end
=
datetime
.
fromtimestamp
(
ts_end
-
utc_offset_seconds
,
tz
=
timezone
.
utc
)
except
(
ValueError
,
TypeError
):
pass
else
:
# Case 2: DisplayTime filter (displayTime:YYYY-MM-DD)
# Dates are interpreted as Vietnam time (UTC+7), then converted to UTC for MongoDB
pattern_dt
=
r"displayTime:(\d{4}-\d{2}-\d{2})"
match_dt
=
re
.
search
(
pattern_dt
,
filter_str
)
if
match_dt
:
try
:
from
common.timezone_config
import
VIETNAM_TZ
date_str
=
match_dt
.
group
(
1
)
dt
=
datetime
.
strptime
(
date_str
,
"
%
Y-
%
m-
%
d"
)
.
replace
(
tzinfo
=
timezone
.
utc
)
dt_start
=
dt
dt_end
=
dt
+
timedelta
(
days
=
1
)
dt
=
datetime
.
strptime
(
date_str
,
"
%
Y-
%
m-
%
d"
)
.
replace
(
tzinfo
=
VIETNAM_TZ
)
dt_start
=
dt
.
astimezone
(
timezone
.
utc
)
dt_end
=
(
dt
+
timedelta
(
days
=
1
))
.
astimezone
(
timezone
.
utc
)
except
ValueError
:
pass
...
...
backend/common/memos_core/schemas.py
View file @
2fec4891
...
...
@@ -191,7 +191,7 @@ class ShortcutUpdate(BaseModel):
class
ShortcutResponse
(
BaseModel
):
id
:
int
id
:
str
name
:
str
filter
:
str
...
...
backend/common/memos_core/services.py
View file @
2fec4891
...
...
@@ -810,21 +810,74 @@ class ReactionService:
class
ShortcutService
:
"""Shortcut (Workspace) service with MongoDB backend."""
async
def
list_shortcuts
(
self
)
->
List
[
schemas
.
ShortcutResponse
]:
return
[]
cursor
=
mongodb_client
.
shortcuts
.
find
({})
.
sort
(
"created_at"
,
-
1
)
docs
=
await
cursor
.
to_list
(
length
=
100
)
return
[
schemas
.
ShortcutResponse
(
id
=
str
(
doc
[
"_id"
]),
name
=
doc
.
get
(
"name"
,
""
),
filter
=
doc
.
get
(
"filter"
,
""
),
)
for
doc
in
docs
]
async
def
create_shortcut
(
self
,
payload
:
schemas
.
ShortcutCreate
)
->
schemas
.
ShortcutResponse
:
return
schemas
.
ShortcutResponse
(
id
=
1
,
name
=
payload
.
name
,
filter
=
payload
.
filter
)
now
=
utc_now
()
doc
=
{
"name"
:
payload
.
name
,
"filter"
:
payload
.
filter
,
"created_at"
:
now
,
"updated_at"
:
now
,
}
result
=
await
mongodb_client
.
shortcuts
.
insert_one
(
doc
)
doc
[
"_id"
]
=
result
.
inserted_id
return
schemas
.
ShortcutResponse
(
id
=
str
(
doc
[
"_id"
]),
name
=
doc
[
"name"
],
filter
=
doc
[
"filter"
],
)
async
def
update_shortcut
(
self
,
shortcut_id
:
str
,
payload
:
schemas
.
ShortcutUpdate
)
->
schemas
.
ShortcutResponse
:
update_fields
:
dict
[
str
,
Any
]
=
{
"updated_at"
:
utc_now
()}
if
payload
.
name
is
not
None
:
update_fields
[
"name"
]
=
payload
.
name
if
payload
.
filter
is
not
None
:
update_fields
[
"filter"
]
=
payload
.
filter
filter_query
:
dict
[
str
,
Any
]
=
{}
if
ObjectId
.
is_valid
(
shortcut_id
):
filter_query
[
"_id"
]
=
ObjectId
(
shortcut_id
)
else
:
filter_query
[
"_id"
]
=
shortcut_id
from
pymongo
import
ReturnDocument
result
=
await
mongodb_client
.
shortcuts
.
find_one_and_update
(
filter_query
,
{
"$set"
:
update_fields
},
return_document
=
ReturnDocument
.
AFTER
,
)
if
not
result
:
raise
ValueError
(
f
"Shortcut {shortcut_id} not found"
)
async
def
update_shortcut
(
self
,
shortcut_id
:
int
,
payload
:
schemas
.
ShortcutUpdate
)
->
schemas
.
ShortcutResponse
:
return
schemas
.
ShortcutResponse
(
id
=
s
hortcut_id
,
name
=
payload
.
name
or
"demo"
,
filter
=
payload
.
filter
or
""
,
id
=
s
tr
(
result
[
"_id"
])
,
name
=
result
.
get
(
"name"
,
""
)
,
filter
=
result
.
get
(
"filter"
,
""
)
,
)
async
def
delete_shortcut
(
self
,
shortcut_id
:
int
)
->
None
:
return
None
async
def
delete_shortcut
(
self
,
shortcut_id
:
str
)
->
None
:
filter_query
:
dict
[
str
,
Any
]
=
{}
if
ObjectId
.
is_valid
(
shortcut_id
):
filter_query
[
"_id"
]
=
ObjectId
(
shortcut_id
)
else
:
filter_query
[
"_id"
]
=
shortcut_id
result
=
await
mongodb_client
.
shortcuts
.
delete_one
(
filter_query
)
if
result
.
deleted_count
==
0
:
raise
ValueError
(
f
"Shortcut {shortcut_id} not found"
)
class
ActivityService
:
...
...
backend/common/middleware.py
View file @
2fec4891
...
...
@@ -37,6 +37,7 @@ PUBLIC_PATHS = {
PUBLIC_PATH_PREFIXES
=
[
"/static"
,
"/mock"
,
"/api/test"
,
# debug/test endpoints - no auth required
]
...
...
backend/common/mongo_client.py
View file @
2fec4891
...
...
@@ -29,6 +29,7 @@ COLLECTION_REACTIONS = "cuccu_reactions"
COLLECTION_MEMO_EMBEDDINGS
=
"cuccu_memo_embeddings"
COLLECTION_INBOX
=
"cuccu_inbox"
COLLECTION_USER_SETTINGS
=
"cuccu_user_settings"
COLLECTION_SHORTCUTS
=
"cuccu_shortcuts"
class
MongoDBClient
:
...
...
@@ -116,6 +117,10 @@ class MongoDBClient:
def
user_settings
(
self
):
return
self
.
db
[
COLLECTION_USER_SETTINGS
]
@
property
def
shortcuts
(
self
):
return
self
.
db
[
COLLECTION_SHORTCUTS
]
# Singleton instance
mongodb_client
=
MongoDBClient
()
...
...
@@ -206,6 +211,9 @@ async def create_indexes():
# ====================== MEMO VERSIONS ======================
await
db
[
"cuccu_memo_versions"
]
.
create_index
([(
"memo_id"
,
1
),
(
"version_index"
,
-
1
)])
# ====================== SHORTCUTS ======================
await
db
[
COLLECTION_SHORTCUTS
]
.
create_index
([(
"creator_id"
,
1
)])
logger
.
info
(
"✅ Database indexes created successfully (Production-ready)"
)
except
Exception
as
e
:
logger
.
warning
(
f
"⚠️ Error creating indexes (may already exist): {e}"
)
...
...
backend/common/rate_limit.py
View file @
2fec4891
...
...
@@ -76,7 +76,7 @@ class RateLimitService:
logger
.
info
(
f
"Using Redis for rate limiting: {redis_host}:{redis_port}/{redis_db}"
)
else
:
# Fallback to memory (not suitable for production with multiple instances)
self
.
storage_uri
=
os
.
getenv
(
"RATE_STORAGE_URI"
,
"memory://"
)
self
.
storage_uri
=
os
.
getenv
(
"RATE_STORAGE_URI"
,
"memory://"
)
logger
.
warning
(
"⚠️ Using in-memory rate limiting (not suitable for production with multiple instances)"
)
self
.
default_limits
=
[
"100/hour"
,
"30/minute"
]
...
...
backend/common/timezone_config.py
0 → 100644
View file @
2fec4891
"""
Timezone configuration for CuCu Note.
All user-facing date operations should use VIETNAM_TZ.
MongoDB stores in UTC — convert at query boundaries.
"""
from
datetime
import
timezone
,
timedelta
VIETNAM_TZ
=
timezone
(
timedelta
(
hours
=
7
))
backend/config.py
View file @
2fec4891
...
...
@@ -126,8 +126,6 @@ CONV_SUPABASE_KEY: str | None = os.getenv("CONV_SUPABASE_KEY")
# ====================== REDIS CONFIGURATION ======================
REDIS_HOST
:
str
|
None
=
os
.
getenv
(
"REDIS_HOST"
)
REDIS_PORT
:
int
=
int
(
os
.
getenv
(
"REDIS_PORT"
,
"6379"
))
REDIS_PASSWORD
:
str
|
None
=
os
.
getenv
(
"REDIS_PASSWORD"
)
REDIS_USERNAME
:
str
|
None
=
os
.
getenv
(
"REDIS_USERNAME"
)
# ====================== AI API KEYS & MODELS ======================
OPENAI_API_KEY
:
str
|
None
=
os
.
getenv
(
"OPENAI_API_KEY"
)
...
...
@@ -172,7 +170,7 @@ REDIS_CACHE_URL: str | None = os.getenv("REDIS_CACHE_URL", "redis-14473.c93.us-e
REDIS_CACHE_PORT
:
int
=
int
(
os
.
getenv
(
"REDIS_CACHE_PORT"
,
"14473"
))
REDIS_CACHE_DB
:
int
=
int
(
os
.
getenv
(
"REDIS_CACHE_DB"
,
"0"
))
REDIS_CACHE_TURN_ON
:
bool
=
os
.
getenv
(
"REDIS_CACHE_TURN_ON"
,
"true"
)
.
lower
()
==
"true"
REDIS_PASSWORD
:
str
|
None
=
os
.
getenv
(
"REDIS_CACHE_PASSWORD"
,
"4kCCXXaJXXv7k358eG69p1lDBQtHTbQ1"
)
REDIS_PASSWORD
:
str
|
None
=
os
.
getenv
(
"REDIS_CACHE_PASSWORD"
)
REDIS_USERNAME
:
str
=
os
.
getenv
(
"REDIS_CACHE_USERNAME"
,
"default"
)
CONV_DATABASE_URL
:
str
|
None
=
os
.
getenv
(
"CONV_DATABASE_URL"
)
...
...
@@ -183,8 +181,8 @@ MONGODB_DB_NAME: str | None = os.getenv("MONGODB_DB_NAME", "cucu_note")
USE_MONGO_CONVERSATION
:
bool
=
os
.
getenv
(
"USE_MONGO_CONVERSATION"
,
"true"
)
.
lower
()
==
"true"
# MongoDB Connection Pooling
MONGODB_MAX_POOL_SIZE
:
int
=
int
(
os
.
getenv
(
"MONGODB_MAX_POOL_SIZE"
,
"5
0
"
))
MONGODB_MIN_POOL_SIZE
:
int
=
int
(
os
.
getenv
(
"MONGODB_MIN_POOL_SIZE"
,
"1
0
"
))
MONGODB_MAX_POOL_SIZE
:
int
=
int
(
os
.
getenv
(
"MONGODB_MAX_POOL_SIZE"
,
"5"
))
MONGODB_MIN_POOL_SIZE
:
int
=
int
(
os
.
getenv
(
"MONGODB_MIN_POOL_SIZE"
,
"1"
))
MONGODB_MAX_IDLE_TIME_MS
:
int
=
int
(
os
.
getenv
(
"MONGODB_MAX_IDLE_TIME_MS"
,
"45000"
))
# ====================== CANIFA INTERNAL POSTGRES ======================
...
...
backend/entrypoint.sh
100644 → 100755
View file @
2fec4891
...
...
@@ -43,18 +43,11 @@ asyncio.run(setup())
"
||
echo
"⚠️ Could not set up indexes (will retry on first request)"
# Start the server
echo
"🌟 Starting Gunicorn server..."
# Allow overriding number of workers via env, default to 1 for simplicity
WORKERS
=
"
${
GUNICORN_WORKERS
:-
1
}
"
echo
"🔧 Using Gunicorn workers:
$WORKERS
"
exec
gunicorn
\
--workers
"
$WORKERS
"
\
--worker-class
uvicorn.workers.UvicornWorker
\
--bind
0.0.0.0:5000
\
--timeout
120
\
--access-logfile
-
\
--error-logfile
-
\
--log-level
info
\
server:app
echo
"🌟 Starting Uvicorn server (hot reload enabled)..."
exec
uvicorn server:app
\
--host
0.0.0.0
\
--port
5000
\
--reload
\
--reload-dir
/app
\
--log-level
info
backend/requirements.txt
View file @
2fec4891
# Core FastAPI
fastapi==0.124.4
uvicorn==0.38.0
uvloop>=0.21.0
starlette==0.50.0
pydantic==2.12.5
pydantic_core==2.41.5
...
...
@@ -51,13 +52,10 @@ google-auth==2.45.0
# Tokenization
tiktoken==0.12.0
# Observability
# Observability (minimal - only trace API used)
opentelemetry-api==1.39.1
opentelemetry-exporter-otlp-proto-common==1.39.1
opentelemetry-exporter-otlp-proto-http==1.39.1
opentelemetry-proto==1.39.1
opentelemetry-sdk==1.39.1
opentelemetry-semantic-conventions==0.60b1
# Removed: otel exporters/proto/semantic-conventions (not configured)
# Utilities
python-dotenv==1.2.1
...
...
@@ -71,7 +69,7 @@ tenacity==9.1.2
backoff==2.2.1
regex==2025.11.3
Unidecode==1.4.0
pillow==12.0.0
# pillow==12.0.0 # Removed: not directly imported
# WebSocket
websockets==15.0.1
...
...
@@ -104,4 +102,5 @@ cachetools==6.2.4
pytest==9.0.2
# Production server
gunicorn==23.0.0
# gunicorn==23.0.0 # Removed: using uvicorn instead
aiosqlite
backend/server.py
View file @
2fec4891
...
...
@@ -2,6 +2,7 @@ import asyncio
import
os
import
platform
import
logging
from
contextlib
import
asynccontextmanager
import
uvicorn
from
fastapi
import
FastAPI
...
...
@@ -10,6 +11,7 @@ from fastapi.responses import RedirectResponse
from
api.chatbot
import
router
as
chatbot_router
from
api.memos
import
router
as
memos_router
from
api.test_chat_route
import
router
as
test_router
from
common.cache
import
redis_cache
from
common.langfuse_client
import
get_langfuse_client
from
common.middleware
import
middleware_manager
...
...
@@ -29,54 +31,44 @@ logging.basicConfig(
)
logger
=
logging
.
getLogger
(
__name__
)
# Langfuse client initialized in startup_event (not at import time)
app
=
FastAPI
(
title
=
"Contract AI Service"
,
description
=
"API for Contract AI Service"
,
version
=
"1.0.0"
,
)
# =============================================================================
#
STARTUP EVENT - Initialize Redis Cache + MongoDB
#
LIFESPAN - Initialize & cleanup resources (replaces deprecated on_event)
# =============================================================================
@
app
.
on_event
(
"startup"
)
async
def
startup_event
():
"""Initialize dependencies on startup."""
@
asynccontextmanager
async
def
lifespan
(
app
:
FastAPI
):
"""Startup & shutdown lifecycle manager."""
# --- STARTUP ---
# Initialize Redis (optional - will continue without cache if unavailable)
redis_client
=
await
redis_cache
.
initialize
()
if
redis_client
:
logger
.
info
(
"✅ Redis cache initialized for message limit"
)
else
:
logger
.
info
(
"⚠️ Redis cache unavailable - continuing without cache"
)
# MongoDB initialization (required)
from
common.mongo_client
import
init_mongodb
await
init_mongodb
()
logger
.
info
(
"✅ MongoDB connection initialized"
)
# Langfuse initialization (optional
- lazy loaded, just triggers auth check
)
# Langfuse initialization (optional)
langfuse_client
=
get_langfuse_client
()
if
langfuse_client
:
logger
.
info
(
"✅ Langfuse client ready"
)
else
:
logger
.
warning
(
"⚠️ Langfuse client not available (missing keys or disabled)"
)
yield
# App is running
@
app
.
on_event
(
"shutdown"
)
async
def
shutdown_event
():
"""Cleanup on shutdown."""
# --- SHUTDOWN ---
try
:
# Close Redis connection if exists
redis_client
=
redis_cache
.
get_client
()
if
redis_client
:
await
redis_client
.
aclose
()
logger
.
info
(
"Redis connection closed"
)
except
Exception
as
e
:
logger
.
debug
(
f
"Error closing Redis: {e}"
)
# Close MongoDB connection
try
:
from
common.mongo_client
import
close_mongodb
await
close_mongodb
()
...
...
@@ -85,6 +77,14 @@ async def shutdown_event():
logger
.
debug
(
f
"Error closing MongoDB: {e}"
)
app
=
FastAPI
(
title
=
"Contract AI Service"
,
description
=
"API for Contract AI Service"
,
version
=
"1.0.0"
,
lifespan
=
lifespan
,
)
# =============================================================================
# MIDDLEWARE SETUP - Gom Auth + RateLimit + CORS vào một chỗ
# =============================================================================
...
...
@@ -96,6 +96,7 @@ middleware_manager.setup(
cors_origins
=
CORS_ORIGINS
,
# từ environment variable
)
app
.
include_router
(
test_router
)
# No-auth test endpoints
app
.
include_router
(
chatbot_router
)
app
.
include_router
(
memos_router
)
...
...
docker-compose.yml
View file @
2fec4891
...
...
@@ -6,6 +6,10 @@ services:
build
:
context
:
./backend
dockerfile
:
Dockerfile.prod
cache_from
:
-
type=local,src=.docker/cache/backend
cache_to
:
-
type=local,dest=.docker/cache/backend
container_name
:
cuccu_backend
restart
:
unless-stopped
ports
:
...
...
@@ -30,37 +34,38 @@ services:
deploy
:
resources
:
limits
:
memory
:
2G
memory
:
512M
cpus
:
'
1.0'
reservations
:
memory
:
512
M
memory
:
128
M
cpus
:
'
0.5'
# Frontend
# Frontend
(Production build with nginx - saves ~190MB RAM)
frontend
:
build
:
context
:
./frontend
dockerfile
:
Dockerfile.prod
args
:
# Build-time envs for Vite
# Browser (người dùng) gọi trực tiếp vào host, nên dùng localhost:5000
VITE_API_BASE_URL
:
"
http://localhost:5000"
VITE_CLERK_PUBLISHABLE_KEY
:
${VITE_CLERK_PUBLISHABLE_KEY}
VITE_API_BASE_URL
:
${VITE_API_BASE_URL:-http://localhost:5000}
VITE_CLERK_PUBLISHABLE_KEY
:
${VITE_CLERK_PUBLISHABLE_KEY:-}
container_name
:
cuccu_frontend
restart
:
unless-stopped
ports
:
-
"
3001:80"
env_file
:
-
./frontend/.env
-
"
3001:80"
depends_on
:
-
backend
networks
:
-
cuccu_network
healthcheck
:
test
:
[
"
CMD"
,
"
curl"
,
"
-f
"
,
"
http://localhost:80"
]
test
:
[
"
CMD"
,
"
wget"
,
"
-q"
,
"
--spider
"
,
"
http://localhost:80"
]
interval
:
30s
timeout
:
10s
retries
:
3
start_period
:
30s
deploy
:
resources
:
limits
:
memory
:
32M
volumes
:
backend_data
:
...
...
frontend/.env
View file @
2fec4891
...
...
@@ -2,4 +2,5 @@
VITE_CLERK_PUBLISHABLE_KEY=pk_test_Y29tbXVuYWwtc3VuYmVhbS0wLmNsZXJrLmFjY291bnRzLmRldiQ
# ====================== API URL ======================
VITE_API_URL=http://localhost:8080
\ No newline at end of file
# Dev mode: point directly to backend (no nginx proxy)
VITE_API_BASE_URL=http://160.191.50.138:5000
\ No newline at end of file
frontend/Dockerfile.dev
View file @
2fec4891
FROM node:
18
-alpine
FROM node:
22
-alpine
WORKDIR /app
...
...
frontend/Dockerfile.prod
View file @
2fec4891
# Multi-stage build for production
FROM node:
18
-alpine AS builder
FROM node:
22
-alpine AS builder
WORKDIR /app
...
...
frontend/nginx.conf
View file @
2fec4891
...
...
@@ -15,6 +15,17 @@ server {
add_header
X-Content-Type-Options
"nosniff"
always
;
add_header
X-XSS-Protection
"1
;
mode=block"
always
;
# Proxy /api requests to backend container
location
/api/
{
proxy_pass
http://cuccu_backend:5000
;
proxy_http_version
1
.1
;
proxy_set_header
Host
$host
;
proxy_set_header
X-Real-IP
$remote_addr
;
proxy_set_header
X-Forwarded-For
$proxy_add_x_forwarded_for
;
proxy_set_header
X-Forwarded-Proto
$scheme
;
proxy_read_timeout
120s
;
}
# SPA routing - serve index.html for all routes
location
/
{
try_files
$uri
$uri
/
/index.html
;
...
...
frontend/src/components/ChatbotPanel.tsx
View file @
2fec4891
import
{
useState
,
useRef
,
useEffect
}
from
"react"
;
import
{
useState
,
useRef
,
useEffect
,
useImperativeHandle
,
forwardRef
,
useCallback
}
from
"react"
;
import
{
cn
}
from
"@/lib/utils"
;
import
{
XIcon
,
SendIcon
,
Loader2Icon
}
from
"lucide-react"
;
import
{
XIcon
,
SendIcon
,
Loader2Icon
,
CalendarIcon
}
from
"lucide-react"
;
import
{
API_ORIGIN
}
from
"@/service/apiClient"
;
import
{
getClerkSessionToken
}
from
"@/utils/clerk"
;
import
ReactMarkdown
,
{
Components
}
from
"react-markdown"
;
import
remarkGfm
from
"remark-gfm"
;
import
remarkBreaks
from
"remark-breaks"
;
import
{
useDateFilterNavigation
}
from
"@/hooks"
;
type
Message
=
{
role
:
"user"
|
"ai"
;
...
...
@@ -14,7 +20,11 @@ type ChatbotPanelProps = {
hideHeader
?:
boolean
;
};
const
ChatbotPanel
=
({
className
,
onClose
,
hideHeader
=
false
}:
ChatbotPanelProps
)
=>
{
export
type
ChatbotPanelHandle
=
{
clearMessages
:
()
=>
Promise
<
void
>
;
};
const
ChatbotPanel
=
forwardRef
<
ChatbotPanelHandle
,
ChatbotPanelProps
>
(({
className
,
onClose
,
hideHeader
=
false
},
ref
)
=>
{
const
[
messages
,
setMessages
]
=
useState
<
Message
[]
>
([]);
const
[
historyCursor
,
setHistoryCursor
]
=
useState
<
number
|
null
>
(
null
);
const
[
isHistoryLoading
,
setIsHistoryLoading
]
=
useState
(
false
);
...
...
@@ -30,10 +40,22 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
},
[
messages
]);
const
mapHistoryToMessages
=
(
items
:
any
[]):
Message
[]
=>
{
return
items
.
map
((
item
)
=>
({
role
:
item
.
is_human
?
"user"
:
"ai"
,
content
:
String
(
item
.
message
??
""
),
}));
return
items
.
map
((
item
)
=>
{
if
(
item
.
is_human
)
{
return
{
role
:
"user"
as
const
,
content
:
String
(
item
.
message
??
""
)
};
}
// AI messages may be stored as JSON — extract ai_response text
let
content
=
String
(
item
.
message
??
""
);
try
{
const
parsed
=
JSON
.
parse
(
content
);
if
(
parsed
&&
typeof
parsed
===
"object"
&&
parsed
.
ai_response
)
{
content
=
parsed
.
ai_response
;
}
}
catch
{
// Not JSON, use as-is
}
return
{
role
:
"ai"
as
const
,
content
};
});
};
const
loadHistory
=
async
(
isRefresh
:
boolean
)
=>
{
...
...
@@ -41,10 +63,14 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
setIsHistoryLoading
(
true
);
try
{
const
token
=
await
getClerkSessionToken
();
const
cursor
=
isRefresh
?
null
:
historyCursor
;
const
url
=
`/api/history/me?limit=20
${
cursor
?
`&before_id=
${
cursor
}
`
:
""
}
`
;
const
url
=
`
${
API_ORIGIN
}
/api/history/me?limit=20
${
cursor
?
`&before_id=
${
cursor
}
`
:
""
}
`
;
const
headers
:
Record
<
string
,
string
>
=
{};
if
(
token
)
headers
[
"Authorization"
]
=
`Bearer
${
token
}
`
;
const
res
=
await
fetch
(
url
);
const
res
=
await
fetch
(
url
,
{
headers
}
);
if
(
!
res
.
ok
)
throw
new
Error
(
"Failed to load history"
);
const
data
=
await
res
.
json
();
...
...
@@ -54,7 +80,7 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
if
(
items
.
length
===
0
)
{
if
(
isRefresh
&&
messages
.
length
===
0
)
{
// No history at all -> show default greeting
setMessages
([{
role
:
"ai"
,
content
:
"
Hello! I'm CiCi. How can I help you with your notes tod
ay?"
}]);
setMessages
([{
role
:
"ai"
,
content
:
"
Xin chào! Mình là CuCu Assistant 😊 Bạn muốn tìm ghi chú gì hôm n
ay?"
}]);
}
setHistoryCursor
(
null
);
return
;
...
...
@@ -74,13 +100,36 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
}
catch
(
error
)
{
console
.
error
(
"Failed to load history:"
,
error
);
if
(
isRefresh
&&
messages
.
length
===
0
)
{
setMessages
([{
role
:
"ai"
,
content
:
"
Hello! I'm CiCi. How can I help you with your notes tod
ay?"
}]);
setMessages
([{
role
:
"ai"
,
content
:
"
Xin chào! Mình là CuCu Assistant 😊 Bạn muốn tìm ghi chú gì hôm n
ay?"
}]);
}
}
finally
{
setIsHistoryLoading
(
false
);
}
};
const
clearMessages
=
async
()
=>
{
try
{
const
token
=
await
getClerkSessionToken
();
const
headers
:
Record
<
string
,
string
>
=
{};
if
(
token
)
headers
[
"Authorization"
]
=
`Bearer
${
token
}
`
;
const
res
=
await
fetch
(
`
${
API_ORIGIN
}
/api/history/me`
,
{
method
:
"DELETE"
,
headers
,
});
if
(
!
res
.
ok
)
throw
new
Error
(
"Failed to clear history"
);
setMessages
([{
role
:
"ai"
,
content
:
"Đã xóa lịch sử chat! Mình có thể giúp gì cho bạn?"
}]);
setHistoryCursor
(
null
);
}
catch
(
error
)
{
console
.
error
(
"Failed to clear history:"
,
error
);
}
};
useImperativeHandle
(
ref
,
()
=>
({
clearMessages
,
}));
// Load initial history on mount
useEffect
(()
=>
{
void
loadHistory
(
true
);
...
...
@@ -95,35 +144,174 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
setMessages
((
prev
)
=>
[...
prev
,
{
role
:
"user"
,
content
:
userMessage
}]);
setIsLoading
(
true
);
// Add empty AI message placeholder for streaming
const
aiMsgIndex
=
messages
.
length
+
1
;
// +1 for user msg just added
setMessages
((
prev
)
=>
[...
prev
,
{
role
:
"ai"
,
content
:
""
}]);
try
{
const
response
=
await
fetch
(
"/api/agent/chat"
,
{
const
token
=
await
getClerkSessionToken
();
const
headers
:
Record
<
string
,
string
>
=
{
"Content-Type"
:
"application/json"
};
if
(
token
)
headers
[
"Authorization"
]
=
`Bearer
${
token
}
`
;
const
response
=
await
fetch
(
`
${
API_ORIGIN
}
/api/agent/chat/stream`
,
{
method
:
"POST"
,
headers
:
{
"Content-Type"
:
"application/json"
}
,
headers
,
body
:
JSON
.
stringify
({
user_query
:
userMessage
})
});
if
(
!
response
.
ok
)
throw
new
Error
(
"Failed to get response from AI"
);
if
(
!
response
.
ok
||
!
response
.
body
)
throw
new
Error
(
"Failed to get response from AI"
);
const
data
=
await
response
.
json
();
if
(
data
.
status
===
"success"
&&
data
.
ai_response
)
{
setMessages
((
prev
)
=>
[...
prev
,
{
role
:
"ai"
,
content
:
data
.
ai_response
}]);
}
else
{
throw
new
Error
(
"Invalid response format"
);
const
reader
=
response
.
body
.
getReader
();
const
decoder
=
new
TextDecoder
();
let
buffer
=
""
;
while
(
true
)
{
const
{
done
,
value
}
=
await
reader
.
read
();
if
(
done
)
break
;
buffer
+=
decoder
.
decode
(
value
,
{
stream
:
true
});
const
lines
=
buffer
.
split
(
"
\n
"
);
// Keep the last incomplete line in the buffer
buffer
=
lines
.
pop
()
||
""
;
for
(
const
line
of
lines
)
{
const
trimmed
=
line
.
trim
();
if
(
!
trimmed
.
startsWith
(
"data: "
))
continue
;
try
{
const
data
=
JSON
.
parse
(
trimmed
.
slice
(
6
));
if
(
data
.
token
)
{
// Append token to the last AI message
setMessages
((
prev
)
=>
{
const
updated
=
[...
prev
];
const
lastMsg
=
updated
[
updated
.
length
-
1
];
if
(
lastMsg
&&
lastMsg
.
role
===
"ai"
)
{
updated
[
updated
.
length
-
1
]
=
{
...
lastMsg
,
content
:
lastMsg
.
content
+
data
.
token
,
};
}
return
updated
;
});
}
if
(
data
.
done
)
{
// Final event — replace with full response (in case of parse differences)
if
(
data
.
ai_response
)
{
setMessages
((
prev
)
=>
{
const
updated
=
[...
prev
];
const
lastMsg
=
updated
[
updated
.
length
-
1
];
if
(
lastMsg
&&
lastMsg
.
role
===
"ai"
)
{
updated
[
updated
.
length
-
1
]
=
{
...
lastMsg
,
content
:
data
.
ai_response
,
};
}
return
updated
;
});
}
}
if
(
data
.
error
)
{
throw
new
Error
(
data
.
error
);
}
}
catch
(
parseErr
)
{
// Skip malformed SSE lines
console
.
warn
(
"SSE parse error:"
,
parseErr
);
}
}
}
}
catch
(
error
)
{
console
.
error
(
"Chat error:"
,
error
);
setMessages
((
prev
)
=>
[...
prev
,
{
role
:
"ai"
,
content
:
"Sorry, I'm having trouble connecting to the server. Please check your backend."
}]);
setMessages
((
prev
)
=>
{
const
updated
=
[...
prev
];
const
lastMsg
=
updated
[
updated
.
length
-
1
];
if
(
lastMsg
&&
lastMsg
.
role
===
"ai"
&&
!
lastMsg
.
content
)
{
// Replace empty placeholder with error
updated
[
updated
.
length
-
1
]
=
{
...
lastMsg
,
content
:
"Xin lỗi, mình đang gặp sự cố kết nối. Vui lòng thử lại."
,
};
}
return
updated
;
});
}
finally
{
setIsLoading
(
false
);
}
};
// ─── Clickable dates in AI responses ───
const
navigateToDate
=
useDateFilterNavigation
();
const
renderTextWithClickableDates
=
useCallback
(
(
text
:
string
)
=>
{
// Match dates in format YYYY-MM-DD (standalone or inside parentheses)
const
dateRegex
=
/
(\d{4}
-
\d{2}
-
\d{2})
/g
;
const
parts
=
text
.
split
(
dateRegex
);
if
(
parts
.
length
===
1
)
return
text
;
// No dates found
return
parts
.
map
((
part
,
idx
)
=>
{
if
(
dateRegex
.
test
(
part
))
{
// Reset lastIndex since we're reusing regex
dateRegex
.
lastIndex
=
0
;
return
(
<
button
key=
{
idx
}
onClick=
{
()
=>
navigateToDate
(
part
)
}
className=
"inline-flex items-center gap-0.5 text-primary hover:underline cursor-pointer font-medium"
title=
{
`Xem ghi chú ngày ${part}`
}
type=
"button"
>
<
CalendarIcon
className=
"size-3 inline"
/>
{
part
}
</
button
>
);
}
return
part
;
});
},
[
navigateToDate
],
);
const
markdownComponents
:
Components
=
{
// Override text rendering in paragraphs, list items, strong, etc.
p
:
({
children
,
...
props
})
=>
(
<
p
{
...
props
}
>
{
Array
.
isArray
(
children
)
?
children
.
map
((
child
,
i
)
=>
typeof
child
===
"string"
?
<
span
key=
{
i
}
>
{
renderTextWithClickableDates
(
child
)
}
</
span
>
:
child
,
)
:
typeof
children
===
"string"
?
renderTextWithClickableDates
(
children
)
:
children
}
</
p
>
),
li
:
({
children
,
...
props
})
=>
(
<
li
{
...
props
}
>
{
Array
.
isArray
(
children
)
?
children
.
map
((
child
,
i
)
=>
typeof
child
===
"string"
?
<
span
key=
{
i
}
>
{
renderTextWithClickableDates
(
child
)
}
</
span
>
:
child
,
)
:
typeof
children
===
"string"
?
renderTextWithClickableDates
(
children
)
:
children
}
</
li
>
),
strong
:
({
children
,
...
props
})
=>
(
<
strong
{
...
props
}
>
{
typeof
children
===
"string"
?
renderTextWithClickableDates
(
children
)
:
children
}
</
strong
>
),
};
return
(
<
div
className=
{
cn
(
"w-full h-full flex flex-col bg-background border rounded-lg overflow-hidden shadow-sm"
,
className
)
}
>
{
/* Header */
}
{
!
hideHeader
&&
(
<
div
className=
"flex items-center justify-between p-4 border-b bg-muted/30"
>
<
h2
className=
"font-semibold text-sm"
>
C
iCi
Assistant
</
h2
>
<
h2
className=
"font-semibold text-sm"
>
C
uCu
Assistant
</
h2
>
{
onClose
&&
(
<
button
onClick=
{
onClose
}
className=
"p-1 hover:bg-muted rounded-md transition-colors"
>
<
XIcon
className=
"size-4"
/>
...
...
@@ -156,7 +344,15 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
?
"bg-primary text-primary-content rounded-tr-none"
:
"bg-muted text-foreground rounded-tl-none border"
)
}
>
{
msg
.
content
}
{
msg
.
role
===
"ai"
?
(
<
div
className=
"markdown-content max-w-none break-words text-inherit"
>
<
ReactMarkdown
remarkPlugins=
{
[
remarkGfm
,
remarkBreaks
]
}
components=
{
markdownComponents
}
>
{
msg
.
content
}
</
ReactMarkdown
>
</
div
>
)
:
(
msg
.
content
)
}
</
div
>
</
div
>
))
}
...
...
@@ -179,7 +375,7 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
<
input
value=
{
input
}
onChange=
{
(
e
)
=>
setInput
(
e
.
target
.
value
)
}
placeholder=
"
Ask CiCi something
..."
placeholder=
"
Hỏi CuCu gì đó
..."
className=
"flex-1 bg-muted/50 border-none focus:ring-1 focus:ring-primary rounded-xl px-4 py-2 text-sm outline-none transition-all"
disabled=
{
isLoading
}
/>
...
...
@@ -194,6 +390,8 @@ const ChatbotPanel = ({ className, onClose, hideHeader = false }: ChatbotPanelPr
</
div
>
</
div
>
);
};
});
ChatbotPanel
.
displayName
=
"ChatbotPanel"
;
export
default
ChatbotPanel
;
frontend/src/components/ChatbotWidget.tsx
View file @
2fec4891
import
{
MessageCircleIcon
,
XIcon
,
Maximize2Icon
,
Minimize2Icon
}
from
"lucide-react"
;
import
{
MessageCircleIcon
,
XIcon
,
Maximize2Icon
,
Minimize2Icon
,
Trash2Icon
}
from
"lucide-react"
;
import
{
useState
,
useRef
,
useCallback
,
useEffect
}
from
"react"
;
import
{
cn
}
from
"@/lib/utils"
;
import
ChatbotPanel
from
"./ChatbotPanel"
;
import
ChatbotPanel
,
{
type
ChatbotPanelHandle
}
from
"./ChatbotPanel"
;
type
Position
=
{
x
:
number
;
...
...
@@ -44,6 +44,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
const
hasDraggedRef
=
useRef
(
false
);
const
dragRef
=
useRef
<
{
startX
:
number
;
startY
:
number
;
startPosX
:
number
;
startPosY
:
number
}
|
null
>
(
null
);
const
animationFrameRef
=
useRef
<
number
|
null
>
(
null
);
const
chatbotPanelRef
=
useRef
<
ChatbotPanelHandle
>
(
null
);
// Save position to localStorage when it changes
useEffect
(()
=>
{
...
...
@@ -112,11 +113,16 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
},
[]);
const
handleMouseUp
=
useCallback
(()
=>
{
setIsDragging
(
false
);
dragRef
.
current
=
null
;
},
[]);
// Toggle via onClick — only if user didn't drag
const
handleClick
=
useCallback
((
e
:
React
.
MouseEvent
)
=>
{
e
.
stopPropagation
();
if
(
!
hasDraggedRef
.
current
)
{
setIsOpen
((
prev
)
=>
!
prev
);
}
setIsDragging
(
false
);
dragRef
.
current
=
null
;
},
[]);
useEffect
(()
=>
{
...
...
@@ -189,7 +195,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
useEffect
(()
=>
{
if
(
isDragging
)
{
window
.
addEventListener
(
"touchmove"
,
handleTouchMove
,
{
passive
:
true
});
window
.
addEventListener
(
"touchend"
,
handleTouchEnd
,
{
passive
:
true
}
);
window
.
addEventListener
(
"touchend"
,
handleTouchEnd
);
return
()
=>
{
window
.
removeEventListener
(
"touchmove"
,
handleTouchMove
);
window
.
removeEventListener
(
"touchend"
,
handleTouchEnd
);
...
...
@@ -267,11 +273,21 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
<
div
className=
"flex items-center gap-2"
>
<
span
className=
"inline-flex size-2 rounded-full bg-emerald-500"
/>
<
div
>
<
p
className=
"text-sm font-semibold leading-none"
>
C
iCi
Assistant
</
p
>
<
p
className=
"text-sm font-semibold leading-none"
>
C
uCu
Assistant
</
p
>
<
p
className=
"text-xs text-muted-foreground"
>
Notes chat
</
p
>
</
div
>
</
div
>
<
div
className=
"flex items-center gap-1"
>
{
/* Nút Clear Messages */
}
<
button
type=
"button"
className=
"rounded-full p-1.5 text-muted-foreground transition-colors hover:bg-destructive/10 hover:text-destructive"
onClick=
{
()
=>
chatbotPanelRef
.
current
?.
clearMessages
()
}
aria
-
label=
"Clear messages"
title=
"Xóa lịch sử chat"
>
<
Trash2Icon
className=
"size-4"
/>
</
button
>
{
/* Nút Expand/Collapse */
}
<
button
type=
"button"
...
...
@@ -301,7 +317,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
{
/* Chat Content - không có header riêng nữa */
}
<
div
className=
"h-[calc(100%-52px)]"
>
<
ChatbotPanel
variant=
"widget"
className=
"border-0 rounded-none"
hideHeader
/>
<
ChatbotPanel
ref=
{
chatbotPanelRef
}
variant=
"widget"
className=
"border-0 rounded-none"
hideHeader
/>
</
div
>
</
div
>
)
}
...
...
@@ -323,6 +339,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
:
"bg-primary text-primary-foreground"
,
)
}
onMouseDown=
{
handleMouseDown
}
onClick=
{
handleClick
}
onTouchStart=
{
handleTouchStart
}
role=
"button"
tabIndex=
{
0
}
...
...
frontend/src/components/CreateShortcutDialog.tsx
View file @
2fec4891
...
...
@@ -14,6 +14,7 @@ import useLoading from "@/hooks/useLoading";
import
{
handleError
}
from
"@/lib/error"
;
import
{
Shortcut
,
ShortcutSchema
}
from
"@/types/proto/api/v1/shortcut_service_pb"
;
import
{
useTranslate
}
from
"@/utils/i18n"
;
import
{
useTagCounts
}
from
"@/hooks/useUserQueries"
;
interface
Props
{
open
:
boolean
;
...
...
@@ -26,17 +27,36 @@ function CreateShortcutDialog({ open, onOpenChange, shortcut: initialShortcut, o
const
t
=
useTranslate
();
const
user
=
useCurrentUser
();
const
{
refetchSettings
}
=
useAuth
();
const
{
data
:
tagCounts
}
=
useTagCounts
(
true
);
const
[
title
,
setTitle
]
=
useState
<
string
>
(
initialShortcut
?.
title
||
""
);
const
[
tag
,
setTag
]
=
useState
<
string
>
(
""
);
const
[
selectedTags
,
setSelectedTags
]
=
useState
<
string
[]
>
([]);
const
[
tagSearch
,
setTagSearch
]
=
useState
(
""
);
const
[
timeRange
,
setTimeRange
]
=
useState
<
"all"
|
"today"
|
"last7"
|
"last30"
>
(
"all"
);
const
[
pinnedOnly
,
setPinnedOnly
]
=
useState
(
false
);
const
[
hasTaskListOnly
,
setHasTaskListOnly
]
=
useState
(
false
);
const
[
hasLinkOnly
,
setHasLinkOnly
]
=
useState
(
false
);
const
[
hasCodeOnly
,
setHasCodeOnly
]
=
useState
(
false
);
const
[
untaggedOnly
,
setUntaggedOnly
]
=
useState
(
false
);
const
[
contentSearch
,
setContentSearch
]
=
useState
(
""
);
const
requestState
=
useLoading
(
false
);
const
isCreating
=
!
initialShortcut
;
// Get all available tags sorted by count (descending)
const
allTags
=
tagCounts
?
Object
.
entries
(
tagCounts
)
.
sort
(([,
a
],
[,
b
])
=>
b
-
a
)
.
map
(([
tag
])
=>
tag
)
:
[];
// Filter tags based on search input
const
filteredTags
=
tagSearch
.
trim
()
?
allTags
.
filter
((
tag
)
=>
tag
.
toLowerCase
().
includes
(
tagSearch
.
toLowerCase
()))
:
allTags
;
const
toggleTag
=
(
tag
:
string
)
=>
{
setSelectedTags
((
prev
)
=>
(
prev
.
includes
(
tag
)
?
prev
.
filter
((
t
)
=>
t
!==
tag
)
:
[...
prev
,
tag
]));
};
// Reset or hydrate form state when dialog opens / shortcut changes
useEffect
(()
=>
{
if
(
!
open
)
return
;
...
...
@@ -46,9 +66,18 @@ function CreateShortcutDialog({ open, onOpenChange, shortcut: initialShortcut, o
const
filter
=
initialShortcut
.
filter
||
""
;
// Basic, best-effort parse from existing filter string
const
tagMatch
=
filter
.
match
(
/"
([^
"
]
+
)
"
\s
+in
\s
+tags/
);
setTag
(
tagMatch
?
tagMatch
[
1
]
:
""
);
// Parse multiple tags from filter string
// Format: "tag1" in tags && "tag2" in tags
const
tagMatches
=
filter
.
matchAll
(
/"
([^
"
]
+
)
"
\s
+in
\s
+tags/g
);
const
parsedTags
:
string
[]
=
[];
for
(
const
match
of
tagMatches
)
{
parsedTags
.
push
(
match
[
1
]);
}
setSelectedTags
(
parsedTags
);
// Parse content search
const
contentMatch
=
filter
.
match
(
/content
\.
contains
\(
"
([^
"
]
+
)
"
\)
/
);
setContentSearch
(
contentMatch
?
contentMatch
[
1
]
:
""
);
setPinnedOnly
(
filter
.
includes
(
"pinned"
));
setHasTaskListOnly
(
filter
.
includes
(
"has_task_list"
));
...
...
@@ -68,21 +97,28 @@ function CreateShortcutDialog({ open, onOpenChange, shortcut: initialShortcut, o
}
else
{
// New shortcut: reset to defaults
setTitle
(
""
);
setTag
(
""
);
setSelectedTags
([]);
setTagSearch
(
""
);
setTimeRange
(
"all"
);
setPinnedOnly
(
false
);
setHasTaskListOnly
(
false
);
setHasLinkOnly
(
false
);
setHasCodeOnly
(
false
);
setUntaggedOnly
(
false
);
setContentSearch
(
""
);
}
},
[
open
,
initialShortcut
]);
const
buildFilter
=
():
string
=>
{
const
conditions
:
string
[]
=
[];
if
(
tag
.
trim
())
{
conditions
.
push
(
`"
${
tag
.
trim
()}
" in tags`
);
// Multiple tags support
for
(
const
tag
of
selectedTags
)
{
conditions
.
push
(
`"
${
tag
}
" in tags`
);
}
if
(
contentSearch
.
trim
())
{
conditions
.
push
(
`content.contains("
${
contentSearch
.
trim
()}
")`
);
}
if
(
pinnedOnly
)
{
...
...
@@ -135,19 +171,19 @@ function CreateShortcutDialog({ open, onOpenChange, shortcut: initialShortcut, o
filter
,
},
});
toast
.
success
(
"
Create workspace successfully
"
);
toast
.
success
(
"
Tạo workspace thành công! 🎉
"
);
}
else
{
await
shortcutServiceClient
.
updateShortcut
({
shortcut
:
{
...
create
(
ShortcutSchema
,
{
name
:
initialShortcut
!
.
name
,
name
:
initialShortcut
!
.
name
,
title
:
title
.
trim
(),
filter
,
}),
},
updateMask
:
create
(
FieldMaskSchema
,
{
paths
:
[
"title"
,
"filter"
]
}),
});
toast
.
success
(
"
Update workspace successfully
"
);
toast
.
success
(
"
Cập nhật workspace thành công! ✅
"
);
}
await
refetchSettings
();
requestState
.
setFinish
();
...
...
@@ -163,32 +199,112 @@ function CreateShortcutDialog({ open, onOpenChange, shortcut: initialShortcut, o
return
(
<
Dialog
open=
{
open
}
onOpenChange=
{
onOpenChange
}
>
<
DialogContent
className=
"max-w-
md
"
>
<
DialogContent
className=
"max-w-
lg max-h-[85vh] overflow-y-auto
"
>
<
DialogHeader
>
<
DialogTitle
>
{
`${isCreating ? t("common.create") : t("common.edit")}
${t("common.shortcuts")}
`
}
</
DialogTitle
>
<
DialogTitle
>
{
`${isCreating ? t("common.create") : t("common.edit")}
Workspace
`
}
</
DialogTitle
>
</
DialogHeader
>
<
div
className=
"flex flex-col gap-4"
>
{
/* Title */
}
<
div
className=
"grid gap-2"
>
<
Label
htmlFor=
"title"
>
{
t
(
"common.title"
)
}
</
Label
>
<
Input
id=
"title"
type=
"text"
placeholder=
""
value=
{
title
}
onChange=
{
(
e
)
=>
setTitle
(
e
.
target
.
value
)
}
/>
<
Input
id=
"title"
type=
"text"
placeholder=
"VD: 📋 Công việc, 🔥 Quan trọng..."
value=
{
title
}
onChange=
{
(
e
)
=>
setTitle
(
e
.
target
.
value
)
}
/>
</
div
>
{
/* Filter conditions */
}
<
div
className=
"grid gap-2"
>
<
Label
className=
"text-sm font-medium"
>
Điều kiện (chọn, không cần gõ cú pháp)
</
Label
>
<
div
className=
"space-y-3 rounded-md border bg-muted/40 p-3"
>
<
Label
className=
"text-sm font-medium"
>
Điều kiện lọc
</
Label
>
<
div
className=
"space-y-4 rounded-md border bg-muted/40 p-3"
>
{
/* Multi-tag selector */
}
<
div
className=
"grid gap-2"
>
<
Label
htmlFor=
"shortcut-tag"
className=
"text-xs text-muted-foreground"
>
Tag (tùy chọn)
<
Label
className=
"text-xs text-muted-foreground"
>
🏷️ Chọn Tags (
{
selectedTags
.
length
>
0
?
`đã chọn ${selectedTags.length}`
:
"chưa chọn"
}
)
</
Label
>
{
/* Selected tags display */
}
{
selectedTags
.
length
>
0
&&
(
<
div
className=
"flex flex-wrap gap-1.5 mb-1"
>
{
selectedTags
.
map
((
tag
)
=>
(
<
button
key=
{
tag
}
type=
"button"
onClick=
{
()
=>
toggleTag
(
tag
)
}
className=
"inline-flex items-center gap-1 px-2.5 py-1 rounded-full text-xs font-medium bg-primary text-primary-foreground hover:bg-primary/80 transition-colors"
>
#
{
tag
}
<
span
className=
"ml-0.5 text-[10px] opacity-70"
>
✕
</
span
>
</
button
>
))
}
</
div
>
)
}
{
/* Search filter for tags */
}
{
allTags
.
length
>
5
&&
(
<
Input
type=
"text"
placeholder=
"🔍 Tìm tag..."
value=
{
tagSearch
}
onChange=
{
(
e
)
=>
setTagSearch
(
e
.
target
.
value
)
}
className=
"h-8 text-xs"
/>
)
}
{
/* Available tags grid */
}
{
allTags
.
length
>
0
?
(
<
div
className=
"flex flex-wrap gap-1.5 max-h-32 overflow-y-auto rounded-md border bg-background p-2"
>
{
filteredTags
.
map
((
tag
)
=>
{
const
isSelected
=
selectedTags
.
includes
(
tag
);
const
count
=
tagCounts
?.[
tag
]
||
0
;
return
(
<
button
key=
{
tag
}
type=
"button"
onClick=
{
()
=>
toggleTag
(
tag
)
}
className=
{
`inline-flex items-center gap-1 px-2 py-0.5 rounded-full text-xs border transition-all cursor-pointer ${isSelected
? "bg-primary text-primary-foreground border-primary shadow-sm"
: "bg-muted/50 text-muted-foreground border-border hover:border-primary/50 hover:bg-muted"
}`
}
>
<
span
className=
"opacity-60"
>
#
</
span
>
{
tag
}
<
span
className=
"text-[10px] opacity-50"
>
(
{
count
}
)
</
span
>
</
button
>
);
})
}
{
filteredTags
.
length
===
0
&&
tagSearch
&&
(
<
span
className=
"text-xs text-muted-foreground italic p-1"
>
Không tìm thấy tag
"
{
tagSearch
}
"
</
span
>
)
}
</
div
>
)
:
(
<
div
className=
"text-xs text-muted-foreground italic p-2 border rounded-md bg-background"
>
Chưa có tag nào. Hãy thêm #tag vào ghi chú trước.
</
div
>
)
}
</
div
>
{
/* Content search */
}
<
div
className=
"grid gap-2"
>
<
Label
className=
"text-xs text-muted-foreground"
>
🔍 Tìm theo nội dung (tùy chọn)
</
Label
>
<
Input
id=
"shortcut-tag"
type=
"text"
placeholder=
'Ví dụ: work, kafka'
value=
{
tag
}
onChange=
{
(
e
)
=>
setTag
(
e
.
target
.
value
)
}
placeholder=
"VD: docker, kubernetes..."
value=
{
contentSearch
}
onChange=
{
(
e
)
=>
setContentSearch
(
e
.
target
.
value
)
}
className=
"h-8 text-sm"
/>
</
div
>
{
/* Time range */
}
<
div
className=
"grid gap-2"
>
<
Label
className=
"text-xs text-muted-foreground"
>
Thời gian
</
Label
>
<
Label
className=
"text-xs text-muted-foreground"
>
⏰
Thời gian
</
Label
>
<
select
className=
"h-9 rounded-md border border-input bg-background px-2 text-sm"
value=
{
timeRange
}
...
...
@@ -200,28 +316,30 @@ function CreateShortcutDialog({ open, onOpenChange, shortcut: initialShortcut, o
<
option
value=
"last30"
>
30 ngày gần đây
</
option
>
</
select
>
</
div
>
{
/* Content type filters */
}
<
div
className=
"grid gap-2"
>
<
Label
className=
"text-xs text-muted-foreground"
>
Lọc theo loại nội dung
</
Label
>
<
Label
className=
"text-xs text-muted-foreground"
>
📋
Lọc theo loại nội dung
</
Label
>
<
div
className=
"flex flex-col gap-2 text-sm text-foreground"
>
<
label
className=
"flex items-center gap-2"
>
<
Checkbox
checked=
{
pinnedOnly
}
onCheckedChange=
{
(
v
)
=>
setPinnedOnly
(
Boolean
(
v
))
}
/>
<
span
>
Chỉ ghi chú được ghim
</
span
>
<
span
>
📌
Chỉ ghi chú được ghim
</
span
>
</
label
>
<
label
className=
"flex items-center gap-2"
>
<
Checkbox
checked=
{
hasTaskListOnly
}
onCheckedChange=
{
(
v
)
=>
setHasTaskListOnly
(
Boolean
(
v
))
}
/>
<
span
>
Có checklist (task list)
</
span
>
<
span
>
✅
Có checklist (task list)
</
span
>
</
label
>
<
label
className=
"flex items-center gap-2"
>
<
Checkbox
checked=
{
hasLinkOnly
}
onCheckedChange=
{
(
v
)
=>
setHasLinkOnly
(
Boolean
(
v
))
}
/>
<
span
>
Có liên kết
</
span
>
<
span
>
🔗
Có liên kết
</
span
>
</
label
>
<
label
className=
"flex items-center gap-2"
>
<
Checkbox
checked=
{
hasCodeOnly
}
onCheckedChange=
{
(
v
)
=>
setHasCodeOnly
(
Boolean
(
v
))
}
/>
<
span
>
Có code block
</
span
>
<
span
>
💻
Có code block
</
span
>
</
label
>
<
label
className=
"flex items-center gap-2"
>
<
Checkbox
checked=
{
untaggedOnly
}
onCheckedChange=
{
(
v
)
=>
setUntaggedOnly
(
Boolean
(
v
))
}
/>
<
span
>
Chưa có tag (ghi chú nháp)
</
span
>
<
span
>
📝
Chưa có tag (ghi chú nháp)
</
span
>
</
label
>
</
div
>
</
div
>
...
...
frontend/src/components/StatisticsView/MonthNavigator.tsx
View file @
2fec4891
...
...
@@ -8,9 +8,9 @@ import type { MonthNavigatorProps } from "@/types/statistics";
export
const
MonthNavigator
=
({
visibleMonth
,
onMonthChange
,
activityStats
}:
MonthNavigatorProps
)
=>
{
const
[
isOpen
,
setIsOpen
]
=
useState
(
false
);
const
currentMonth
=
new
Date
(
visibleMonth
);
const
currentYear
=
getYearFromDate
(
visibleMonth
);
const
currentMonthNum
=
getMonthFromDate
(
visibleMonth
);
const
currentMonth
=
new
Date
(
currentYear
,
currentMonthNum
-
1
,
1
);
const
handlePrevMonth
=
()
=>
{
onMonthChange
(
addMonths
(
visibleMonth
,
-
1
));
...
...
frontend/src/pages/Home.tsx
View file @
2fec4891
import
{
useEffect
}
from
"react"
;
import
{
useEffect
}
from
"react"
;
import
{
useSearchParams
}
from
"react-router-dom"
;
import
{
MemoRenderContext
}
from
"@/components/MasonryView"
;
import
MemoView
from
"@/components/MemoView"
;
...
...
@@ -53,7 +53,7 @@ const Home = () => {
renderer=
{
(
memo
:
Memo
,
context
?:
MemoRenderContext
)
=>
(
<
MemoView
key=
{
`${memo.name}-${memo.displayTime}`
}
memo=
{
memo
}
showVisibility
showPinned
compact=
{
context
?.
compact
}
/>
)
}
listSort=
{
(
memos
)
=>
memos
.
filter
((
m
)
=>
!
m
.
pinned
)
}
// Exclude pinned from regular list
listSort=
{
listSort
}
orderBy=
{
orderBy
}
filter=
{
memoFilter
}
/>
...
...
frontend/src/service/apiClient.ts
View file @
2fec4891
...
...
@@ -2,9 +2,10 @@ import { redirectOnAuthFailure } from "@/utils/auth-redirect";
import
{
getClerkSessionToken
}
from
"@/utils/clerk"
;
import
type
{
RequestOptions
}
from
"./types"
;
// Call backend directly (bypass Vite proxy).
// Override via VITE_API_BASE_URL, e.g. "http://localhost:5000"
export
const
API_ORIGIN
=
(
import
.
meta
.
env
.
VITE_API_BASE_URL
as
string
|
undefined
)
||
"http://localhost:5000"
;
// API origin - empty string = relative URLs (proxied via nginx in Docker).
// Override via VITE_API_BASE_URL, e.g. "http://localhost:5000" for local dev without Docker.
const
_envOrigin
=
import
.
meta
.
env
.
VITE_API_BASE_URL
as
string
|
undefined
;
export
const
API_ORIGIN
:
string
=
(
_envOrigin
!==
undefined
&&
_envOrigin
!==
""
)
?
_envOrigin
:
""
;
export
const
API_BASE
=
`
${
API_ORIGIN
}
/api/v1`
;
const
parseBody
=
async
(
response
:
Response
):
Promise
<
unknown
>
=>
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment