Commit 2fec4891 authored by root's avatar root

Update backend agent, API routes, frontend components, docker config & .gitignore

parent fe844925
...@@ -69,3 +69,20 @@ run.txt ...@@ -69,3 +69,20 @@ run.txt
# Ralph config (may contain API key) # Ralph config (may contain API key)
.cursor/ralph-config.json .cursor/ralph-config.json
# Docker cache
.docker/
# Environment
.env
# Frontend dist probes
frontend/dist-*/
# Temp configs
frontend/vite.probe*.config.mjs
frontend/vite.temp*.mjs
frontend/vite.trace*.config.mjs
frontend/vite.no-mermaid.config.mjs
frontend/src/__build_probe*.ts
frontend/src/__probe_*.ts
...@@ -3,6 +3,7 @@ Fashion Q&A Agent Controller ...@@ -3,6 +3,7 @@ Fashion Q&A Agent Controller
Langfuse will auto-trace via LangChain integration (no code changes needed). Langfuse will auto-trace via LangChain integration (no code changes needed).
""" """
import json
import logging import logging
import time import time
import uuid import uuid
...@@ -78,19 +79,31 @@ async def chat_controller( ...@@ -78,19 +79,31 @@ async def chat_controller(
memory = await get_conversation_manager() memory = await get_conversation_manager()
# Load History # Load History
history_dicts = await memory.get_chat_history(effective_identity_key, limit=20) history_dicts = await memory.get_chat_history(effective_identity_key, limit=5)
history_messages: list[BaseMessage] = [ history_messages: list[BaseMessage] = []
HumanMessage(content=m["message"]) if m["is_human"] else AIMessage(content=m["message"]) for m in history_dicts:
for m in history_dicts if m["is_human"]:
] history_messages.append(HumanMessage(content=m["message"]))
else:
# AI responses may be saved as JSON — extract readable text
ai_content = m["message"]
try:
parsed = json.loads(ai_content)
if isinstance(parsed, dict) and "ai_response" in parsed:
ai_content = parsed["ai_response"]
except (json.JSONDecodeError, TypeError):
pass
history_messages.append(AIMessage(content=ai_content))
# Prepare State # Prepare State
# - history: previous conversation messages (for context)
# - messages: starts with current query only (tool calls will be appended by LangGraph)
# - user_query: current user message
user_query_message: BaseMessage = HumanMessage(content=query) user_query_message: BaseMessage = HumanMessage(content=query)
messages_with_query: list[BaseMessage] = [*history_messages, user_query_message]
initial_state: AgentState = { initial_state: AgentState = {
"user_query": user_query_message, "user_query": user_query_message,
"messages": messages_with_query, "messages": [user_query_message],
"history": history_messages, "history": history_messages,
"user_id": user_id, "user_id": user_id,
"images_embedding": [], "images_embedding": [],
...@@ -124,7 +137,9 @@ async def chat_controller( ...@@ -124,7 +137,9 @@ async def chat_controller(
# Parse Response # Parse Response
all_product_ids = extract_product_ids(result.get("messages", [])) all_product_ids = extract_product_ids(result.get("messages", []))
ai_raw_content = result.get("ai_response").content if result.get("ai_response") else "" ai_raw_content = result.get("ai_response").content if result.get("ai_response") else ""
logger.info("RAW LLM output (%d chars): %s", len(ai_raw_content), ai_raw_content[:500])
ai_text_response, final_product_ids = parse_ai_response(ai_raw_content, all_product_ids) ai_text_response, final_product_ids = parse_ai_response(ai_raw_content, all_product_ids)
logger.info("PARSED ai_response (%d chars): %s", len(ai_text_response), ai_text_response[:300])
response_payload = { response_payload = {
"ai_response": ai_text_response, "ai_response": ai_text_response,
...@@ -153,3 +168,157 @@ async def chat_controller( ...@@ -153,3 +168,157 @@ async def chat_controller(
logger.info("chat_controller finished in %.2fs", duration) logger.info("chat_controller finished in %.2fs", duration)
return {**response_payload, "cached": False} return {**response_payload, "cached": False}
async def chat_controller_stream(
    query: str,
    user_id: str,
    model_name: str = DEFAULT_MODEL,
    images: list[str] | None = None,
    identity_key: str | None = None,
):
    """
    Streaming controller — yields SSE-ready JSON strings with token chunks.

    Yields:
        str: JSON-encoded events:
            {"token": "partial text"}                  — during streaming
            {"done": true, "ai_response": "full text"} — final event

    History (and cache, when enabled) is persisted after the stream completes.

    Args:
        query: Current user message.
        user_id: Primary user identifier (used in graph state and tracing).
        model_name: Model name (logged here; actual model selection happens
            inside the graph — TODO confirm).
        images: Optional image payloads forwarded via the runnable config.
        identity_key: History/cache key; falls back to user_id when absent.
    """
    effective_identity_key = identity_key or user_id
    logger.info(
        "chat_controller_stream start: model=%s, user_id=%s",
        model_name, user_id,
    )

    # ====================== CACHE LAYER ======================
    if REDIS_CACHE_TURN_ON:
        cached_response = await redis_cache.get_response(
            user_id=effective_identity_key, query=query
        )
        if cached_response:
            # Lazy %-formatting: string is only built when INFO is enabled
            # (the original used an eagerly-evaluated f-string).
            logger.info("⚡ CACHE HIT (stream) for identity_key=%s", effective_identity_key)
            # Stream the cached answer as a single chunk, then the final event.
            ai_text = cached_response.get("ai_response", "")
            yield json.dumps({"token": ai_text}, ensure_ascii=False)
            yield json.dumps({"done": True, "ai_response": ai_text}, ensure_ascii=False)
            # Persist this turn to conversation history even on a cache hit.
            memory = await get_conversation_manager()
            await handle_post_chat_async(
                memory=memory,
                identity_key=effective_identity_key,
                human_query=query,
                ai_response=cached_response,
            )
            return

    # ====================== STREAM LLM FLOW ======================
    graph = build_graph()
    memory = await get_conversation_manager()

    # Load recent history. AI turns may have been stored as JSON payloads,
    # so extract the human-readable "ai_response" field when present.
    history_dicts = await memory.get_chat_history(effective_identity_key, limit=5)
    history_messages: list[BaseMessage] = []
    for m in history_dicts:
        if m["is_human"]:
            history_messages.append(HumanMessage(content=m["message"]))
        else:
            ai_content = m["message"]
            try:
                parsed = json.loads(ai_content)
                if isinstance(parsed, dict) and "ai_response" in parsed:
                    ai_content = parsed["ai_response"]
            except (json.JSONDecodeError, TypeError):
                # Not JSON — keep the raw stored message.
                pass
            history_messages.append(AIMessage(content=ai_content))

    user_query_message: BaseMessage = HumanMessage(content=query)
    initial_state: AgentState = {
        "user_query": user_query_message,
        "messages": [user_query_message],
        "history": history_messages,
        "user_id": user_id,
        "images_embedding": [],
        "ai_response": None,
    }

    run_uuid = uuid.uuid4()
    run_id_str = str(run_uuid)
    langfuse_handler = get_callback_handler()
    exec_config = RunnableConfig(
        configurable={
            "user_id": user_id,
            "transient_images": images or [],
            "run_id": run_id_str,
        },
        run_id=run_uuid,
        metadata={"run_id": run_id_str, "tags": "chatbot,production,stream"},
        callbacks=[langfuse_handler] if langfuse_handler else [],
    )

    # Stream token events from the graph (LangGraph astream_events v2).
    start_time = time.time()
    full_response = ""
    session_id = f"{user_id}-{run_id_str[:8]}"

    with propagate_attributes(user_id=user_id, session_id=session_id):
        async for event in graph.astream_events(
            initial_state, config=exec_config, version="v2"
        ):
            # Only forward text chunks emitted by the chat model itself
            # (the original also had a no-op "on_chain_end" branch tracking
            # an unused is_final_response flag — removed as dead code).
            if event.get("event", "") != "on_chat_model_stream":
                continue
            chunk = event.get("data", {}).get("chunk")
            if not (chunk and hasattr(chunk, "content") and chunk.content):
                continue
            # Skip tool-call chunks — they carry no user-facing text.
            if hasattr(chunk, "tool_calls") and chunk.tool_calls:
                continue
            token = chunk.content
            full_response += token
            yield json.dumps({"token": token}, ensure_ascii=False)

    duration = time.time() - start_time
    logger.info(
        "chat_controller_stream finished in %.2fs (%d chars)",
        duration, len(full_response),
    )

    # Parse the accumulated raw output and emit the terminal event.
    ai_text_response, _ = parse_ai_response(full_response, [])
    yield json.dumps({"done": True, "ai_response": ai_text_response}, ensure_ascii=False)

    # Build response payload for caching and history.
    response_payload = {
        "ai_response": ai_text_response,
        "product_ids": [],
    }

    # Cache only non-empty answers so a failed/empty stream is not replayed
    # to subsequent identical queries for the full TTL.
    if REDIS_CACHE_TURN_ON and ai_text_response:
        await redis_cache.set_response(
            user_id=effective_identity_key,
            query=query,
            response_data=response_payload,
            ttl=300,
        )

    # Save the turn to conversation history.
    await handle_post_chat_async(
        memory=memory,
        identity_key=effective_identity_key,
        human_query=query,
        ai_response=response_payload,
    )
...@@ -46,29 +46,31 @@ class CANIFAGraph: ...@@ -46,29 +46,31 @@ class CANIFAGraph:
self.retrieval_tools = self.all_tools self.retrieval_tools = self.all_tools
self.llm_with_tools = self.llm.bind_tools(self.all_tools, strict=True) self.llm_with_tools = self.llm.bind_tools(self.all_tools, strict=True)
self.system_prompt = get_system_prompt() # NOTE: prompt is NOT cached here — fetched fresh each request
self.prompt_template = ChatPromptTemplate.from_messages( # so Langfuse updates take effect immediately.
self.cache = InMemoryCache()
def _build_chain(self):
"""Build chain with fresh system prompt (from Langfuse or local fallback)."""
system_prompt = get_system_prompt()
prompt_template = ChatPromptTemplate.from_messages(
[ [
("system", self.system_prompt), ("system", system_prompt),
MessagesPlaceholder(variable_name="history"), MessagesPlaceholder(variable_name="history"),
MessagesPlaceholder(variable_name="user_query"), MessagesPlaceholder(variable_name="user_query"),
MessagesPlaceholder(variable_name="messages"), MessagesPlaceholder(variable_name="messages"),
] ]
) )
self.chain = self.prompt_template | self.llm_with_tools return prompt_template | self.llm_with_tools
self.cache = InMemoryCache()
async def _agent_node(self, state: AgentState, config: RunnableConfig) -> dict: async def _agent_node(self, state: AgentState, config: RunnableConfig) -> dict:
"""Agent node - Chỉ việc đổ dữ liệu riêng vào khuôn đã có sẵn.""" """Agent node — rebuilds chain each call for realtime prompt updates."""
messages = state.get("messages", []) messages = state.get("messages", [])
history = state.get("history", []) history = state.get("history", [])
user_query = state.get("user_query") user_query = state.get("user_query")
transient_images = config.get("configurable", {}).get("transient_images", []) chain = self._build_chain()
if transient_images and messages: response = await chain.ainvoke({
pass
# Invoke chain with user_query, history, and messages
response = await self.chain.ainvoke({
"user_query": [user_query] if user_query else [], "user_query": [user_query] if user_query else [],
"history": history, "history": history,
"messages": messages "messages": messages
......
...@@ -10,7 +10,7 @@ import uuid ...@@ -10,7 +10,7 @@ import uuid
from langchain_core.messages import HumanMessage, ToolMessage from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import RunnableConfig from langchain_core.runnables import RunnableConfig
from common.conversation_manager import ConversationManager from common.conversation_manager import MongoDBConversationManager
from common.langfuse_client import get_callback_handler from common.langfuse_client import get_callback_handler
from .models import AgentState from .models import AgentState
...@@ -137,7 +137,7 @@ def prepare_execution_context(query: str, user_id: str, history: list, images: l ...@@ -137,7 +137,7 @@ def prepare_execution_context(query: str, user_id: str, history: list, images: l
async def handle_post_chat_async( async def handle_post_chat_async(
memory: ConversationManager, memory: MongoDBConversationManager,
identity_key: str, identity_key: str,
human_query: str, human_query: str,
ai_response: dict | None ai_response: dict | None
......
""" """
CiCi Fashion Consultant - System Prompt CuCu Assistant - System Prompt
Version 3.0 - Dynamic from File Supports two modes:
1. Langfuse prompt management (realtime, editable from Langfuse dashboard)
2. Local fallback (inline template)
""" """
import os import logging
from datetime import datetime from datetime import datetime
from functools import lru_cache
PROMPT_FILE_PATH = os.path.join(os.path.dirname(__file__), "system_prompt.txt") from common.timezone_config import VIETNAM_TZ
logger = logging.getLogger(__name__)
def _ensure_json_instruction(prompt_text: str) -> str: # Vietnamese weekday names
if "json" in prompt_text.lower(): _WEEKDAY_MAP = {
return prompt_text 0: "Thứ 2",
return f"{prompt_text}\n\nReturn JSON (json) object with keys: ai_response, product_ids." 1: "Thứ 3",
2: "Thứ 4",
3: "Thứ 5",
4: "Thứ 6",
5: "Thứ 7",
6: "Chủ nhật",
}
def get_system_prompt() -> str: def _get_weekday_str() -> str:
""" return _WEEKDAY_MAP[datetime.now(VIETNAM_TZ).weekday()]
System prompt for CiCi Fashion Agent.
# ──────────────────────────── Local template ────────────────────────────
# This is the SAME prompt pushed to Langfuse via scripts/push_prompt_to_langfuse.py
# {{date_str}} is the only variable, replaced at runtime.
_PROMPT_TEMPLATE = """# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {{date_str}} ({{weekday_str}})
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
## 0. KHI NÀO GỌI TOOL vs KHÔNG GỌI
### KHÔNG gọi tool (chỉ chào lại):
- Câu CHỈ có lời chào, KHÔNG nhắc gì đến note/ghi chú/chủ đề: "hello", "hi bro", "chào em"
### CÓ gọi tool (ưu tiên tìm kiếm):
- Câu có nhắc đến **bất kỳ từ khóa nào** liên quan note/ghi chú/chủ đề, DÙ CÓ LỜI CHÀO đi kèm:
- "chào em, tao note kafka hôm nào ấy" → GỌI TOOL tìm kafka
- "hello, hôm qua tao note gì" → GỌI TOOL tìm theo ngày
- "ê bro, tìm note về meeting" → GỌI TOOL tìm meeting
**NGUYÊN TẮC: Nếu câu có chứa từ khóa/chủ đề/topic → LUÔN GỌI TOOL, bỏ qua phần chào hỏi.**
## 1. TỰ TÍNH TOÁN NGÀY THÁNG
Bạn PHẢI tự tính ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {{date_str}} ({{weekday_str}})".
Quy ước thứ: Thứ 2 = Monday, Thứ 3 = Tuesday, Thứ 4 = Wednesday, Thứ 5 = Thursday, Thứ 6 = Friday, Thứ 7 = Saturday, Chủ nhật = Sunday.
### CÁC MỐC THỜI GIAN THÔNG DỤNG:
- "Hôm nay" → `start_date` = `end_date` = {{date_str}}
- "Hôm qua" → `start_date` = `end_date` = {{date_str}} - 1 ngày
- "Tuần trước" (không nói ngày cụ thể) → `start_date` = thứ 2 tuần trước, `end_date` = chủ nhật tuần trước
- "Tuần này" → `start_date` = thứ 2 tuần này, `end_date` = {{date_str}}
- "Tháng này" → `start_date` = ngày đầu tháng, `end_date` = {{date_str}}
- "Năm nay" → `start_date` = ngày 1/1, `end_date` = {{date_str}}
### CỰC KỲ QUAN TRỌNG — "THỨ X TUẦN TRƯỚC/NÀY" = ĐÚNG 1 NGÀY:
Khi user nói "thứ X tuần trước" hoặc "thứ X tuần này", tính ra ĐÚNG 1 NGÀY cụ thể:
- `start_date` = `end_date` = ngày đó (YYYY-MM-DD)
- Ví dụ: Nếu hôm nay là 2026-02-25 (Thứ 4), thì:
- "thứ 5 tuần trước" → 2026-02-19 (chỉ 1 ngày!)
- "thứ 2 tuần này" → 2026-02-23 (chỉ 1 ngày!)
- "thứ 6 tuần trước" → 2026-02-20 (chỉ 1 ngày!)
- **KHÔNG ĐƯỢC dùng range cả tuần** — user hỏi đúng 1 ngày thì trả đúng 1 ngày!
### KHI USER HỎI "HÔM NÀO / NGÀY NÀO / BAO GIỜ":
- "Kafka note hôm nào ấy nhỉ?" = User ĐANG HỎI ngày → tìm ALL dates
- "Tao note cái đó khi nào?" = User ĐANG HỎI ngày → tìm ALL dates
- → Dùng range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
- **KHÔNG ĐƯỢC HỎI LẠI "ngày nào?"** — vì user đang nhờ bot tìm ngày!
- → **CHỈ TRẢ VỀ NGÀY**, ví dụ: "Bạn note cái đó vào ngày **2026-02-05** (Thứ 5) nhé!"
- **KHÔNG cần trích dẫn toàn bộ nội dung** khi user hỏi "hôm nào/khi nào" — user chỉ cần biết NGÀY.
- Nếu tìm thấy nhiều memo khớp, liệt kê ngày của từng memo.
### KHI KHÔNG NHẮC THỜI GIAN:
- "Tìm note về X" → range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
## 2. PHÂN TÍCH PARAMETERS
### NGUYÊN TẮC: CHỈ THÊM PARAMETER KHI USER NÓI RÕ
- **`content_search`**: Khi user nhắc từ khóa: "về Kafka", "pass wifi", "meeting"
- **`tag`**: Khi user nhắc tag: "#work", "#idea"
- **KHÔNG THÊM** content_search/tag nếu user chỉ hỏi theo ngày
Returns: ### VÍ DỤ:
str: System prompt with the current date. - "Hôm qua note gì?" → ✅ date only
- "Note kafka hôm nào?" → ✅ `content_search="kafka"` + range rộng
- "Tìm note #work tuần này" → ✅ `tag="work"` + date tuần này
## 3. KHI NÀO HỎI LẠI USER
- **CHỈ hỏi lại khi THẬT SỰ không có thông tin gì**: "Tìm note", "Tìm cái đó"
- **KHÔNG HỎI LẠI** nếu có bất kỳ keyword nào: "note kafka hôm nào" → đủ rồi, GỌI TOOL
- **KHÔNG BAO GIỜ hỏi lại ngày** nếu user đang hỏi "hôm nào/khi nào" → dùng range rộng
---
# QUY TẮC TRẢ LỜI (CỰC KỲ QUAN TRỌNG)
1. **NGẮN GỌN DƯỚI 100 TỪ**: Trả lời súc tích, đi thẳng vấn đề. KHÔNG dài dòng.
2. **TÓM TẮT NỘI DUNG**: Mỗi memo chỉ hiển thị **tóm tắt ngắn gọn** (tối đa 15 từ), KHÔNG trích dẫn toàn bộ nội dung.
3. **FORMAT**:
- **📝 (YYYY-MM-DD):** [tóm tắt ngắn gọn nội dung]
- Nếu nhiều memo, liệt kê dạng danh sách bullet
4. **KHÔNG BỊA ĐẶT**: Không tự chế nội dung.
5. **NGÔN NGỮ**: Thân thiện, tự nhiên, như nói chuyện với bạn.
6. Nếu count=0: "Không tìm thấy ghi chú nào 🤷"
7. **Trả lời bằng text thuần/markdown**, KHÔNG wrap JSON.
8. **CHỈ HIỂN THỊ ĐẦY ĐỦ** khi user yêu cầu rõ: "cho xem chi tiết", "đọc full nội dung"."""
def get_system_prompt_template() -> str:
"""Return the raw prompt template with {{date_str}} placeholder.
Used by the push script to upload to Langfuse.
""" """
date_str = datetime.now().strftime("%d/%m/%Y") return _PROMPT_TEMPLATE
def _fetch_langfuse_prompt() -> str | None:
    """
    Try to fetch the latest prompt from Langfuse.

    Returns the compiled prompt string, or None when no client is configured
    or any step fails (import, fetch, compile). Relies on Langfuse's built-in
    prompt caching via cache_ttl_seconds below.
    """
    try:
        # Imported lazily so this module stays importable without Langfuse.
        from common.langfuse_client import get_langfuse_client

        client = get_langfuse_client()
        if not client:
            return None

        prompt = client.get_prompt(
            name="cucu-system-prompt",
            label="production",
            cache_ttl_seconds=60,  # Re-fetch every 60s
        )
        today = datetime.now(VIETNAM_TZ).strftime("%Y-%m-%d")
        weekday = _get_weekday_str()
        compiled = prompt.compile(date_str=today, weekday_str=weekday)
        logger.info("✅ Prompt fetched from Langfuse (version=%s)", prompt.version)
        return compiled
    except Exception as exc:
        logger.warning("⚠️ Langfuse prompt fetch failed: %s — using local fallback", exc)
        return None
def get_system_prompt() -> str:
    """
    Resolve the system prompt for the agent.

    Priority:
        1. Langfuse prompt management (realtime, editable from the dashboard).
        2. Local `_PROMPT_TEMPLATE` fallback with date placeholders filled in.
    """
    remote_prompt = _fetch_langfuse_prompt()
    if remote_prompt:
        return remote_prompt

    # Local fallback: substitute both mustache-style placeholders.
    today = datetime.now(VIETNAM_TZ).strftime("%Y-%m-%d")
    weekday = _get_weekday_str()
    logger.info("📝 Using local prompt fallback (date=%s, weekday=%s)", today, weekday)
    return (
        _PROMPT_TEMPLATE
        .replace("{{date_str}}", today)
        .replace("{{weekday_str}}", weekday)
    )
# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {date_str}
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
Bạn chỉ có 1 tool duy nhất là `memo_retrieval_tool`. Hãy sử dụng nó thông minh.
## 0. KHI NÀO **KHÔNG** ĐƯỢC GỌI TOOL (QUAN TRỌNG)
- Nếu user **chỉ chào hỏi / small talk** (VD: "chào em", "hello", "hi bro", "alo"), hãy:
- Trả lời lại một câu chào ngắn gọn, KHÔNG gọi `memo_retrieval_tool`.
- Chỉ gọi tool khi user hỏi RÕ về **ghi chú / note / ngày / nội dung / tag**.
- Không được "đoán" là user đang hỏi về Kafka, work, v.v. nếu câu hiện tại **chỉ là lời chào**.
- Không dùng **câu hỏi cũ** để tự ý gọi tool cho câu mới nếu câu mới chỉ là lời chào.
## 1. TỰ TÍNH TOÁN NGÀY THÁNG (QUAN TRỌNG)
Người dùng sẽ hỏi ngày tương đối (hôm qua, tuần trước...). Bạn PHẢI tự tính ra ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {date_str}".
- **"Hôm nay note gì?"**
→ `start_date` = {date_str}, `end_date` = {date_str}
- **"Hôm qua note gì?"**
→ `start_date` = {date_str} - 1 ngày
- **"Hôm kia note gì?"**
→ `start_date` = {date_str} - 2 ngày
- **"Tuần trước note gì?"**
→ `start_date` = {date_str} - 7 ngày, `end_date` = {date_str} (hoặc range cụ thể của tuần trước)
- **"Tháng 1 note gì?"**
→ `start_date` = "2026-01-01", `end_date` = "2026-01-31"
## 2. PHÂN TÍCH PARAMETERS
- **`content_search`**: Dùng khi user hỏi về nội dung (VD: "dự án A", "pass wifi", "số điện thoại"). Dùng Regex nên hãy chọn keyword đặc trưng.
- **`tag`**: Dùng khi user nhắc đến tag/chủ đề (VD: "#work", "#idea"). Chỉ điền nếu user KHẲNG ĐỊNH là tag.
### 2.1. XỬ LÝ "TOPIC" THÀNH TAG / CONTENT_SEARCH
- Nếu user nhắc đến **một chủ đề ngắn gọn** (VD: "Kafka", "English", "health") nhưng **không có dấu #**:
- Hãy coi đó là một **topic**.
- Nếu topic là **một từ đơn, không có khoảng trắng** (VD: "Kafka", "work"):
- ƯU TIÊN map thành `tag` (VD: `tag="kafka"` hoặc `tag="work"` — KHÔNG cần dấu `#`).
- Nếu topic là **cụm từ dài** (VD: "Kafka performance", "meeting tuần trước"):
- Map thành `content_search` (VD: `content_search="Kafka performance"`).
- Nếu không chắc đó là tag hay chỉ là từ khóa nội dung:
- Có thể điền **cả hai**:
- `tag="kafka"`
- `content_search="Kafka"`
- Khi đó MongoDB sẽ lọc theo tag (nếu note có tag) và/hoặc nội dung có chứa keyword.
## 3. VÍ DỤ GỌI TOOL
**Case 1: Hỏi theo ngày**
*Input: "Hôm qua tao có note gì không?" (Giả sử hôm nay 2026-01-24)*
→ Bot tính: Hôm qua = 2026-01-23
→ Tool call: `memo_retrieval_tool(start_date="2026-01-23")`
**Case 2: Tìm nội dung + Ngày**
*Input: "Tuần này tao note gì về 'meeting'?" (Hôm nay 2026-01-24)*
→ Bot tính: Tuần này ~ 2026-01-19 đến 2026-01-25
→ Tool call:
```python
memo_retrieval_tool(
start_date="2026-01-19",
end_date="2026-01-25",
content_search="meeting"
)
```
**Case 3: Tìm theo tag**
*Input: "Tìm mấy cái note #idea tháng trước"*
→ Tool call:
```python
memo_retrieval_tool(
start_date="2025-12-01",
end_date="2025-12-31",
tag="idea"
)
```
**Case 4: Tìm nội dung chung chung (Không rõ ngày)**
*Input: "Tìm lại pass wifi"*
→ Bot tự chọn range rộng hoặc không giới hạn (tùy tool support, ở đây tool bắt buộc start_date thì lấy ngày xa xưa hoặc 1 tháng gần nhất tùy ngữ cảnh, hoặc hỏi lại user. NHƯNG tốt nhất cứ search 1 năm gần đây).
→ Tool call: `memo_retrieval_tool(start_date="2025-01-01", content_search="pass wifi")`
---
# QUY TẮC TRẢ LỜI (RESPONSE)
1. **DỰA TRÊN KẾT QUẢ TOOL**:
- Nếu có memos: Liệt kê ngắn gọn, trích dẫn nội dung chính.
- Nếu `count` = 0: Trả lời "Không tìm thấy ghi chú nào trong khoảng thời gian này/với từ khóa này."
2. **KHÔNG BỊA ĐẶT**: Không tự chế ra nội dung memo không có trong data.
3. **FORMAT MENU**:
- Ghi chú 1 (2026-01-24): [Nội dung tóm tắt]
- Ghi chú 2 (2026-01-23): [Nội dung tóm tắt]
4. **NGÔN NGỮ**: Giao tiếp tự nhiên, thân thiện (bro-style nếu user thích, hoặc lịch sự mặc định).
---
# FORMAT ĐẦU RA (JSON)
Bot trả lời dưới dạng JSON (để Frontend render hoặc parse):
```json
{{
"ai_response": "Đây là các ghi chú mình tìm thấy hôm qua...",
"found_memos": [
{{
"id": "...",
"content": "...",
"created_at": "..."
}}
]
}}
```
*Lưu ý: Nếu tool trả về data, hãy tóm tắt vào `ai_response` và dán raw data vào `found_memos` nếu cần.*
\ No newline at end of file
This diff is collapsed.
""" """
Chatbot API Route Chatbot API Route
----------------- -----------------
`POST /api/agent/chat` - chính là endpoint chat cho CiCi Assistant. `POST /api/agent/chat` - endpoint chat cho CuCu Assistant (non-streaming).
Logic xử lý nằm ở `agent.controller.chat_controller`. `POST /api/agent/chat/stream` - endpoint chat SSE streaming.
Logic xử lý nằm ở `agent.controller`.
""" """
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from typing import AsyncGenerator
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from fastapi.responses import StreamingResponse
from opentelemetry import trace from opentelemetry import trace
from agent.controller import chat_controller # Lazy imports - defer heavy AI modules to first use
from agent.models import QueryRequest # from agent.controller import chat_controller, chat_controller_stream
# from agent.models import QueryRequest
from common.message_limit import message_limit_service from common.message_limit import message_limit_service
from config import DEFAULT_MODEL from config import DEFAULT_MODEL
...@@ -20,6 +24,26 @@ logger = logging.getLogger(__name__) ...@@ -20,6 +24,26 @@ logger = logging.getLogger(__name__)
tracer = trace.get_tracer(__name__) tracer = trace.get_tracer(__name__)
router = APIRouter() router = APIRouter()
# Cache for lazy-loaded modules
_agent_modules = {}
def _get_agent_controller():
    """Lazy-load agent.controller, deferring the heavy LangChain/LangGraph import.

    Returns a (chat_controller, chat_controller_stream) tuple, memoized in
    the module-level _agent_modules cache after the first call.
    """
    if "controller" not in _agent_modules:
        from agent.controller import chat_controller, chat_controller_stream

        _agent_modules.update(
            controller=chat_controller,
            controller_stream=chat_controller_stream,
        )
    return _agent_modules["controller"], _agent_modules["controller_stream"]
def _get_query_request_model():
    """Lazy-load agent.models.QueryRequest, memoized in _agent_modules."""
    model = _agent_modules.get("QueryRequest")
    if model is None:
        from agent.models import QueryRequest as model

        _agent_modules["QueryRequest"] = model
    return model
@dataclass @dataclass
class Identity: class Identity:
...@@ -41,7 +65,6 @@ def _get_identity(request: Request) -> Identity: ...@@ -41,7 +65,6 @@ def _get_identity(request: Request) -> Identity:
history_key = primary_id history_key = primary_id
rate_limit_key = primary_id rate_limit_key = primary_id
else: else:
# Guest: dùng device_id, fallback 'anonymous'
primary_id = device_id or "anonymous" primary_id = device_id or "anonymous"
history_key = device_id or "anonymous" history_key = device_id or "anonymous"
rate_limit_key = device_id or "anonymous" rate_limit_key = device_id or "anonymous"
...@@ -54,38 +77,35 @@ def _get_identity(request: Request) -> Identity: ...@@ -54,38 +77,35 @@ def _get_identity(request: Request) -> Identity:
) )
@router.post("/api/agent/chat", summary="Chat with CiCi Assistant") @router.post("/api/agent/chat", summary="Chat with CuCu Assistant")
async def cici_chat(request: Request, req: QueryRequest, background_tasks: BackgroundTasks): async def cucu_chat(request: Request, background_tasks: BackgroundTasks):
""" """Endpoint chat không stream - trả về response JSON đầy đủ một lần."""
Endpoint chat không stream - trả về response JSON đầy đủ một lần. # Lazy-load AI modules on first call
QueryRequest = _get_query_request_model()
chat_controller, _ = _get_agent_controller()
body = await request.json()
req = QueryRequest(**body)
- Tự lấy user/device từ middleware (`get_user_identity`)
- Gọi `chat_controller` để xử lý toàn bộ logic LLM + tools
- Tự tăng counter rate limit sau khi trả lời xong
"""
# 1. Xác định identity
identity = _get_identity(request) identity = _get_identity(request)
user_id = identity.primary_id user_id = identity.primary_id
logger.info("📥 [Incoming Chat] User=%s | Query=%s", user_id, req.user_query) logger.info("📥 [Incoming Chat] User=%s | Query=%s", user_id, req.user_query)
# Span cho tracing (optional)
span = trace.get_current_span() span = trace.get_current_span()
span.set_attribute("user.id", user_id) span.set_attribute("user.id", user_id)
span.set_attribute("chat.user_query", req.user_query) span.set_attribute("chat.user_query", req.user_query)
try: try:
# 2. Gọi controller xử lý
result = await chat_controller( result = await chat_controller(
query=req.user_query, query=req.user_query,
user_id=user_id, user_id=user_id,
background_tasks=background_tasks, background_tasks=background_tasks,
model_name=DEFAULT_MODEL, model_name=DEFAULT_MODEL,
images=req.images, images=req.images,
identity_key=identity.history_key, # Guest: device_id, User: user_id identity_key=identity.history_key,
) )
# 3. Tăng usage info (rate limit) sau khi thành công
usage_info = await message_limit_service.increment( usage_info = await message_limit_service.increment(
identity_key=identity.rate_limit_key, identity_key=identity.rate_limit_key,
is_authenticated=identity.is_authenticated, is_authenticated=identity.is_authenticated,
...@@ -104,7 +124,58 @@ async def cici_chat(request: Request, req: QueryRequest, background_tasks: Backg ...@@ -104,7 +124,58 @@ async def cici_chat(request: Request, req: QueryRequest, background_tasks: Backg
}, },
} }
except Exception as e: except Exception as e:
logger.error("Error in cici_chat: %s", e, exc_info=True) logger.error("Error in cucu_chat: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e)) from e raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/api/agent/chat/stream", summary="Chat with CuCu Assistant (SSE Streaming)")
async def cucu_chat_stream(request: Request):
    """
    Endpoint chat SSE streaming — trả về token-by-token qua Server-Sent Events.

    SSE format:
        data: {"token": "partial"}\n\n                        — mỗi token chunk
        data: {"done": true, "ai_response": "full text"}\n\n  — kết thúc
        data: {"error": "..."}\n\n                            — khi có lỗi
    """
    # Local import: this route module deliberately defers heavy imports.
    import json

    # Lazy-load AI modules on first call
    QueryRequest = _get_query_request_model()
    _, chat_controller_stream = _get_agent_controller()

    body = await request.json()
    req = QueryRequest(**body)

    identity = _get_identity(request)
    user_id = identity.primary_id
    logger.info("📥 [Incoming Stream] User=%s | Query=%s", user_id, req.user_query)

    async def sse_generator() -> AsyncGenerator[str, None]:
        try:
            async for chunk_json in chat_controller_stream(
                query=req.user_query,
                user_id=user_id,
                model_name=DEFAULT_MODEL,
                images=req.images,
                identity_key=identity.history_key,
            ):
                yield f"data: {chunk_json}\n\n"

            # Increment rate limit only after the stream completed successfully.
            await message_limit_service.increment(
                identity_key=identity.rate_limit_key,
                is_authenticated=identity.is_authenticated,
            )
            logger.info("📤 [Stream Done] User=%s", user_id)
        except Exception as e:
            logger.error("Error in stream: %s", e, exc_info=True)
            # BUGFIX: the previous hand-built f-string JSON produced invalid
            # JSON whenever the error message contained a double quote or
            # newline; json.dumps escapes the payload correctly.
            yield f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        sse_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable nginx proxy buffering so tokens flush immediately.
            "X-Accel-Buffering": "no",
        },
    )
...@@ -30,6 +30,14 @@ class ClearHistoryResponse(BaseModel): ...@@ -30,6 +30,14 @@ class ClearHistoryResponse(BaseModel):
message: str message: str
@router.get("/api/history/me", summary="Get My Chat History", response_model=ChatHistoryResponse)
async def get_my_chat_history(request: Request, limit: int | None = 50, before_id: int | None = None):
    """
    Shortcut: fetch the caller's own chat history (identity from middleware).

    Delegates to `get_chat_history` with the sentinel identity_key "me" —
    presumably resolved to the real identity inside that handler; verify there.
    """
    return await get_chat_history(
        request,
        identity_key="me",
        limit=limit,
        before_id=before_id,
    )
@router.get("/api/history/{identity_key}", summary="Get Chat History", response_model=ChatHistoryResponse) @router.get("/api/history/{identity_key}", summary="Get Chat History", response_model=ChatHistoryResponse)
async def get_chat_history(request: Request, identity_key: str, limit: int | None = 50, before_id: int | None = None): async def get_chat_history(request: Request, identity_key: str, limit: int | None = 50, before_id: int | None = None):
""" """
...@@ -59,6 +67,30 @@ async def get_chat_history(request: Request, identity_key: str, limit: int | Non ...@@ -59,6 +67,30 @@ async def get_chat_history(request: Request, identity_key: str, limit: int | Non
raise HTTPException(status_code=500, detail="Failed to fetch chat history") raise HTTPException(status_code=500, detail="Failed to fetch chat history")
@router.delete("/api/history/me", summary="Clear My Chat History", response_model=ClearHistoryResponse)
async def clear_my_chat_history(request: Request):
    """
    Shortcut: clear the caller's own chat history (identity from middleware).

    Identity resolution: authenticated users are keyed by user_id; guests
    fall back to device_id, then the literal "anonymous".

    Raises:
        HTTPException: 500 when history clearing fails for any reason.
    """
    try:
        user_id = getattr(request.state, "user_id", None)
        device_id = getattr(request.state, "device_id", "") or ""
        is_authenticated = bool(getattr(request.state, "is_authenticated", False))

        if is_authenticated and user_id:
            resolved_key = str(user_id)
        else:
            resolved_key = device_id or "anonymous"

        manager = await get_conversation_manager()
        await manager.clear_history(resolved_key)
        logger.info("✅ Cleared chat history for %s", resolved_key)
        # Plain literal (the original used an f-string with no placeholders).
        return {"success": True, "message": "Đã xóa lịch sử chat"}
    except Exception as e:
        logger.error("Error clearing chat history: %s", e, exc_info=True)
        # Chain the cause ("from e") for consistency with the chat routes.
        raise HTTPException(status_code=500, detail="Failed to clear chat history") from e
@router.delete("/api/history/{identity_key}", summary="Clear Chat History", response_model=ClearHistoryResponse) @router.delete("/api/history/{identity_key}", summary="Clear Chat History", response_model=ClearHistoryResponse)
async def clear_chat_history(identity_key: str): async def clear_chat_history(identity_key: str):
""" """
......
...@@ -55,12 +55,25 @@ async def list_memos( ...@@ -55,12 +55,25 @@ async def list_memos(
filter_str=filter, filter_str=filter,
) )
# Parse creator_id from filter string (e.g. "creator_id == user_xxx")
creator_id = None
if filter: if filter:
logger.debug("List memos GET with filter=%r", filter) logger.debug("List memos GET with filter=%r", filter)
pattern_creator = r"creator_id\s*==\s*([a-zA-Z0-9_\-\.]+)"
match_creator = re.search(pattern_creator, filter)
if match_creator:
creator_id = match_creator.group(1)
# Parse pinned from filter string (e.g. "... && pinned")
pinned = None
if filter and re.search(r'\bpinned\b(?!\s*==\s*false)', filter):
pinned = True
return await memo_service.list_memos( return await memo_service.list_memos(
user_id=user_id, user_id=user_id,
creator_id=creator_id,
tag=tag, tag=tag,
pinned=pinned,
row_status=row_status, row_status=row_status,
start_date=dt_start, start_date=dt_start,
end_date=dt_end end_date=dt_end
...@@ -118,10 +131,18 @@ async def create_memo_or_list_memos( ...@@ -118,10 +131,18 @@ async def create_memo_or_list_memos(
if match_creator: if match_creator:
creator_id = match_creator.group(1) creator_id = match_creator.group(1)
# Parse pinned filter — frontend sends "pinned" in filter string for bookmarks
pinned = None
if raw_filter and isinstance(raw_filter, str):
# Match standalone "pinned" (not "pinned == false")
if re.search(r'\bpinned\b(?!\s*==\s*false)', raw_filter):
pinned = True
return await memo_service.list_memos( return await memo_service.list_memos(
user_id=user_id, user_id=user_id,
creator_id=creator_id, creator_id=creator_id,
tag=tag, tag=tag,
pinned=pinned,
start_date=start_date, start_date=start_date,
end_date=end_date end_date=end_date
) )
......
...@@ -4,13 +4,12 @@ Shortcut service routes for Memos-style backend. ...@@ -4,13 +4,12 @@ Shortcut service routes for Memos-style backend.
from typing import List from typing import List
from fastapi import APIRouter, Body, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from common.memos_core.schemas import ( from common.memos_core.schemas import (
ShortcutCreate, ShortcutCreate,
ShortcutUpdate, ShortcutUpdate,
ShortcutResponse, ShortcutResponse,
ListShortcutsResponse,
) )
from common.memos_core.services import get_shortcut_service from common.memos_core.services import get_shortcut_service
...@@ -26,20 +25,6 @@ async def list_shortcuts(shortcut_service=Depends(get_shortcut_service)): ...@@ -26,20 +25,6 @@ async def list_shortcuts(shortcut_service=Depends(get_shortcut_service)):
raise HTTPException(status_code=500, detail=str(exc)) from exc raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("", summary="List shortcuts (Connect compatibility)", response_model=ListShortcutsResponse)
async def list_shortcuts_connect_compat(
payload: dict = Body(default_factory=dict), # noqa: B008
shortcut_service=Depends(get_shortcut_service),
):
# Connect RPC ListShortcuts is proxied as POST /api/v1/shortcuts in dev.
# Ignore payload (parent, pagination...) and return empty list for now.
try:
_ = payload
shortcuts = await shortcut_service.list_shortcuts()
return ListShortcutsResponse(shortcuts=shortcuts)
except Exception as exc: # pragma: no cover
raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("", summary="Create shortcut", response_model=ShortcutResponse) @router.post("", summary="Create shortcut", response_model=ShortcutResponse)
async def create_shortcut( async def create_shortcut(
...@@ -54,7 +39,7 @@ async def create_shortcut( ...@@ -54,7 +39,7 @@ async def create_shortcut(
@router.patch("/{shortcut_id}", summary="Update shortcut", response_model=ShortcutResponse) @router.patch("/{shortcut_id}", summary="Update shortcut", response_model=ShortcutResponse)
async def update_shortcut( async def update_shortcut(
shortcut_id: int, shortcut_id: str,
payload: ShortcutUpdate, payload: ShortcutUpdate,
shortcut_service=Depends(get_shortcut_service), shortcut_service=Depends(get_shortcut_service),
): ):
...@@ -66,7 +51,7 @@ async def update_shortcut( ...@@ -66,7 +51,7 @@ async def update_shortcut(
@router.delete("/{shortcut_id}", summary="Delete shortcut") @router.delete("/{shortcut_id}", summary="Delete shortcut")
async def delete_shortcut( async def delete_shortcut(
shortcut_id: int, shortcut_id: str,
shortcut_service=Depends(get_shortcut_service), shortcut_service=Depends(get_shortcut_service),
): ):
try: try:
......
"""
Test Chat Route - NO AUTH REQUIRED
------------------------------------
`POST /api/test/chat` - endpoint thô để kiểm tra AI API key còn sống không.
Không dùng graph, không dùng tools, không dùng history.
Gọi thẳng OpenAI / Gemini / Groq tùy DEFAULT_MODEL.
Dùng để debug: nếu endpoint này work → vấn đề ở auth/middleware.
nếu không work → vấn đề API key hoặc model.
"""
import logging
import time
from fastapi import APIRouter
from pydantic import BaseModel
from config import DEFAULT_MODEL, GOOGLE_API_KEY, GROQ_API_KEY, OPENAI_API_KEY
logger = logging.getLogger(__name__)
router = APIRouter(tags=["test"])
class TestChatRequest(BaseModel):
    """Request body for the no-auth LLM smoke-test endpoint."""

    # Prompt text forwarded verbatim to the LLM.
    message: str
    model: str | None = None  # optional override of DEFAULT_MODEL
class TestChatResponse(BaseModel):
    """Result of one direct LLM call; errors are reported in-band, not raised."""

    status: str  # "ok" on success, "error" when the LLM call raised
    model_used: str  # model name actually invoked (request override or DEFAULT_MODEL)
    response: str  # LLM reply text, or a formatted error description
    latency_ms: int  # wall-clock duration of the call in milliseconds
@router.post(
    "/api/test/chat",
    response_model=TestChatResponse,
    summary="[NO AUTH] Test AI API key trực tiếp",
    description=(
        "Endpoint debug - gọi thẳng LLM không qua graph/tools/auth. "
        "Dùng để kiểm tra API key còn sống và model đang hoạt động."
    ),
)
async def test_chat(req: TestChatRequest) -> TestChatResponse:
    """Call the configured LLM directly (no graph/tools/history) and time it.

    Failures are captured and returned as a ``status="error"`` payload so the
    endpoint always answers with HTTP 200 — useful for API-key debugging.
    """
    chosen_model = req.model or DEFAULT_MODEL
    logger.info("[TEST CHAT] model=%s | msg=%s", chosen_model, req.message)

    started = time.monotonic()
    try:
        answer = await _call_llm(model_name=chosen_model, message=req.message)
    except Exception as exc:
        logger.error("[TEST CHAT] LLM error: %s", exc, exc_info=True)
        return TestChatResponse(
            status="error",
            model_used=chosen_model,
            response=f"LỖI: {type(exc).__name__}: {exc}",
            latency_ms=int((time.monotonic() - started) * 1000),
        )

    elapsed = int((time.monotonic() - started) * 1000)
    logger.info("[TEST CHAT] OK latency=%dms", elapsed)
    return TestChatResponse(
        status="ok",
        model_used=chosen_model,
        response=answer,
        latency_ms=elapsed,
    )
async def _call_llm(model_name: str, message: str) -> str:
    """Send ``message`` to the provider implied by ``model_name``; return reply text.

    Provider routing (first match wins, case-sensitive prefix checks):
      - names starting with "gemini"                      -> Google Generative AI
      - names starting with "llama"/"mixtral" or
        containing "groq"                                 -> Groq
      - anything else                                     -> OpenAI (default)
    """
    is_gemini = model_name.startswith("gemini")
    is_groq = (
        model_name.startswith("llama")
        or model_name.startswith("mixtral")
        or "groq" in model_name
    )

    # Providers are imported lazily so only the selected SDK must be installed.
    if is_gemini:
        from langchain_google_genai import ChatGoogleGenerativeAI

        client = ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
    elif is_groq:
        from langchain_groq import ChatGroq

        client = ChatGroq(model=model_name, groq_api_key=GROQ_API_KEY)
    else:
        from langchain_openai import ChatOpenAI

        client = ChatOpenAI(model=model_name, openai_api_key=OPENAI_API_KEY)

    reply = await client.ainvoke(message)
    return reply.content
...@@ -35,13 +35,13 @@ def verify_clerk_jwt(token: str) -> dict[str, Any]: ...@@ -35,13 +35,13 @@ def verify_clerk_jwt(token: str) -> dict[str, Any]:
signing_key = _jwks_client().get_signing_key_from_jwt(token).key signing_key = _jwks_client().get_signing_key_from_jwt(token).key
# Clerk tokens are typically RS256. # Clerk tokens are typically RS256.
# leeway=60 tolerates up to 60s clock skew between Clerk server and this machine # leeway=300 tolerates up to 5min clock skew (VPS clock not NTP-synced)
payload = jwt.decode( payload = jwt.decode(
token, token,
signing_key, signing_key,
algorithms=["RS256"], algorithms=["RS256"],
issuer=CLERK_ISSUER, issuer=CLERK_ISSUER,
leeway=60, leeway=300,
options={ options={
"verify_aud": False, # allow multiple audiences in dev "verify_aud": False, # allow multiple audiences in dev
}, },
......
This diff is collapsed.
...@@ -40,9 +40,9 @@ class EmbeddingClientManager: ...@@ -40,9 +40,9 @@ class EmbeddingClientManager:
# If using default key, cache the client # If using default key, cache the client
if not api_key: if not api_key:
if self._client is None: if self._client is None:
self._client = OpenAI(api_key=key) self._client = OpenAI(api_key=key)
return self._client return self._client
# For custom keys, create new client (not cached) # For custom keys, create new client (not cached)
return OpenAI(api_key=key) return OpenAI(api_key=key)
...@@ -65,9 +65,9 @@ class EmbeddingClientManager: ...@@ -65,9 +65,9 @@ class EmbeddingClientManager:
# If using default key, cache the default client # If using default key, cache the default client
if not api_key: if not api_key:
if self._async_client is None: if self._async_client is None:
self._async_client = AsyncOpenAI(api_key=key) self._async_client = AsyncOpenAI(api_key=key)
return self._async_client return self._async_client
# For user-specific keys, cache per user_id # For user-specific keys, cache per user_id
if user_id and user_id in self._user_clients: if user_id and user_id in self._user_clients:
...@@ -79,7 +79,7 @@ class EmbeddingClientManager: ...@@ -79,7 +79,7 @@ class EmbeddingClientManager:
return client return client
logger = logging.getLogger(__name__)
# NOTE: # NOTE:
# - TẠM THỜI KHÔNG DÙNG REDIS CACHE CHO EMBEDDING để tránh phụ thuộc Redis/aioredis. # - TẠM THỜI KHÔNG DÙNG REDIS CACHE CHO EMBEDDING để tránh phụ thuộc Redis/aioredis.
......
...@@ -17,60 +17,85 @@ logger = logging.getLogger(__name__) ...@@ -17,60 +17,85 @@ logger = logging.getLogger(__name__)
ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY") ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY")
# Fallback: Generate key from a password (NOT recommended for production) # Fallback: Generate key from a password (NOT recommended for production)
# This is only for development/testing
FALLBACK_PASSWORD = os.getenv("ENCRYPTION_PASSWORD", "default-dev-password-change-in-production") FALLBACK_PASSWORD = os.getenv("ENCRYPTION_PASSWORD", "default-dev-password-change-in-production")
# Cached Fernet instance (singleton) — avoids re-running PBKDF2 100K iterations per call
_fernet_instance: Fernet | None = None
def _get_fernet_key() -> bytes:
def _derive_key_from_password(password: str, salt: bytes) -> bytes:
"""Derive a Fernet key from password + salt using PBKDF2."""
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=32,
salt=salt,
iterations=100000,
)
return base64.urlsafe_b64encode(kdf.derive(password.encode()))
def _get_fernet() -> Fernet:
""" """
Get or generate Fernet encryption key. Get cached Fernet instance.
Priority: ENCRYPTION_KEY env var > generated from password (dev only) Priority: ENCRYPTION_KEY env var > password-derived key (dev only).
When using ENCRYPTION_KEY, salt is not needed (key is used directly).
""" """
global _fernet_instance
if _fernet_instance is not None:
return _fernet_instance
if ENCRYPTION_KEY: if ENCRYPTION_KEY:
try: try:
# Try to use as-is (should be base64-encoded 32-byte key) _fernet_instance = Fernet(ENCRYPTION_KEY.encode())
return ENCRYPTION_KEY.encode() return _fernet_instance
except Exception: except Exception:
# If not valid, try to decode as base64
try: try:
return base64.urlsafe_b64decode(ENCRYPTION_KEY) _fernet_instance = Fernet(base64.urlsafe_b64decode(ENCRYPTION_KEY))
return _fernet_instance
except Exception: except Exception:
logger.warning("Invalid ENCRYPTION_KEY format, using fallback") logger.warning("Invalid ENCRYPTION_KEY format, using fallback")
# Fallback: Generate from password (dev only - NOT secure for production) # Fallback: Generate from password with random salt (dev only - NOT secure for production)
logger.warning( logger.warning(
"⚠️ ENCRYPTION_KEY not set. Using password-based key derivation (NOT secure for production!)" "⚠️ ENCRYPTION_KEY not set. Using password-based key derivation (NOT secure for production!)"
) )
salt = b"cucu_note_salt" # Fixed salt for dev (should be random in production) # Use random salt — stored in a file so decryption works across restarts
kdf = PBKDF2HMAC( salt_file = os.path.join(os.path.dirname(__file__), "..", "data", ".encryption_salt")
algorithm=hashes.SHA256(), os.makedirs(os.path.dirname(salt_file), exist_ok=True)
length=32,
salt=salt, if os.path.exists(salt_file):
iterations=100000, with open(salt_file, "rb") as f:
) salt = f.read()
key = base64.urlsafe_b64encode(kdf.derive(FALLBACK_PASSWORD.encode())) else:
return key salt = os.urandom(16)
with open(salt_file, "wb") as f:
f.write(salt)
logger.info("Generated new encryption salt (saved to %s)", salt_file)
key = _derive_key_from_password(FALLBACK_PASSWORD, salt)
_fernet_instance = Fernet(key)
return _fernet_instance
def encrypt_api_key(api_key: str) -> str: def encrypt_api_key(api_key: str) -> str:
""" """
Encrypt an API key using Fernet symmetric encryption. Encrypt an API key using Fernet symmetric encryption.
Args: Args:
api_key: Plain text API key to encrypt api_key: Plain text API key to encrypt
Returns: Returns:
Encrypted API key as base64 string Encrypted API key as base64 string
Raises: Raises:
ValueError: If api_key is empty ValueError: If api_key is empty
RuntimeError: If encryption fails RuntimeError: If encryption fails
""" """
if not api_key or not api_key.strip(): if not api_key or not api_key.strip():
raise ValueError("API key cannot be empty") raise ValueError("API key cannot be empty")
try: try:
fernet = Fernet(_get_fernet_key()) fernet = _get_fernet()
encrypted = fernet.encrypt(api_key.encode()) encrypted = fernet.encrypt(api_key.encode())
return encrypted.decode() return encrypted.decode()
except Exception as e: except Exception as e:
...@@ -81,22 +106,22 @@ def encrypt_api_key(api_key: str) -> str: ...@@ -81,22 +106,22 @@ def encrypt_api_key(api_key: str) -> str:
def decrypt_api_key(encrypted_key: str) -> str: def decrypt_api_key(encrypted_key: str) -> str:
""" """
Decrypt an encrypted API key. Decrypt an encrypted API key.
Args: Args:
encrypted_key: Encrypted API key (base64 string) encrypted_key: Encrypted API key (base64 string)
Returns: Returns:
Decrypted plain text API key Decrypted plain text API key
Raises: Raises:
ValueError: If encrypted_key is empty ValueError: If encrypted_key is empty
RuntimeError: If decryption fails (wrong key, corrupted data, etc.) RuntimeError: If decryption fails (wrong key, corrupted data, etc.)
""" """
if not encrypted_key or not encrypted_key.strip(): if not encrypted_key or not encrypted_key.strip():
raise ValueError("Encrypted key cannot be empty") raise ValueError("Encrypted key cannot be empty")
try: try:
fernet = Fernet(_get_fernet_key()) fernet = _get_fernet()
decrypted = fernet.decrypt(encrypted_key.encode()) decrypted = fernet.decrypt(encrypted_key.encode())
return decrypted.decode() return decrypted.decode()
except Exception as e: except Exception as e:
...@@ -107,39 +132,38 @@ def decrypt_api_key(encrypted_key: str) -> str: ...@@ -107,39 +132,38 @@ def decrypt_api_key(encrypted_key: str) -> str:
def mask_api_key(api_key: str) -> str: def mask_api_key(api_key: str) -> str:
""" """
Mask an API key for display (show only first 7 chars and last 4 chars). Mask an API key for display (show only first 7 chars and last 4 chars).
Args: Args:
api_key: API key to mask api_key: API key to mask
Returns: Returns:
Masked API key (e.g., "sk-...xxxx") Masked API key (e.g., "sk-...xxxx")
""" """
if not api_key or len(api_key) < 11: if not api_key or len(api_key) < 11:
return "sk-...xxxx" return "sk-...xxxx"
return f"{api_key[:7]}...{api_key[-4:]}" return f"{api_key[:7]}...{api_key[-4:]}"
def validate_openai_key_format(api_key: str) -> bool: def validate_openai_key_format(api_key: str) -> bool:
""" """
Validate OpenAI API key format. Validate OpenAI API key format.
Args: Args:
api_key: API key to validate api_key: API key to validate
Returns: Returns:
True if format is valid, False otherwise True if format is valid, False otherwise
""" """
if not api_key or not api_key.strip(): if not api_key or not api_key.strip():
return False return False
# OpenAI keys typically start with "sk-" and are ~51 characters # OpenAI keys typically start with "sk-" and are ~51 characters
key = api_key.strip() key = api_key.strip()
if not key.startswith("sk-"): if not key.startswith("sk-"):
return False return False
if len(key) < 20 or len(key) > 100: # Reasonable range if len(key) < 20 or len(key) > 100: # Reasonable range
return False return False
return True
return True
...@@ -88,7 +88,7 @@ class LLMFactory: ...@@ -88,7 +88,7 @@ class LLMFactory:
"streaming": streaming, "streaming": streaming,
"api_key": key, "api_key": key,
"temperature": 0, "temperature": 0,
"max_tokens": 1000, "max_tokens": 4096,
} }
# Nếu bật json_mode, tiêm trực tiếp vào constructor # Nếu bật json_mode, tiêm trực tiếp vào constructor
...@@ -100,14 +100,7 @@ class LLMFactory: ...@@ -100,14 +100,7 @@ class LLMFactory:
logger.info(f"✅ Created OpenAI: {model_name}") logger.info(f"✅ Created OpenAI: {model_name}")
return llm return llm
def _enable_json_mode(self, llm: BaseChatModel, model_name: str) -> BaseChatModel:
"""Enable JSON mode for the LLM."""
try:
llm = llm.bind(response_format={"type": "json_object"})
logger.debug(f"⚙️ JSON mode enabled for {model_name}")
except Exception as e:
logger.warning(f"⚠️ JSON mode not supported: {e}")
return llm
def initialize(self, skip_warmup: bool = True) -> None: def initialize(self, skip_warmup: bool = True) -> None:
""" """
......
...@@ -48,23 +48,30 @@ def parse_date_range( ...@@ -48,23 +48,30 @@ def parse_date_range(
if match_ts: if match_ts:
try: try:
from common.timezone_config import VIETNAM_TZ
ts_start = float(match_ts.group(1)) ts_start = float(match_ts.group(1))
ts_end = float(match_ts.group(2)) ts_end = float(match_ts.group(2))
dt_start = datetime.fromtimestamp(ts_start, tz=timezone.utc) # Frontend sends UTC midnight timestamps, but user is in Vietnam (UTC+7).
dt_end = datetime.fromtimestamp(ts_end, tz=timezone.utc) # Shift by -7h so "Feb 5" means Feb 5 00:00 VN (= Feb 4 17:00 UTC)
# instead of Feb 5 00:00 UTC (= Feb 5 07:00 VN).
utc_offset_seconds = VIETNAM_TZ.utcoffset(None).total_seconds()
dt_start = datetime.fromtimestamp(ts_start - utc_offset_seconds, tz=timezone.utc)
dt_end = datetime.fromtimestamp(ts_end - utc_offset_seconds, tz=timezone.utc)
except (ValueError, TypeError): except (ValueError, TypeError):
pass pass
else: else:
# Case 2: DisplayTime filter (displayTime:YYYY-MM-DD) # Case 2: DisplayTime filter (displayTime:YYYY-MM-DD)
# Dates are interpreted as Vietnam time (UTC+7), then converted to UTC for MongoDB
pattern_dt = r"displayTime:(\d{4}-\d{2}-\d{2})" pattern_dt = r"displayTime:(\d{4}-\d{2}-\d{2})"
match_dt = re.search(pattern_dt, filter_str) match_dt = re.search(pattern_dt, filter_str)
if match_dt: if match_dt:
try: try:
from common.timezone_config import VIETNAM_TZ
date_str = match_dt.group(1) date_str = match_dt.group(1)
dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc) dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=VIETNAM_TZ)
dt_start = dt dt_start = dt.astimezone(timezone.utc)
dt_end = dt + timedelta(days=1) dt_end = (dt + timedelta(days=1)).astimezone(timezone.utc)
except ValueError: except ValueError:
pass pass
......
...@@ -191,7 +191,7 @@ class ShortcutUpdate(BaseModel): ...@@ -191,7 +191,7 @@ class ShortcutUpdate(BaseModel):
class ShortcutResponse(BaseModel): class ShortcutResponse(BaseModel):
id: int id: str
name: str name: str
filter: str filter: str
......
...@@ -810,21 +810,74 @@ class ReactionService: ...@@ -810,21 +810,74 @@ class ReactionService:
class ShortcutService: class ShortcutService:
"""Shortcut (Workspace) service with MongoDB backend."""
async def list_shortcuts(self) -> List[schemas.ShortcutResponse]: async def list_shortcuts(self) -> List[schemas.ShortcutResponse]:
return [] cursor = mongodb_client.shortcuts.find({}).sort("created_at", -1)
docs = await cursor.to_list(length=100)
return [
schemas.ShortcutResponse(
id=str(doc["_id"]),
name=doc.get("name", ""),
filter=doc.get("filter", ""),
)
for doc in docs
]
async def create_shortcut(self, payload: schemas.ShortcutCreate) -> schemas.ShortcutResponse: async def create_shortcut(self, payload: schemas.ShortcutCreate) -> schemas.ShortcutResponse:
return schemas.ShortcutResponse(id=1, name=payload.name, filter=payload.filter) now = utc_now()
doc = {
"name": payload.name,
"filter": payload.filter,
"created_at": now,
"updated_at": now,
}
result = await mongodb_client.shortcuts.insert_one(doc)
doc["_id"] = result.inserted_id
return schemas.ShortcutResponse(
id=str(doc["_id"]),
name=doc["name"],
filter=doc["filter"],
)
async def update_shortcut(self, shortcut_id: str, payload: schemas.ShortcutUpdate) -> schemas.ShortcutResponse:
update_fields: dict[str, Any] = {"updated_at": utc_now()}
if payload.name is not None:
update_fields["name"] = payload.name
if payload.filter is not None:
update_fields["filter"] = payload.filter
filter_query: dict[str, Any] = {}
if ObjectId.is_valid(shortcut_id):
filter_query["_id"] = ObjectId(shortcut_id)
else:
filter_query["_id"] = shortcut_id
from pymongo import ReturnDocument
result = await mongodb_client.shortcuts.find_one_and_update(
filter_query,
{"$set": update_fields},
return_document=ReturnDocument.AFTER,
)
if not result:
raise ValueError(f"Shortcut {shortcut_id} not found")
async def update_shortcut(self, shortcut_id: int, payload: schemas.ShortcutUpdate) -> schemas.ShortcutResponse:
return schemas.ShortcutResponse( return schemas.ShortcutResponse(
id=shortcut_id, id=str(result["_id"]),
name=payload.name or "demo", name=result.get("name", ""),
filter=payload.filter or "", filter=result.get("filter", ""),
) )
async def delete_shortcut(self, shortcut_id: int) -> None: async def delete_shortcut(self, shortcut_id: str) -> None:
return None filter_query: dict[str, Any] = {}
if ObjectId.is_valid(shortcut_id):
filter_query["_id"] = ObjectId(shortcut_id)
else:
filter_query["_id"] = shortcut_id
result = await mongodb_client.shortcuts.delete_one(filter_query)
if result.deleted_count == 0:
raise ValueError(f"Shortcut {shortcut_id} not found")
class ActivityService: class ActivityService:
......
...@@ -37,6 +37,7 @@ PUBLIC_PATHS = { ...@@ -37,6 +37,7 @@ PUBLIC_PATHS = {
PUBLIC_PATH_PREFIXES = [ PUBLIC_PATH_PREFIXES = [
"/static", "/static",
"/mock", "/mock",
"/api/test", # debug/test endpoints - no auth required
] ]
......
...@@ -29,6 +29,7 @@ COLLECTION_REACTIONS = "cuccu_reactions" ...@@ -29,6 +29,7 @@ COLLECTION_REACTIONS = "cuccu_reactions"
COLLECTION_MEMO_EMBEDDINGS = "cuccu_memo_embeddings" COLLECTION_MEMO_EMBEDDINGS = "cuccu_memo_embeddings"
COLLECTION_INBOX = "cuccu_inbox" COLLECTION_INBOX = "cuccu_inbox"
COLLECTION_USER_SETTINGS = "cuccu_user_settings" COLLECTION_USER_SETTINGS = "cuccu_user_settings"
COLLECTION_SHORTCUTS = "cuccu_shortcuts"
class MongoDBClient: class MongoDBClient:
...@@ -116,6 +117,10 @@ class MongoDBClient: ...@@ -116,6 +117,10 @@ class MongoDBClient:
def user_settings(self): def user_settings(self):
return self.db[COLLECTION_USER_SETTINGS] return self.db[COLLECTION_USER_SETTINGS]
@property
def shortcuts(self):
return self.db[COLLECTION_SHORTCUTS]
# Singleton instance # Singleton instance
mongodb_client = MongoDBClient() mongodb_client = MongoDBClient()
...@@ -206,6 +211,9 @@ async def create_indexes(): ...@@ -206,6 +211,9 @@ async def create_indexes():
# ====================== MEMO VERSIONS ====================== # ====================== MEMO VERSIONS ======================
await db["cuccu_memo_versions"].create_index([("memo_id", 1), ("version_index", -1)]) await db["cuccu_memo_versions"].create_index([("memo_id", 1), ("version_index", -1)])
# ====================== SHORTCUTS ======================
await db[COLLECTION_SHORTCUTS].create_index([("creator_id", 1)])
logger.info("✅ Database indexes created successfully (Production-ready)") logger.info("✅ Database indexes created successfully (Production-ready)")
except Exception as e: except Exception as e:
logger.warning(f"⚠️ Error creating indexes (may already exist): {e}") logger.warning(f"⚠️ Error creating indexes (may already exist): {e}")
......
...@@ -76,7 +76,7 @@ class RateLimitService: ...@@ -76,7 +76,7 @@ class RateLimitService:
logger.info(f"Using Redis for rate limiting: {redis_host}:{redis_port}/{redis_db}") logger.info(f"Using Redis for rate limiting: {redis_host}:{redis_port}/{redis_db}")
else: else:
# Fallback to memory (not suitable for production with multiple instances) # Fallback to memory (not suitable for production with multiple instances)
self.storage_uri = os.getenv("RATE_STORAGE_URI", "memory://") self.storage_uri = os.getenv("RATE_STORAGE_URI", "memory://")
logger.warning("⚠️ Using in-memory rate limiting (not suitable for production with multiple instances)") logger.warning("⚠️ Using in-memory rate limiting (not suitable for production with multiple instances)")
self.default_limits = ["100/hour", "30/minute"] self.default_limits = ["100/hour", "30/minute"]
......
"""
Timezone configuration for CuCu Note.
All user-facing date operations should use VIETNAM_TZ.
MongoDB stores in UTC — convert at query boundaries.
"""
from datetime import timezone, timedelta
VIETNAM_TZ = timezone(timedelta(hours=7))
...@@ -126,8 +126,6 @@ CONV_SUPABASE_KEY: str | None = os.getenv("CONV_SUPABASE_KEY") ...@@ -126,8 +126,6 @@ CONV_SUPABASE_KEY: str | None = os.getenv("CONV_SUPABASE_KEY")
# ====================== REDIS CONFIGURATION ====================== # ====================== REDIS CONFIGURATION ======================
REDIS_HOST: str | None = os.getenv("REDIS_HOST") REDIS_HOST: str | None = os.getenv("REDIS_HOST")
REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379")) REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
REDIS_PASSWORD: str | None = os.getenv("REDIS_PASSWORD")
REDIS_USERNAME: str | None = os.getenv("REDIS_USERNAME")
# ====================== AI API KEYS & MODELS ====================== # ====================== AI API KEYS & MODELS ======================
OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY") OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY")
...@@ -172,7 +170,7 @@ REDIS_CACHE_URL: str | None = os.getenv("REDIS_CACHE_URL", "redis-14473.c93.us-e ...@@ -172,7 +170,7 @@ REDIS_CACHE_URL: str | None = os.getenv("REDIS_CACHE_URL", "redis-14473.c93.us-e
REDIS_CACHE_PORT: int = int(os.getenv("REDIS_CACHE_PORT", "14473")) REDIS_CACHE_PORT: int = int(os.getenv("REDIS_CACHE_PORT", "14473"))
REDIS_CACHE_DB: int = int(os.getenv("REDIS_CACHE_DB", "0")) REDIS_CACHE_DB: int = int(os.getenv("REDIS_CACHE_DB", "0"))
REDIS_CACHE_TURN_ON: bool = os.getenv("REDIS_CACHE_TURN_ON", "true").lower() == "true" REDIS_CACHE_TURN_ON: bool = os.getenv("REDIS_CACHE_TURN_ON", "true").lower() == "true"
REDIS_PASSWORD: str | None = os.getenv("REDIS_CACHE_PASSWORD", "4kCCXXaJXXv7k358eG69p1lDBQtHTbQ1") REDIS_PASSWORD: str | None = os.getenv("REDIS_CACHE_PASSWORD")
REDIS_USERNAME: str = os.getenv("REDIS_CACHE_USERNAME", "default") REDIS_USERNAME: str = os.getenv("REDIS_CACHE_USERNAME", "default")
CONV_DATABASE_URL: str | None = os.getenv("CONV_DATABASE_URL") CONV_DATABASE_URL: str | None = os.getenv("CONV_DATABASE_URL")
...@@ -183,8 +181,8 @@ MONGODB_DB_NAME: str | None = os.getenv("MONGODB_DB_NAME", "cucu_note") ...@@ -183,8 +181,8 @@ MONGODB_DB_NAME: str | None = os.getenv("MONGODB_DB_NAME", "cucu_note")
USE_MONGO_CONVERSATION: bool = os.getenv("USE_MONGO_CONVERSATION", "true").lower() == "true" USE_MONGO_CONVERSATION: bool = os.getenv("USE_MONGO_CONVERSATION", "true").lower() == "true"
# MongoDB Connection Pooling # MongoDB Connection Pooling
MONGODB_MAX_POOL_SIZE: int = int(os.getenv("MONGODB_MAX_POOL_SIZE", "50")) MONGODB_MAX_POOL_SIZE: int = int(os.getenv("MONGODB_MAX_POOL_SIZE", "5"))
MONGODB_MIN_POOL_SIZE: int = int(os.getenv("MONGODB_MIN_POOL_SIZE", "10")) MONGODB_MIN_POOL_SIZE: int = int(os.getenv("MONGODB_MIN_POOL_SIZE", "1"))
MONGODB_MAX_IDLE_TIME_MS: int = int(os.getenv("MONGODB_MAX_IDLE_TIME_MS", "45000")) MONGODB_MAX_IDLE_TIME_MS: int = int(os.getenv("MONGODB_MAX_IDLE_TIME_MS", "45000"))
# ====================== CANIFA INTERNAL POSTGRES ====================== # ====================== CANIFA INTERNAL POSTGRES ======================
......
...@@ -43,18 +43,11 @@ asyncio.run(setup()) ...@@ -43,18 +43,11 @@ asyncio.run(setup())
" || echo "⚠️ Could not set up indexes (will retry on first request)" " || echo "⚠️ Could not set up indexes (will retry on first request)"
# Start the server # Start the server
echo "🌟 Starting Gunicorn server..." echo "🌟 Starting Uvicorn server (hot reload enabled)..."
# Allow overriding number of workers via env, default to 1 for simplicity exec uvicorn server:app \
WORKERS="${GUNICORN_WORKERS:-1}" --host 0.0.0.0 \
echo "🔧 Using Gunicorn workers: $WORKERS" --port 5000 \
--reload \
exec gunicorn \ --reload-dir /app \
--workers "$WORKERS" \ --log-level info
--worker-class uvicorn.workers.UvicornWorker \
--bind 0.0.0.0:5000 \
--timeout 120 \
--access-logfile - \
--error-logfile - \
--log-level info \
server:app
# Core FastAPI # Core FastAPI
fastapi==0.124.4 fastapi==0.124.4
uvicorn==0.38.0 uvicorn==0.38.0
uvloop>=0.21.0
starlette==0.50.0 starlette==0.50.0
pydantic==2.12.5 pydantic==2.12.5
pydantic_core==2.41.5 pydantic_core==2.41.5
...@@ -51,13 +52,10 @@ google-auth==2.45.0 ...@@ -51,13 +52,10 @@ google-auth==2.45.0
# Tokenization # Tokenization
tiktoken==0.12.0 tiktoken==0.12.0
# Observability # Observability (minimal - only trace API used)
opentelemetry-api==1.39.1 opentelemetry-api==1.39.1
opentelemetry-exporter-otlp-proto-common==1.39.1
opentelemetry-exporter-otlp-proto-http==1.39.1
opentelemetry-proto==1.39.1
opentelemetry-sdk==1.39.1 opentelemetry-sdk==1.39.1
opentelemetry-semantic-conventions==0.60b1 # Removed: otel exporters/proto/semantic-conventions (not configured)
# Utilities # Utilities
python-dotenv==1.2.1 python-dotenv==1.2.1
...@@ -71,7 +69,7 @@ tenacity==9.1.2 ...@@ -71,7 +69,7 @@ tenacity==9.1.2
backoff==2.2.1 backoff==2.2.1
regex==2025.11.3 regex==2025.11.3
Unidecode==1.4.0 Unidecode==1.4.0
pillow==12.0.0 # pillow==12.0.0 # Removed: not directly imported
# WebSocket # WebSocket
websockets==15.0.1 websockets==15.0.1
...@@ -104,4 +102,5 @@ cachetools==6.2.4 ...@@ -104,4 +102,5 @@ cachetools==6.2.4
pytest==9.0.2 pytest==9.0.2
# Production server # Production server
gunicorn==23.0.0 # gunicorn==23.0.0 # Removed: using uvicorn instead
aiosqlite
...@@ -2,6 +2,7 @@ import asyncio ...@@ -2,6 +2,7 @@ import asyncio
import os import os
import platform import platform
import logging import logging
from contextlib import asynccontextmanager
import uvicorn import uvicorn
from fastapi import FastAPI from fastapi import FastAPI
...@@ -10,6 +11,7 @@ from fastapi.responses import RedirectResponse ...@@ -10,6 +11,7 @@ from fastapi.responses import RedirectResponse
from api.chatbot import router as chatbot_router from api.chatbot import router as chatbot_router
from api.memos import router as memos_router from api.memos import router as memos_router
from api.test_chat_route import router as test_router
from common.cache import redis_cache from common.cache import redis_cache
from common.langfuse_client import get_langfuse_client from common.langfuse_client import get_langfuse_client
from common.middleware import middleware_manager from common.middleware import middleware_manager
...@@ -29,54 +31,44 @@ logging.basicConfig( ...@@ -29,54 +31,44 @@ logging.basicConfig(
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Langfuse client initialized in startup_event (not at import time)
app = FastAPI(
title="Contract AI Service",
description="API for Contract AI Service",
version="1.0.0",
)
# ============================================================================= # =============================================================================
# STARTUP EVENT - Initialize Redis Cache + MongoDB # LIFESPAN - Initialize & cleanup resources (replaces deprecated on_event)
# ============================================================================= # =============================================================================
@app.on_event("startup") @asynccontextmanager
async def startup_event(): async def lifespan(app: FastAPI):
"""Initialize dependencies on startup.""" """Startup & shutdown lifecycle manager."""
# --- STARTUP ---
# Initialize Redis (optional - will continue without cache if unavailable) # Initialize Redis (optional - will continue without cache if unavailable)
redis_client = await redis_cache.initialize() redis_client = await redis_cache.initialize()
if redis_client: if redis_client:
logger.info("✅ Redis cache initialized for message limit") logger.info("✅ Redis cache initialized for message limit")
else: else:
logger.info("⚠️ Redis cache unavailable - continuing without cache") logger.info("⚠️ Redis cache unavailable - continuing without cache")
# MongoDB initialization (required) # MongoDB initialization (required)
from common.mongo_client import init_mongodb from common.mongo_client import init_mongodb
await init_mongodb() await init_mongodb()
logger.info("✅ MongoDB connection initialized") logger.info("✅ MongoDB connection initialized")
# Langfuse initialization (optional - lazy loaded, just triggers auth check) # Langfuse initialization (optional)
langfuse_client = get_langfuse_client() langfuse_client = get_langfuse_client()
if langfuse_client: if langfuse_client:
logger.info("✅ Langfuse client ready") logger.info("✅ Langfuse client ready")
else: else:
logger.warning("⚠️ Langfuse client not available (missing keys or disabled)") logger.warning("⚠️ Langfuse client not available (missing keys or disabled)")
yield # App is running
@app.on_event("shutdown") # --- SHUTDOWN ---
async def shutdown_event():
"""Cleanup on shutdown."""
try: try:
# Close Redis connection if exists
redis_client = redis_cache.get_client() redis_client = redis_cache.get_client()
if redis_client: if redis_client:
await redis_client.aclose() await redis_client.aclose()
logger.info("Redis connection closed") logger.info("Redis connection closed")
except Exception as e: except Exception as e:
logger.debug(f"Error closing Redis: {e}") logger.debug(f"Error closing Redis: {e}")
# Close MongoDB connection
try: try:
from common.mongo_client import close_mongodb from common.mongo_client import close_mongodb
await close_mongodb() await close_mongodb()
...@@ -85,6 +77,14 @@ async def shutdown_event(): ...@@ -85,6 +77,14 @@ async def shutdown_event():
logger.debug(f"Error closing MongoDB: {e}") logger.debug(f"Error closing MongoDB: {e}")
app = FastAPI(
title="Contract AI Service",
description="API for Contract AI Service",
version="1.0.0",
lifespan=lifespan,
)
# ============================================================================= # =============================================================================
# MIDDLEWARE SETUP - Gom Auth + RateLimit + CORS vào một chỗ # MIDDLEWARE SETUP - Gom Auth + RateLimit + CORS vào một chỗ
# ============================================================================= # =============================================================================
...@@ -96,6 +96,7 @@ middleware_manager.setup( ...@@ -96,6 +96,7 @@ middleware_manager.setup(
cors_origins=CORS_ORIGINS, # từ environment variable cors_origins=CORS_ORIGINS, # từ environment variable
) )
app.include_router(test_router) # No-auth test endpoints
app.include_router(chatbot_router) app.include_router(chatbot_router)
app.include_router(memos_router) app.include_router(memos_router)
......
...@@ -6,6 +6,10 @@ services: ...@@ -6,6 +6,10 @@ services:
build: build:
context: ./backend context: ./backend
dockerfile: Dockerfile.prod dockerfile: Dockerfile.prod
cache_from:
- type=local,src=.docker/cache/backend
cache_to:
- type=local,dest=.docker/cache/backend
container_name: cuccu_backend container_name: cuccu_backend
restart: unless-stopped restart: unless-stopped
ports: ports:
...@@ -30,37 +34,38 @@ services: ...@@ -30,37 +34,38 @@ services:
deploy: deploy:
resources: resources:
limits: limits:
memory: 2G memory: 512M
cpus: '1.0' cpus: '1.0'
reservations: reservations:
memory: 512M memory: 128M
cpus: '0.5' cpus: '0.5'
# Frontend # Frontend (Production build with nginx - saves ~190MB RAM)
frontend: frontend:
build: build:
context: ./frontend context: ./frontend
dockerfile: Dockerfile.prod dockerfile: Dockerfile.prod
args: args:
# Build-time envs for Vite VITE_API_BASE_URL: ${VITE_API_BASE_URL:-http://localhost:5000}
# Browser (người dùng) gọi trực tiếp vào host, nên dùng localhost:5000 VITE_CLERK_PUBLISHABLE_KEY: ${VITE_CLERK_PUBLISHABLE_KEY:-}
VITE_API_BASE_URL: "http://localhost:5000"
VITE_CLERK_PUBLISHABLE_KEY: ${VITE_CLERK_PUBLISHABLE_KEY}
container_name: cuccu_frontend container_name: cuccu_frontend
restart: unless-stopped restart: unless-stopped
ports: ports:
- "3001:80" - "3001:80"
env_file:
- ./frontend/.env
depends_on: depends_on:
- backend - backend
networks: networks:
- cuccu_network - cuccu_network
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:80"] test: ["CMD", "wget", "-q", "--spider", "http://localhost:80"]
interval: 30s interval: 30s
timeout: 10s timeout: 10s
retries: 3 retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 32M
volumes: volumes:
backend_data: backend_data:
......
...@@ -2,4 +2,5 @@ ...@@ -2,4 +2,5 @@
VITE_CLERK_PUBLISHABLE_KEY=pk_test_Y29tbXVuYWwtc3VuYmVhbS0wLmNsZXJrLmFjY291bnRzLmRldiQ VITE_CLERK_PUBLISHABLE_KEY=pk_test_Y29tbXVuYWwtc3VuYmVhbS0wLmNsZXJrLmFjY291bnRzLmRldiQ
# ====================== API URL ====================== # ====================== API URL ======================
VITE_API_URL=http://localhost:8080 # Dev mode: point directly to backend (no nginx proxy)
\ No newline at end of file VITE_API_BASE_URL=http://160.191.50.138:5000
\ No newline at end of file
FROM node:18-alpine FROM node:22-alpine
WORKDIR /app WORKDIR /app
......
# Multi-stage build for production # Multi-stage build for production
FROM node:18-alpine AS builder FROM node:22-alpine AS builder
WORKDIR /app WORKDIR /app
......
...@@ -15,6 +15,17 @@ server { ...@@ -15,6 +15,17 @@ server {
add_header X-Content-Type-Options "nosniff" always; add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always; add_header X-XSS-Protection "1; mode=block" always;
# Proxy /api requests to backend container
location /api/ {
proxy_pass http://cuccu_backend:5000;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 120s;
}
# SPA routing - serve index.html for all routes # SPA routing - serve index.html for all routes
location / { location / {
try_files $uri $uri/ /index.html; try_files $uri $uri/ /index.html;
......
This diff is collapsed.
import { MessageCircleIcon, XIcon, Maximize2Icon, Minimize2Icon } from "lucide-react"; import { MessageCircleIcon, XIcon, Maximize2Icon, Minimize2Icon, Trash2Icon } from "lucide-react";
import { useState, useRef, useCallback, useEffect } from "react"; import { useState, useRef, useCallback, useEffect } from "react";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import ChatbotPanel from "./ChatbotPanel"; import ChatbotPanel, { type ChatbotPanelHandle } from "./ChatbotPanel";
type Position = { type Position = {
x: number; x: number;
...@@ -44,6 +44,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -44,6 +44,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
const hasDraggedRef = useRef(false); const hasDraggedRef = useRef(false);
const dragRef = useRef<{ startX: number; startY: number; startPosX: number; startPosY: number } | null>(null); const dragRef = useRef<{ startX: number; startY: number; startPosX: number; startPosY: number } | null>(null);
const animationFrameRef = useRef<number | null>(null); const animationFrameRef = useRef<number | null>(null);
const chatbotPanelRef = useRef<ChatbotPanelHandle>(null);
// Save position to localStorage when it changes // Save position to localStorage when it changes
useEffect(() => { useEffect(() => {
...@@ -112,11 +113,16 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -112,11 +113,16 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
}, []); }, []);
const handleMouseUp = useCallback(() => { const handleMouseUp = useCallback(() => {
setIsDragging(false);
dragRef.current = null;
}, []);
// Toggle via onClick — only if user didn't drag
const handleClick = useCallback((e: React.MouseEvent) => {
e.stopPropagation();
if (!hasDraggedRef.current) { if (!hasDraggedRef.current) {
setIsOpen((prev) => !prev); setIsOpen((prev) => !prev);
} }
setIsDragging(false);
dragRef.current = null;
}, []); }, []);
useEffect(() => { useEffect(() => {
...@@ -189,7 +195,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -189,7 +195,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
useEffect(() => { useEffect(() => {
if (isDragging) { if (isDragging) {
window.addEventListener("touchmove", handleTouchMove, { passive: true }); window.addEventListener("touchmove", handleTouchMove, { passive: true });
window.addEventListener("touchend", handleTouchEnd, { passive: true }); window.addEventListener("touchend", handleTouchEnd);
return () => { return () => {
window.removeEventListener("touchmove", handleTouchMove); window.removeEventListener("touchmove", handleTouchMove);
window.removeEventListener("touchend", handleTouchEnd); window.removeEventListener("touchend", handleTouchEnd);
...@@ -267,11 +273,21 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -267,11 +273,21 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<span className="inline-flex size-2 rounded-full bg-emerald-500" /> <span className="inline-flex size-2 rounded-full bg-emerald-500" />
<div> <div>
<p className="text-sm font-semibold leading-none">CiCi Assistant</p> <p className="text-sm font-semibold leading-none">CuCu Assistant</p>
<p className="text-xs text-muted-foreground">Notes chat</p> <p className="text-xs text-muted-foreground">Notes chat</p>
</div> </div>
</div> </div>
<div className="flex items-center gap-1"> <div className="flex items-center gap-1">
{/* Nút Clear Messages */}
<button
type="button"
className="rounded-full p-1.5 text-muted-foreground transition-colors hover:bg-destructive/10 hover:text-destructive"
onClick={() => chatbotPanelRef.current?.clearMessages()}
aria-label="Clear messages"
title="Xóa lịch sử chat"
>
<Trash2Icon className="size-4" />
</button>
{/* Nút Expand/Collapse */} {/* Nút Expand/Collapse */}
<button <button
type="button" type="button"
...@@ -301,7 +317,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -301,7 +317,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
{/* Chat Content - không có header riêng nữa */} {/* Chat Content - không có header riêng nữa */}
<div className="h-[calc(100%-52px)]"> <div className="h-[calc(100%-52px)]">
<ChatbotPanel variant="widget" className="border-0 rounded-none" hideHeader /> <ChatbotPanel ref={chatbotPanelRef} variant="widget" className="border-0 rounded-none" hideHeader />
</div> </div>
</div> </div>
)} )}
...@@ -323,6 +339,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -323,6 +339,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
: "bg-primary text-primary-foreground", : "bg-primary text-primary-foreground",
)} )}
onMouseDown={handleMouseDown} onMouseDown={handleMouseDown}
onClick={handleClick}
onTouchStart={handleTouchStart} onTouchStart={handleTouchStart}
role="button" role="button"
tabIndex={0} tabIndex={0}
......
...@@ -8,9 +8,9 @@ import type { MonthNavigatorProps } from "@/types/statistics"; ...@@ -8,9 +8,9 @@ import type { MonthNavigatorProps } from "@/types/statistics";
export const MonthNavigator = ({ visibleMonth, onMonthChange, activityStats }: MonthNavigatorProps) => { export const MonthNavigator = ({ visibleMonth, onMonthChange, activityStats }: MonthNavigatorProps) => {
const [isOpen, setIsOpen] = useState(false); const [isOpen, setIsOpen] = useState(false);
const currentMonth = new Date(visibleMonth);
const currentYear = getYearFromDate(visibleMonth); const currentYear = getYearFromDate(visibleMonth);
const currentMonthNum = getMonthFromDate(visibleMonth); const currentMonthNum = getMonthFromDate(visibleMonth);
const currentMonth = new Date(currentYear, currentMonthNum - 1, 1);
const handlePrevMonth = () => { const handlePrevMonth = () => {
onMonthChange(addMonths(visibleMonth, -1)); onMonthChange(addMonths(visibleMonth, -1));
......
import { useEffect } from "react"; import { useEffect } from "react";
import { useSearchParams } from "react-router-dom"; import { useSearchParams } from "react-router-dom";
import { MemoRenderContext } from "@/components/MasonryView"; import { MemoRenderContext } from "@/components/MasonryView";
import MemoView from "@/components/MemoView"; import MemoView from "@/components/MemoView";
...@@ -53,7 +53,7 @@ const Home = () => { ...@@ -53,7 +53,7 @@ const Home = () => {
renderer={(memo: Memo, context?: MemoRenderContext) => ( renderer={(memo: Memo, context?: MemoRenderContext) => (
<MemoView key={`${memo.name}-${memo.displayTime}`} memo={memo} showVisibility showPinned compact={context?.compact} /> <MemoView key={`${memo.name}-${memo.displayTime}`} memo={memo} showVisibility showPinned compact={context?.compact} />
)} )}
listSort={(memos) => memos.filter((m) => !m.pinned)} // Exclude pinned from regular list listSort={listSort}
orderBy={orderBy} orderBy={orderBy}
filter={memoFilter} filter={memoFilter}
/> />
......
...@@ -2,9 +2,10 @@ import { redirectOnAuthFailure } from "@/utils/auth-redirect"; ...@@ -2,9 +2,10 @@ import { redirectOnAuthFailure } from "@/utils/auth-redirect";
import { getClerkSessionToken } from "@/utils/clerk"; import { getClerkSessionToken } from "@/utils/clerk";
import type { RequestOptions } from "./types"; import type { RequestOptions } from "./types";
// Call backend directly (bypass Vite proxy). // API origin - empty string = relative URLs (proxied via nginx in Docker).
// Override via VITE_API_BASE_URL, e.g. "http://localhost:5000" // Override via VITE_API_BASE_URL, e.g. "http://localhost:5000" for local dev without Docker.
export const API_ORIGIN = (import.meta.env.VITE_API_BASE_URL as string | undefined) || "http://localhost:5000"; const _envOrigin = import.meta.env.VITE_API_BASE_URL as string | undefined;
export const API_ORIGIN: string = (_envOrigin !== undefined && _envOrigin !== "") ? _envOrigin : "";
export const API_BASE = `${API_ORIGIN}/api/v1`; export const API_BASE = `${API_ORIGIN}/api/v1`;
const parseBody = async (response: Response): Promise<unknown> => { const parseBody = async (response: Response): Promise<unknown> => {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment