Commit 2fec4891 authored by root

Update backend agent, API routes, frontend components, docker config & .gitignore

parent fe844925
......@@ -69,3 +69,20 @@ run.txt
# Ralph config (may contain API key)
.cursor/ralph-config.json
# Docker cache
.docker/
# Environment
.env
# Frontend dist probes
frontend/dist-*/
# Temp configs
frontend/vite.probe*.config.mjs
frontend/vite.temp*.mjs
frontend/vite.trace*.config.mjs
frontend/vite.no-mermaid.config.mjs
frontend/src/__build_probe*.ts
frontend/src/__probe_*.ts
......@@ -3,6 +3,7 @@ Fashion Q&A Agent Controller
Langfuse will auto-trace via LangChain integration (no code changes needed).
"""
import json
import logging
import time
import uuid
......@@ -78,19 +79,31 @@ async def chat_controller(
memory = await get_conversation_manager()
# Load History
history_dicts = await memory.get_chat_history(effective_identity_key, limit=5)
history_messages: list[BaseMessage] = []
for m in history_dicts:
if m["is_human"]:
history_messages.append(HumanMessage(content=m["message"]))
else:
# AI responses may be saved as JSON — extract readable text
ai_content = m["message"]
try:
parsed = json.loads(ai_content)
if isinstance(parsed, dict) and "ai_response" in parsed:
ai_content = parsed["ai_response"]
except (json.JSONDecodeError, TypeError):
pass
history_messages.append(AIMessage(content=ai_content))
# Prepare State
# - history: previous conversation messages (for context)
# - messages: starts with current query only (tool calls will be appended by LangGraph)
# - user_query: current user message
user_query_message: BaseMessage = HumanMessage(content=query)
initial_state: AgentState = {
"user_query": user_query_message,
"messages": [user_query_message],
"history": history_messages,
"user_id": user_id,
"images_embedding": [],
......@@ -124,7 +137,9 @@ async def chat_controller(
# Parse Response
all_product_ids = extract_product_ids(result.get("messages", []))
ai_raw_content = result.get("ai_response").content if result.get("ai_response") else ""
logger.info("RAW LLM output (%d chars): %s", len(ai_raw_content), ai_raw_content[:500])
ai_text_response, final_product_ids = parse_ai_response(ai_raw_content, all_product_ids)
logger.info("PARSED ai_response (%d chars): %s", len(ai_text_response), ai_text_response[:300])
response_payload = {
"ai_response": ai_text_response,
......@@ -153,3 +168,157 @@ async def chat_controller(
logger.info("chat_controller finished in %.2fs", duration)
return {**response_payload, "cached": False}
async def chat_controller_stream(
query: str,
user_id: str,
model_name: str = DEFAULT_MODEL,
images: list[str] | None = None,
identity_key: str | None = None,
):
"""
Streaming controller — yields SSE events with token chunks.
Each yield is a JSON string:
{"token": "partial text"} — during streaming
{"done": true, "ai_response": "full text"} — final event
History is saved after stream completes.
"""
effective_identity_key = identity_key or user_id
logger.info(
"chat_controller_stream start: model=%s, user_id=%s",
model_name, user_id,
)
# ====================== CACHE LAYER ======================
if REDIS_CACHE_TURN_ON:
cached_response = await redis_cache.get_response(
user_id=effective_identity_key, query=query
)
if cached_response:
logger.info(f"⚡ CACHE HIT (stream) for identity_key={effective_identity_key}")
# Stream cached response as one chunk
ai_text = cached_response.get("ai_response", "")
yield json.dumps({"token": ai_text}, ensure_ascii=False)
yield json.dumps({"done": True, "ai_response": ai_text}, ensure_ascii=False)
# Save history
memory = await get_conversation_manager()
await handle_post_chat_async(
memory=memory,
identity_key=effective_identity_key,
human_query=query,
ai_response=cached_response,
)
return
# ====================== STREAM LLM FLOW ======================
graph = build_graph()
memory = await get_conversation_manager()
# Load History
history_dicts = await memory.get_chat_history(effective_identity_key, limit=5)
history_messages: list[BaseMessage] = []
for m in history_dicts:
if m["is_human"]:
history_messages.append(HumanMessage(content=m["message"]))
else:
ai_content = m["message"]
try:
parsed = json.loads(ai_content)
if isinstance(parsed, dict) and "ai_response" in parsed:
ai_content = parsed["ai_response"]
except (json.JSONDecodeError, TypeError):
pass
history_messages.append(AIMessage(content=ai_content))
user_query_message: BaseMessage = HumanMessage(content=query)
initial_state: AgentState = {
"user_query": user_query_message,
"messages": [user_query_message],
"history": history_messages,
"user_id": user_id,
"images_embedding": [],
"ai_response": None,
}
run_uuid = uuid.uuid4()
run_id_str = str(run_uuid)
langfuse_handler = get_callback_handler()
exec_config = RunnableConfig(
configurable={
"user_id": user_id,
"transient_images": images or [],
"run_id": run_id_str,
},
run_id=run_uuid,
metadata={"run_id": run_id_str, "tags": "chatbot,production,stream"},
callbacks=[langfuse_handler] if langfuse_handler else [],
)
# Stream using astream_events
start_time = time.time()
full_response = ""
is_final_response = False
session_id = f"{user_id}-{run_id_str[:8]}"
with propagate_attributes(user_id=user_id, session_id=session_id):
async for event in graph.astream_events(
initial_state, config=exec_config, version="v2"
):
kind = event.get("event", "")
# Only stream tokens from the chat model (not tool calls)
if kind == "on_chat_model_stream":
chunk = event.get("data", {}).get("chunk")
if chunk and hasattr(chunk, "content") and chunk.content:
# Skip if this is a tool_call chunk (no text content)
if hasattr(chunk, "tool_calls") and chunk.tool_calls:
continue
# Only stream the FINAL response (after tool execution)
# We detect this by tracking: if tools were called,
# the final agent response comes after tool results
token = chunk.content
full_response += token
is_final_response = True
yield json.dumps({"token": token}, ensure_ascii=False)
# When the agent finishes and we got tokens, prepare to end
elif kind == "on_chain_end" and event.get("name") == "agent":
if is_final_response:
# Reset for potential next agent iteration
pass
duration = time.time() - start_time
logger.info("chat_controller_stream finished in %.2fs (%d chars)", duration, len(full_response))
# Parse and yield final event
ai_text_response, _ = parse_ai_response(full_response, [])
yield json.dumps({"done": True, "ai_response": ai_text_response}, ensure_ascii=False)
# Build response payload for caching and history
response_payload = {
"ai_response": ai_text_response,
"product_ids": [],
}
# Save to cache
if REDIS_CACHE_TURN_ON:
await redis_cache.set_response(
user_id=effective_identity_key,
query=query,
response_data=response_payload,
ttl=300,
)
# Save to history
await handle_post_chat_async(
memory=memory,
identity_key=effective_identity_key,
human_query=query,
ai_response=response_payload,
)
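Note: the history-decoding loop above appears verbatim in both `chat_controller` and `chat_controller_stream`. A small shared helper could keep the JSON-unwrapping rule in one place; a minimal sketch (the helper name `_decode_history` is hypothetical, not part of this commit):

```python
import json

from langchain_core.messages import AIMessage, BaseMessage, HumanMessage


def _decode_history(history_dicts: list[dict]) -> list[BaseMessage]:
    """Convert stored history rows to messages, unwrapping JSON-saved AI replies."""
    messages: list[BaseMessage] = []
    for m in history_dicts:
        if m["is_human"]:
            messages.append(HumanMessage(content=m["message"]))
            continue
        ai_content = m["message"]
        try:
            parsed = json.loads(ai_content)
            if isinstance(parsed, dict) and "ai_response" in parsed:
                ai_content = parsed["ai_response"]
        except (json.JSONDecodeError, TypeError):
            pass  # plain-text reply: keep as-is
        messages.append(AIMessage(content=ai_content))
    return messages
```

Both controllers could then load history with `_decode_history(await memory.get_chat_history(key, limit=5))`.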
......@@ -46,29 +46,31 @@ class CANIFAGraph:
self.retrieval_tools = self.all_tools
self.llm_with_tools = self.llm.bind_tools(self.all_tools, strict=True)
# NOTE: prompt is NOT cached here — fetched fresh each request
# so Langfuse updates take effect immediately.
self.cache = InMemoryCache()
def _build_chain(self):
"""Build chain with fresh system prompt (from Langfuse or local fallback)."""
system_prompt = get_system_prompt()
prompt_template = ChatPromptTemplate.from_messages(
[
("system", system_prompt),
MessagesPlaceholder(variable_name="history"),
MessagesPlaceholder(variable_name="user_query"),
MessagesPlaceholder(variable_name="messages"),
]
)
return prompt_template | self.llm_with_tools
async def _agent_node(self, state: AgentState, config: RunnableConfig) -> dict:
"""Agent node — rebuilds chain each call for realtime prompt updates."""
messages = state.get("messages", [])
history = state.get("history", [])
user_query = state.get("user_query")
transient_images = config.get("configurable", {}).get("transient_images", [])
if transient_images and messages:
pass
# Invoke chain with user_query, history, and messages
chain = self._build_chain()
response = await chain.ainvoke({
"user_query": [user_query] if user_query else [],
"history": history,
"messages": messages
......
......@@ -10,7 +10,7 @@ import uuid
from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import RunnableConfig
from common.conversation_manager import MongoDBConversationManager
from common.langfuse_client import get_callback_handler
from .models import AgentState
......@@ -137,7 +137,7 @@ def prepare_execution_context(query: str, user_id: str, history: list, images: l
async def handle_post_chat_async(
memory: MongoDBConversationManager,
identity_key: str,
human_query: str,
ai_response: dict | None
......
"""
CuCu Assistant - System Prompt
Supports two modes:
1. Langfuse prompt management (realtime, editable from Langfuse dashboard)
2. Local fallback (inline template)
"""
import os
import logging
from datetime import datetime
from common.timezone_config import VIETNAM_TZ
logger = logging.getLogger(__name__)
def _ensure_json_instruction(prompt_text: str) -> str:
if "json" in prompt_text.lower():
return prompt_text
return f"{prompt_text}\n\nReturn JSON (json) object with keys: ai_response, product_ids."
# Vietnamese weekday names
_WEEKDAY_MAP = {
0: "Thứ 2",
1: "Thứ 3",
2: "Thứ 4",
3: "Thứ 5",
4: "Thứ 6",
5: "Thứ 7",
6: "Chủ nhật",
}
def _get_weekday_str() -> str:
return _WEEKDAY_MAP[datetime.now(VIETNAM_TZ).weekday()]
# ──────────────────────────── Local template ────────────────────────────
# This is the SAME prompt pushed to Langfuse via scripts/push_prompt_to_langfuse.py
# {{date_str}} is the only variable, replaced at runtime.
_PROMPT_TEMPLATE = """# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {{date_str}} ({{weekday_str}})
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
## 0. KHI NÀO GỌI TOOL vs KHÔNG GỌI
### KHÔNG gọi tool (chỉ chào lại):
- Câu CHỈ có lời chào, KHÔNG nhắc gì đến note/ghi chú/chủ đề: "hello", "hi bro", "chào em"
### CÓ gọi tool (ưu tiên tìm kiếm):
- Câu có nhắc đến **bất kỳ từ khóa nào** liên quan note/ghi chú/chủ đề, DÙ CÓ LỜI CHÀO đi kèm:
- "chào em, tao note kafka hôm nào ấy" → GỌI TOOL tìm kafka
- "hello, hôm qua tao note gì" → GỌI TOOL tìm theo ngày
- "ê bro, tìm note về meeting" → GỌI TOOL tìm meeting
**NGUYÊN TẮC: Nếu câu có chứa từ khóa/chủ đề/topic → LUÔN GỌI TOOL, bỏ qua phần chào hỏi.**
## 1. TỰ TÍNH TOÁN NGÀY THÁNG
Bạn PHẢI tự tính ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {{date_str}} ({{weekday_str}})".
Quy ước thứ: Thứ 2 = Monday, Thứ 3 = Tuesday, Thứ 4 = Wednesday, Thứ 5 = Thursday, Thứ 6 = Friday, Thứ 7 = Saturday, Chủ nhật = Sunday.
### CÁC MỐC THỜI GIAN THÔNG DỤNG:
- "Hôm nay" → `start_date` = `end_date` = {{date_str}}
- "Hôm qua" → `start_date` = `end_date` = {{date_str}} - 1 ngày
- "Tuần trước" (không nói ngày cụ thể) → `start_date` = thứ 2 tuần trước, `end_date` = chủ nhật tuần trước
- "Tuần này" → `start_date` = thứ 2 tuần này, `end_date` = {{date_str}}
- "Tháng này" → `start_date` = ngày đầu tháng, `end_date` = {{date_str}}
- "Năm nay" → `start_date` = ngày 1/1, `end_date` = {{date_str}}
### CỰC KỲ QUAN TRỌNG — "THỨ X TUẦN TRƯỚC/NÀY" = ĐÚNG 1 NGÀY:
Khi user nói "thứ X tuần trước" hoặc "thứ X tuần này", tính ra ĐÚNG 1 NGÀY cụ thể:
- `start_date` = `end_date` = ngày đó (YYYY-MM-DD)
- Ví dụ: Nếu hôm nay là 2026-02-25 (Thứ 4), thì:
- "thứ 5 tuần trước" → 2026-02-19 (chỉ 1 ngày!)
- "thứ 2 tuần này" → 2026-02-23 (chỉ 1 ngày!)
- "thứ 6 tuần trước" → 2026-02-20 (chỉ 1 ngày!)
- **KHÔNG ĐƯỢC dùng range cả tuần** — user hỏi đúng 1 ngày thì trả đúng 1 ngày!
### KHI USER HỎI "HÔM NÀO / NGÀY NÀO / BAO GIỜ":
- "Kafka note hôm nào ấy nhỉ?" = User ĐANG HỎI ngày → tìm ALL dates
- "Tao note cái đó khi nào?" = User ĐANG HỎI ngày → tìm ALL dates
- → Dùng range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
- **KHÔNG ĐƯỢC HỎI LẠI "ngày nào?"** — vì user đang nhờ bot tìm ngày!
- → **CHỈ TRẢ VỀ NGÀY**, ví dụ: "Bạn note cái đó vào ngày **2026-02-05** (Thứ 5) nhé!"
- **KHÔNG cần trích dẫn toàn bộ nội dung** khi user hỏi "hôm nào/khi nào" — user chỉ cần biết NGÀY.
- Nếu tìm thấy nhiều memo khớp, liệt kê ngày của từng memo.
### KHI KHÔNG NHẮC THỜI GIAN:
- "Tìm note về X" → range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
## 2. PHÂN TÍCH PARAMETERS
### NGUYÊN TẮC: CHỈ THÊM PARAMETER KHI USER NÓI RÕ
- **`content_search`**: Khi user nhắc từ khóa: "về Kafka", "pass wifi", "meeting"
- **`tag`**: Khi user nhắc tag: "#work", "#idea"
- **KHÔNG THÊM** content_search/tag nếu user chỉ hỏi theo ngày
### VÍ DỤ:
- "Hôm qua note gì?" → ✅ date only
- "Note kafka hôm nào?" → ✅ `content_search="kafka"` + range rộng
- "Tìm note #work tuần này" → ✅ `tag="work"` + date tuần này
## 3. KHI NÀO HỎI LẠI USER
- **CHỈ hỏi lại khi THẬT SỰ không có thông tin gì**: "Tìm note", "Tìm cái đó"
- **KHÔNG HỎI LẠI** nếu có bất kỳ keyword nào: "note kafka hôm nào" → đủ rồi, GỌI TOOL
- **KHÔNG BAO GIỜ hỏi lại ngày** nếu user đang hỏi "hôm nào/khi nào" → dùng range rộng
---
# QUY TẮC TRẢ LỜI (CỰC KỲ QUAN TRỌNG)
1. **NGẮN GỌN DƯỚI 100 TỪ**: Trả lời súc tích, đi thẳng vấn đề. KHÔNG dài dòng.
2. **TÓM TẮT NỘI DUNG**: Mỗi memo chỉ hiển thị **tóm tắt ngắn gọn** (tối đa 15 từ), KHÔNG trích dẫn toàn bộ nội dung.
3. **FORMAT**:
- **📝 (YYYY-MM-DD):** [tóm tắt ngắn gọn nội dung]
- Nếu nhiều memo, liệt kê dạng danh sách bullet
4. **KHÔNG BỊA ĐẶT**: Không tự chế nội dung.
5. **NGÔN NGỮ**: Thân thiện, tự nhiên, như nói chuyện với bạn.
6. Nếu count=0: "Không tìm thấy ghi chú nào 🤷"
7. **Trả lời bằng text thuần/markdown**, KHÔNG wrap JSON.
8. **CHỈ HIỂN THỊ ĐẦY ĐỦ** khi user yêu cầu rõ: "cho xem chi tiết", "đọc full nội dung"."""
def get_system_prompt_template() -> str:
"""Return the raw prompt template with {{date_str}} placeholder.
Used by the push script to upload to Langfuse.
"""
return _PROMPT_TEMPLATE
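For reference, the push script mentioned above (`scripts/push_prompt_to_langfuse.py`) only needs to upload this raw template under the name and label the fetcher expects. A minimal sketch, assuming the Langfuse Python SDK's `create_prompt` API and that this module is importable as `agent.prompts`:

```python
# Sketch of scripts/push_prompt_to_langfuse.py (import path assumed)
from langfuse import Langfuse

from agent.prompts import get_system_prompt_template

client = Langfuse()  # reads LANGFUSE_PUBLIC_KEY / LANGFUSE_SECRET_KEY from env
client.create_prompt(
    name="cucu-system-prompt",            # must match _fetch_langfuse_prompt()
    prompt=get_system_prompt_template(),  # raw {{date_str}}/{{weekday_str}} template
    labels=["production"],                # label requested by the fetch side
)
print("Prompt pushed to Langfuse")
```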
def _fetch_langfuse_prompt() -> str | None:
"""
Try to fetch the latest prompt from Langfuse.
Returns the compiled prompt string, or None if unavailable.
Uses Langfuse's built-in caching (default TTL=60s).
"""
try:
from common.langfuse_client import get_langfuse_client
client = get_langfuse_client()
if not client:
return None
prompt = client.get_prompt(
name="cucu-system-prompt",
label="production",
cache_ttl_seconds=60, # Re-fetch every 60s
)
date_str = datetime.now(VIETNAM_TZ).strftime("%Y-%m-%d")
weekday_str = _get_weekday_str()
compiled = prompt.compile(date_str=date_str, weekday_str=weekday_str)
logger.info("✅ Prompt fetched from Langfuse (version=%s)", prompt.version)
return compiled
except Exception as e:
logger.warning("⚠️ Langfuse prompt fetch failed: %s — using local fallback", e)
return None
def get_system_prompt() -> str:
"""
Get the system prompt. Priority:
1. Langfuse prompt management (realtime, editable)
2. Local fallback template
"""
# Try Langfuse first
langfuse_prompt = _fetch_langfuse_prompt()
if langfuse_prompt:
return langfuse_prompt
# Fallback to local template
date_str = datetime.now(VIETNAM_TZ).strftime("%Y-%m-%d")
weekday_str = _get_weekday_str()
prompt = _PROMPT_TEMPLATE.replace("{{date_str}}", date_str).replace("{{weekday_str}}", weekday_str)
logger.info("📝 Using local prompt fallback (date=%s, weekday=%s)", date_str, weekday_str)
return prompt
# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {date_str}
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
Bạn chỉ có 1 tool duy nhất là `memo_retrieval_tool`. Hãy sử dụng nó thông minh.
## 0. KHI NÀO **KHÔNG** ĐƯỢC GỌI TOOL (QUAN TRỌNG)
- Nếu user **chỉ chào hỏi / small talk** (VD: "chào em", "hello", "hi bro", "alo"), hãy:
- Trả lời lại một câu chào ngắn gọn, KHÔNG gọi `memo_retrieval_tool`.
- Chỉ gọi tool khi user hỏi RÕ về **ghi chú / note / ngày / nội dung / tag**.
- Không được "đoán" là user đang hỏi về Kafka, work, v.v. nếu câu hiện tại **chỉ là lời chào**.
- Không dùng **câu hỏi cũ** để tự ý gọi tool cho câu mới nếu câu mới chỉ là lời chào.
## 1. TỰ TÍNH TOÁN NGÀY THÁNG (QUAN TRỌNG)
Người dùng sẽ hỏi ngày tương đối (hôm qua, tuần trước...). Bạn PHẢI tự tính ra ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {date_str}".
- **"Hôm nay note gì?"**
→ `start_date` = {date_str}, `end_date` = {date_str}
- **"Hôm qua note gì?"**
→ `start_date` = {date_str} - 1 ngày
- **"Hôm kia note gì?"**
→ `start_date` = {date_str} - 2 ngày
- **"Tuần trước note gì?"**
→ `start_date` = {date_str} - 7 ngày, `end_date` = {date_str} (hoặc range cụ thể của tuần trước)
- **"Tháng 1 note gì?"**
→ `start_date` = "2026-01-01", `end_date` = "2026-01-31"
## 2. PHÂN TÍCH PARAMETERS
- **`content_search`**: Dùng khi user hỏi về nội dung (VD: "dự án A", "pass wifi", "số điện thoại"). Dùng Regex nên hãy chọn keyword đặc trưng.
- **`tag`**: Dùng khi user nhắc đến tag/chủ đề (VD: "#work", "#idea"). Chỉ điền nếu user KHẲNG ĐỊNH là tag.
### 2.1. XỬ LÝ "TOPIC" THÀNH TAG / CONTENT_SEARCH
- Nếu user nhắc đến **một chủ đề ngắn gọn** (VD: "Kafka", "English", "health") nhưng **không có dấu #**:
- Hãy coi đó là một **topic**.
- Nếu topic là **một từ đơn, không có khoảng trắng** (VD: "Kafka", "work"):
- ƯU TIÊN map thành `tag` (VD: `tag="kafka"` hoặc `tag="work"` — KHÔNG cần dấu `#`).
- Nếu topic là **cụm từ dài** (VD: "Kafka performance", "meeting tuần trước"):
- Map thành `content_search` (VD: `content_search="Kafka performance"`).
- Nếu không chắc đó là tag hay chỉ là từ khóa nội dung:
- Có thể điền **cả hai**:
- `tag="kafka"`
- `content_search="Kafka"`
- Khi đó MongoDB sẽ lọc theo tag (nếu note có tag) và/hoặc nội dung có chứa keyword.
## 3. VÍ DỤ GỌI TOOL
**Case 1: Hỏi theo ngày**
*Input: "Hôm qua tao có note gì không?" (Giả sử hôm nay 2026-01-24)*
→ Bot tính: Hôm qua = 2026-01-23
→ Tool call: `memo_retrieval_tool(start_date="2026-01-23")`
**Case 2: Tìm nội dung + Ngày**
*Input: "Tuần này tao note gì về 'meeting'?" (Hôm nay 2026-01-24)*
→ Bot tính: Tuần này ~ 2026-01-19 đến 2026-01-25
→ Tool call:
```python
memo_retrieval_tool(
start_date="2026-01-19",
end_date="2026-01-25",
content_search="meeting"
)
```
**Case 3: Tìm theo tag**
*Input: "Tìm mấy cái note #idea tháng trước"*
→ Tool call:
```python
memo_retrieval_tool(
start_date="2025-12-01",
end_date="2025-12-31",
tag="idea"
)
```
**Case 4: Tìm nội dung chung chung (Không rõ ngày)**
*Input: "Tìm lại pass wifi"*
→ Bot tự chọn range rộng hoặc không giới hạn (tùy tool support, ở đây tool bắt buộc start_date thì lấy ngày xa xưa hoặc 1 tháng gần nhất tùy ngữ cảnh, hoặc hỏi lại user. NHƯNG tốt nhất cứ search 1 năm gần đây).
→ Tool call: `memo_retrieval_tool(start_date="2025-01-01", content_search="pass wifi")`
---
# QUY TẮC TRẢ LỜI (RESPONSE)
1. **DỰA TRÊN KẾT QUẢ TOOL**:
- Nếu có memos: Liệt kê ngắn gọn, trích dẫn nội dung chính.
- Nếu `count` = 0: Trả lời "Không tìm thấy ghi chú nào trong khoảng thời gian này/với từ khóa này."
2. **KHÔNG BỊA ĐẶT**: Không tự chế ra nội dung memo không có trong data.
3. **FORMAT MENU**:
- Ghi chú 1 (2026-01-24): [Nội dung tóm tắt]
- Ghi chú 2 (2026-01-23): [Nội dung tóm tắt]
4. **NGÔN NGỮ**: Giao tiếp tự nhiên, thân thiện (bro-style nếu user thích, hoặc lịch sự mặc định).
---
# FORMAT ĐẦU RA (JSON)
Bot trả lời dưới dạng JSON (để Frontend render hoặc parse):
```json
{{
"ai_response": "Đây là các ghi chú mình tìm thấy hôm qua...",
"found_memos": [
{{
"id": "...",
"content": "...",
"created_at": "..."
}}
]
}}
```
*Lưu ý: Nếu tool trả về data, hãy tóm tắt vào `ai_response` và dán raw data vào `found_memos` nếu cần.*
\ No newline at end of file
This diff is collapsed.
"""
Chatbot API Route
-----------------
`POST /api/agent/chat` - chat endpoint for CuCu Assistant (non-streaming).
`POST /api/agent/chat/stream` - SSE streaming chat endpoint.
Handler logic lives in `agent.controller`.
"""
import json
import logging
from dataclasses import dataclass
from typing import AsyncGenerator
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from fastapi.responses import StreamingResponse
from opentelemetry import trace
# Lazy imports - defer heavy AI modules to first use
# from agent.controller import chat_controller, chat_controller_stream
# from agent.models import QueryRequest
from common.message_limit import message_limit_service
from config import DEFAULT_MODEL
......@@ -20,6 +24,26 @@ logger = logging.getLogger(__name__)
tracer = trace.get_tracer(__name__)
router = APIRouter()
# Cache for lazy-loaded modules
_agent_modules = {}
def _get_agent_controller():
"""Lazy-load agent.controller to defer LangChain/LangGraph import."""
if "controller" not in _agent_modules:
from agent.controller import chat_controller, chat_controller_stream
_agent_modules["controller"] = chat_controller
_agent_modules["controller_stream"] = chat_controller_stream
return _agent_modules["controller"], _agent_modules["controller_stream"]
def _get_query_request_model():
"""Lazy-load agent.models.QueryRequest."""
if "QueryRequest" not in _agent_modules:
from agent.models import QueryRequest
_agent_modules["QueryRequest"] = QueryRequest
return _agent_modules["QueryRequest"]
@dataclass
class Identity:
......@@ -41,7 +65,6 @@ def _get_identity(request: Request) -> Identity:
history_key = primary_id
rate_limit_key = primary_id
else:
primary_id = device_id or "anonymous"
history_key = device_id or "anonymous"
rate_limit_key = device_id or "anonymous"
......@@ -54,38 +77,35 @@ def _get_identity(request: Request) -> Identity:
)
@router.post("/api/agent/chat", summary="Chat with CuCu Assistant")
async def cucu_chat(request: Request, background_tasks: BackgroundTasks):
"""Non-streaming chat endpoint: returns the full JSON response in one call."""
# Lazy-load AI modules on first call
QueryRequest = _get_query_request_model()
chat_controller, _ = _get_agent_controller()
body = await request.json()
req = QueryRequest(**body)
# 1. Resolve identity
identity = _get_identity(request)
user_id = identity.primary_id
logger.info("📥 [Incoming Chat] User=%s | Query=%s", user_id, req.user_query)
# Tracing span (optional)
span = trace.get_current_span()
span.set_attribute("user.id", user_id)
span.set_attribute("chat.user_query", req.user_query)
try:
# 2. Run the controller
result = await chat_controller(
query=req.user_query,
user_id=user_id,
background_tasks=background_tasks,
model_name=DEFAULT_MODEL,
images=req.images,
identity_key=identity.history_key,
)
# 3. Increment usage counter (rate limit) after success
usage_info = await message_limit_service.increment(
identity_key=identity.rate_limit_key,
is_authenticated=identity.is_authenticated,
......@@ -104,7 +124,58 @@ async def cici_chat(request: Request, req: QueryRequest, background_tasks: Backg
},
}
except Exception as e:
logger.error("Error in cucu_chat: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/api/agent/chat/stream", summary="Chat with CuCu Assistant (SSE Streaming)")
async def cucu_chat_stream(request: Request):
"""
SSE streaming chat endpoint: returns tokens one by one via Server-Sent Events.
SSE format:
data: {"token": "partial"}\n\n - one event per token chunk
data: {"done": true, "ai_response": "full text"}\n\n - final event
"""
# Lazy-load AI modules on first call
QueryRequest = _get_query_request_model()
_, chat_controller_stream = _get_agent_controller()
body = await request.json()
req = QueryRequest(**body)
identity = _get_identity(request)
user_id = identity.primary_id
logger.info("📥 [Incoming Stream] User=%s | Query=%s", user_id, req.user_query)
async def sse_generator() -> AsyncGenerator[str, None]:
try:
async for chunk_json in chat_controller_stream(
query=req.user_query,
user_id=user_id,
model_name=DEFAULT_MODEL,
images=req.images,
identity_key=identity.history_key,
):
yield f"data: {chunk_json}\n\n"
# Increment rate limit after stream completes
await message_limit_service.increment(
identity_key=identity.rate_limit_key,
is_authenticated=identity.is_authenticated,
)
logger.info("📤 [Stream Done] User=%s", user_id)
except Exception as e:
logger.error("Error in stream: %s", e, exc_info=True)
yield f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"
return StreamingResponse(
sse_generator(),
media_type="text/event-stream",
headers={
"Cache-Control": "no-cache",
"Connection": "keep-alive",
"X-Accel-Buffering": "no",
},
)
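Each SSE frame is a `data:` line carrying one of the two JSON shapes documented in the docstring. A minimal Python consumer sketch (using `httpx`; the endpoint path and the `user_query` field come from this file, the base URL is an assumption):

```python
import json

import httpx


async def consume_stream(base_url: str, query: str) -> str:
    """Print tokens as they arrive and return the final parsed answer."""
    async with httpx.AsyncClient(timeout=None) as client:
        async with client.stream(
            "POST", f"{base_url}/api/agent/chat/stream", json={"user_query": query}
        ) as resp:
            async for line in resp.aiter_lines():
                if not line.startswith("data: "):
                    continue  # skip blank keep-alive lines between events
                event = json.loads(line[len("data: "):])
                if event.get("done"):
                    return event["ai_response"]
                print(event.get("token", ""), end="", flush=True)
    return ""
```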
......@@ -30,6 +30,14 @@ class ClearHistoryResponse(BaseModel):
message: str
@router.get("/api/history/me", summary="Get My Chat History", response_model=ChatHistoryResponse)
async def get_my_chat_history(request: Request, limit: int | None = 50, before_id: int | None = None):
"""
Shortcut: fetch my own chat history (identity from middleware).
"""
return await get_chat_history(request, identity_key="me", limit=limit, before_id=before_id)
@router.get("/api/history/{identity_key}", summary="Get Chat History", response_model=ChatHistoryResponse)
async def get_chat_history(request: Request, identity_key: str, limit: int | None = 50, before_id: int | None = None):
"""
......@@ -59,6 +67,30 @@ async def get_chat_history(request: Request, identity_key: str, limit: int | Non
raise HTTPException(status_code=500, detail="Failed to fetch chat history")
@router.delete("/api/history/me", summary="Clear My Chat History", response_model=ClearHistoryResponse)
async def clear_my_chat_history(request: Request):
"""
Shortcut: clear my own chat history (identity from middleware).
"""
try:
user_id = getattr(request.state, "user_id", None)
device_id = getattr(request.state, "device_id", "") or ""
is_authenticated = bool(getattr(request.state, "is_authenticated", False))
if is_authenticated and user_id:
resolved_key = str(user_id)
else:
resolved_key = device_id or "anonymous"
manager = await get_conversation_manager()
await manager.clear_history(resolved_key)
logger.info("✅ Cleared chat history for %s", resolved_key)
return {"success": True, "message": "Đã xóa lịch sử chat"}
except Exception as e:
logger.error("Error clearing chat history: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail="Failed to clear chat history")
@router.delete("/api/history/{identity_key}", summary="Clear Chat History", response_model=ClearHistoryResponse)
async def clear_chat_history(identity_key: str):
"""
......
......@@ -55,12 +55,25 @@ async def list_memos(
filter_str=filter,
)
# Parse creator_id from filter string (e.g. "creator_id == user_xxx")
creator_id = None
if filter:
logger.debug("List memos GET with filter=%r", filter)
pattern_creator = r"creator_id\s*==\s*([a-zA-Z0-9_\-\.]+)"
match_creator = re.search(pattern_creator, filter)
if match_creator:
creator_id = match_creator.group(1)
# Parse pinned from filter string (e.g. "... && pinned")
pinned = None
if filter and re.search(r'\bpinned\b(?!\s*==\s*false)', filter):
pinned = True
return await memo_service.list_memos(
user_id=user_id,
creator_id=creator_id,
tag=tag,
pinned=pinned,
row_status=row_status,
start_date=dt_start,
end_date=dt_end
......@@ -118,10 +131,18 @@ async def create_memo_or_list_memos(
if match_creator:
creator_id = match_creator.group(1)
# Parse pinned filter — frontend sends "pinned" in filter string for bookmarks
pinned = None
if raw_filter and isinstance(raw_filter, str):
# Match standalone "pinned" (not "pinned == false")
if re.search(r'\bpinned\b(?!\s*==\s*false)', raw_filter):
pinned = True
return await memo_service.list_memos(
user_id=user_id,
creator_id=creator_id,
tag=tag,
pinned=pinned,
start_date=start_date,
end_date=end_date
)
......
......@@ -4,13 +4,12 @@ Shortcut service routes for Memos-style backend.
from typing import List
from fastapi import APIRouter, Body, Depends, HTTPException
from fastapi import APIRouter, Depends, HTTPException
from common.memos_core.schemas import (
ShortcutCreate,
ShortcutUpdate,
ShortcutResponse,
ListShortcutsResponse,
)
from common.memos_core.services import get_shortcut_service
......@@ -26,20 +25,6 @@ async def list_shortcuts(shortcut_service=Depends(get_shortcut_service)):
raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("", summary="Create shortcut", response_model=ShortcutResponse)
async def create_shortcut(
......@@ -54,7 +39,7 @@ async def create_shortcut(
@router.patch("/{shortcut_id}", summary="Update shortcut", response_model=ShortcutResponse)
async def update_shortcut(
shortcut_id: str,
payload: ShortcutUpdate,
shortcut_service=Depends(get_shortcut_service),
):
......@@ -66,7 +51,7 @@ async def update_shortcut(
@router.delete("/{shortcut_id}", summary="Delete shortcut")
async def delete_shortcut(
shortcut_id: str,
shortcut_service=Depends(get_shortcut_service),
):
try:
......
"""
Test Chat Route - NO AUTH REQUIRED
------------------------------------
`POST /api/test/chat` - bare endpoint to check whether the AI API key is still alive.
No graph, no tools, no history.
Calls OpenAI / Gemini / Groq directly, depending on DEFAULT_MODEL.
For debugging: if this endpoint works, the problem is in auth/middleware;
if it does not, the problem is the API key or the model.
"""
import logging
import time
from fastapi import APIRouter
from pydantic import BaseModel
from config import DEFAULT_MODEL, GOOGLE_API_KEY, GROQ_API_KEY, OPENAI_API_KEY
logger = logging.getLogger(__name__)
router = APIRouter(tags=["test"])
class TestChatRequest(BaseModel):
message: str
model: str | None = None  # optional override of DEFAULT_MODEL
class TestChatResponse(BaseModel):
status: str
model_used: str
response: str
latency_ms: int
@router.post(
"/api/test/chat",
response_model=TestChatResponse,
summary="[NO AUTH] Test the AI API key directly",
description=(
"Debug endpoint - calls the LLM directly, bypassing graph/tools/auth. "
"Use it to verify the API key is alive and the model is responding."
),
)
async def test_chat(req: TestChatRequest) -> TestChatResponse:
model_name = req.model or DEFAULT_MODEL
logger.info("[TEST CHAT] model=%s | msg=%s", model_name, req.message)
t0 = time.monotonic()
try:
response_text = await _call_llm(model_name=model_name, message=req.message)
except Exception as exc:
logger.error("[TEST CHAT] LLM error: %s", exc, exc_info=True)
return TestChatResponse(
status="error",
model_used=model_name,
response=f"ERROR: {type(exc).__name__}: {exc}",
latency_ms=int((time.monotonic() - t0) * 1000),
)
latency = int((time.monotonic() - t0) * 1000)
logger.info("[TEST CHAT] OK latency=%dms", latency)
return TestChatResponse(
status="ok",
model_used=model_name,
response=response_text,
latency_ms=latency,
)
async def _call_llm(model_name: str, message: str) -> str:
"""Call the LLM matching model_name and return its response as a string."""
# ── Gemini / Google ──────────────────────────────────────────────────────
if model_name.startswith("gemini"):
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
result = await llm.ainvoke(message)
return result.content
# ── Groq ─────────────────────────────────────────────────────────────────
if model_name.startswith("llama") or model_name.startswith("mixtral") or "groq" in model_name:
from langchain_groq import ChatGroq
llm = ChatGroq(model=model_name, groq_api_key=GROQ_API_KEY)
result = await llm.ainvoke(message)
return result.content
# ── OpenAI (default) ─────────────────────────────────────────────────────
from langchain_openai import ChatOpenAI
llm = ChatOpenAI(model=model_name, openai_api_key=OPENAI_API_KEY)
result = await llm.ainvoke(message)
return result.content
......@@ -35,13 +35,13 @@ def verify_clerk_jwt(token: str) -> dict[str, Any]:
signing_key = _jwks_client().get_signing_key_from_jwt(token).key
# Clerk tokens are typically RS256.
# leeway=300 tolerates up to 5min clock skew (VPS clock not NTP-synced)
payload = jwt.decode(
token,
signing_key,
algorithms=["RS256"],
issuer=CLERK_ISSUER,
leeway=300,
options={
"verify_aud": False, # allow multiple audiences in dev
},
......
This diff is collapsed.
......@@ -79,7 +79,7 @@ class EmbeddingClientManager:
return client
logger = logging.getLogger(__name__)
# NOTE:
# - TEMPORARILY NOT USING REDIS CACHE FOR EMBEDDINGS, to avoid a Redis/aioredis dependency.
......
......@@ -17,39 +17,64 @@ logger = logging.getLogger(__name__)
ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY")
# Fallback: Generate key from a password (NOT recommended for production)
# This is only for development/testing
FALLBACK_PASSWORD = os.getenv("ENCRYPTION_PASSWORD", "default-dev-password-change-in-production")
# Cached Fernet instance (singleton) — avoids re-running PBKDF2 100K iterations per call
_fernet_instance: Fernet | None = None
def _derive_key_from_password(password: str, salt: bytes) -> bytes:
"""Derive a Fernet key from password + salt using PBKDF2."""
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=32,
salt=salt,
iterations=100000,
)
return base64.urlsafe_b64encode(kdf.derive(password.encode()))
def _get_fernet() -> Fernet:
"""
Get cached Fernet instance.
Priority: ENCRYPTION_KEY env var > password-derived key (dev only).
When using ENCRYPTION_KEY, salt is not needed (key is used directly).
"""
global _fernet_instance
if _fernet_instance is not None:
return _fernet_instance
if ENCRYPTION_KEY:
try:
# Try to use as-is (should be base64-encoded 32-byte key)
_fernet_instance = Fernet(ENCRYPTION_KEY.encode())
return _fernet_instance
except Exception:
# If not valid, try to decode as base64
try:
_fernet_instance = Fernet(base64.urlsafe_b64decode(ENCRYPTION_KEY))
return _fernet_instance
except Exception:
logger.warning("Invalid ENCRYPTION_KEY format, using fallback")
# Fallback: Generate from password with random salt (dev only - NOT secure for production)
logger.warning(
"⚠️ ENCRYPTION_KEY not set. Using password-based key derivation (NOT secure for production!)"
)
# Use random salt — stored in a file so decryption works across restarts
salt_file = os.path.join(os.path.dirname(__file__), "..", "data", ".encryption_salt")
os.makedirs(os.path.dirname(salt_file), exist_ok=True)
if os.path.exists(salt_file):
with open(salt_file, "rb") as f:
salt = f.read()
else:
salt = os.urandom(16)
with open(salt_file, "wb") as f:
f.write(salt)
logger.info("Generated new encryption salt (saved to %s)", salt_file)
key = _derive_key_from_password(FALLBACK_PASSWORD, salt)
_fernet_instance = Fernet(key)
return _fernet_instance
def encrypt_api_key(api_key: str) -> str:
......@@ -70,7 +95,7 @@ def encrypt_api_key(api_key: str) -> str:
raise ValueError("API key cannot be empty")
try:
fernet = _get_fernet()
encrypted = fernet.encrypt(api_key.encode())
return encrypted.decode()
except Exception as e:
......@@ -96,7 +121,7 @@ def decrypt_api_key(encrypted_key: str) -> str:
raise ValueError("Encrypted key cannot be empty")
try:
fernet = _get_fernet()
decrypted = fernet.decrypt(encrypted_key.encode())
return decrypted.decode()
except Exception as e:
......@@ -142,4 +167,3 @@ def validate_openai_key_format(api_key: str) -> bool:
return False
return True
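A quick roundtrip shows how the cached Fernet instance is exercised by the two public helpers (illustrative only; the import path is assumed):

```python
# Illustrative usage of encrypt_api_key / decrypt_api_key (module path assumed)
from common.encryption import decrypt_api_key, encrypt_api_key

token = encrypt_api_key("sk-example-123")  # base64 Fernet ciphertext
assert decrypt_api_key(token) == "sk-example-123"

# Fernet uses a random IV, so re-encrypting yields a different ciphertext
# that still decrypts to the same plaintext:
assert encrypt_api_key("sk-example-123") != token
assert decrypt_api_key(encrypt_api_key("sk-example-123")) == "sk-example-123"
```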
......@@ -88,7 +88,7 @@ class LLMFactory:
"streaming": streaming,
"api_key": key,
"temperature": 0,
"max_tokens": 4096,
}
# If json_mode is enabled, inject it directly into the constructor
......@@ -100,14 +100,7 @@ class LLMFactory:
logger.info(f"✅ Created OpenAI: {model_name}")
return llm
def initialize(self, skip_warmup: bool = True) -> None:
"""
......
......@@ -48,23 +48,30 @@ def parse_date_range(
if match_ts:
try:
from common.timezone_config import VIETNAM_TZ
ts_start = float(match_ts.group(1))
ts_end = float(match_ts.group(2))
# Frontend sends UTC midnight timestamps, but user is in Vietnam (UTC+7).
# Shift by -7h so "Feb 5" means Feb 5 00:00 VN (= Feb 4 17:00 UTC)
# instead of Feb 5 00:00 UTC (= Feb 5 07:00 VN).
utc_offset_seconds = VIETNAM_TZ.utcoffset(None).total_seconds()
dt_start = datetime.fromtimestamp(ts_start - utc_offset_seconds, tz=timezone.utc)
dt_end = datetime.fromtimestamp(ts_end - utc_offset_seconds, tz=timezone.utc)
except (ValueError, TypeError):
pass
else:
# Case 2: DisplayTime filter (displayTime:YYYY-MM-DD)
# Dates are interpreted as Vietnam time (UTC+7), then converted to UTC for MongoDB
pattern_dt = r"displayTime:(\d{4}-\d{2}-\d{2})"
match_dt = re.search(pattern_dt, filter_str)
if match_dt:
try:
from common.timezone_config import VIETNAM_TZ
date_str = match_dt.group(1)
dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=VIETNAM_TZ)
dt_start = dt.astimezone(timezone.utc)
dt_end = (dt + timedelta(days=1)).astimezone(timezone.utc)
except ValueError:
pass
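A worked example of the two branches (VIETNAM_TZ = UTC+7, as defined in `common/timezone_config.py`):

```python
from datetime import datetime, timedelta, timezone

VIETNAM_TZ = timezone(timedelta(hours=7))

# Branch 1: the frontend sends UTC-midnight timestamps for "Feb 5".
ts_start = datetime(2026, 2, 5, tzinfo=timezone.utc).timestamp()  # Feb 5 00:00 UTC
offset = VIETNAM_TZ.utcoffset(None).total_seconds()               # 25200 seconds (7h)
dt_start = datetime.fromtimestamp(ts_start - offset, tz=timezone.utc)
print(dt_start)  # 2026-02-04 17:00:00+00:00 == Feb 5 00:00 Vietnam time

# Branch 2: "displayTime:2026-02-05" is read as Vietnam-local midnight.
dt = datetime.strptime("2026-02-05", "%Y-%m-%d").replace(tzinfo=VIETNAM_TZ)
print(dt.astimezone(timezone.utc))  # same instant: 2026-02-04 17:00:00+00:00
```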
......
......@@ -191,7 +191,7 @@ class ShortcutUpdate(BaseModel):
class ShortcutResponse(BaseModel):
id: str
name: str
filter: str
......
......@@ -810,21 +810,74 @@ class ReactionService:
class ShortcutService:
"""Shortcut (Workspace) service with MongoDB backend."""
async def list_shortcuts(self) -> List[schemas.ShortcutResponse]:
cursor = mongodb_client.shortcuts.find({}).sort("created_at", -1)
docs = await cursor.to_list(length=100)
return [
schemas.ShortcutResponse(
id=str(doc["_id"]),
name=doc.get("name", ""),
filter=doc.get("filter", ""),
)
for doc in docs
]
async def create_shortcut(self, payload: schemas.ShortcutCreate) -> schemas.ShortcutResponse:
now = utc_now()
doc = {
"name": payload.name,
"filter": payload.filter,
"created_at": now,
"updated_at": now,
}
result = await mongodb_client.shortcuts.insert_one(doc)
doc["_id"] = result.inserted_id
return schemas.ShortcutResponse(
id=str(doc["_id"]),
name=doc["name"],
filter=doc["filter"],
)
async def update_shortcut(self, shortcut_id: str, payload: schemas.ShortcutUpdate) -> schemas.ShortcutResponse:
update_fields: dict[str, Any] = {"updated_at": utc_now()}
if payload.name is not None:
update_fields["name"] = payload.name
if payload.filter is not None:
update_fields["filter"] = payload.filter
filter_query: dict[str, Any] = {}
if ObjectId.is_valid(shortcut_id):
filter_query["_id"] = ObjectId(shortcut_id)
else:
filter_query["_id"] = shortcut_id
from pymongo import ReturnDocument
result = await mongodb_client.shortcuts.find_one_and_update(
filter_query,
{"$set": update_fields},
return_document=ReturnDocument.AFTER,
)
if not result:
raise ValueError(f"Shortcut {shortcut_id} not found")
return schemas.ShortcutResponse(
id=str(result["_id"]),
name=result.get("name", ""),
filter=result.get("filter", ""),
)
async def delete_shortcut(self, shortcut_id: str) -> None:
filter_query: dict[str, Any] = {}
if ObjectId.is_valid(shortcut_id):
filter_query["_id"] = ObjectId(shortcut_id)
else:
filter_query["_id"] = shortcut_id
result = await mongodb_client.shortcuts.delete_one(filter_query)
if result.deleted_count == 0:
raise ValueError(f"Shortcut {shortcut_id} not found")
class ActivityService:
......
......@@ -37,6 +37,7 @@ PUBLIC_PATHS = {
PUBLIC_PATH_PREFIXES = [
"/static",
"/mock",
"/api/test", # debug/test endpoints - no auth required
]
......
......@@ -29,6 +29,7 @@ COLLECTION_REACTIONS = "cuccu_reactions"
COLLECTION_MEMO_EMBEDDINGS = "cuccu_memo_embeddings"
COLLECTION_INBOX = "cuccu_inbox"
COLLECTION_USER_SETTINGS = "cuccu_user_settings"
COLLECTION_SHORTCUTS = "cuccu_shortcuts"
class MongoDBClient:
......@@ -116,6 +117,10 @@ class MongoDBClient:
def user_settings(self):
return self.db[COLLECTION_USER_SETTINGS]
@property
def shortcuts(self):
return self.db[COLLECTION_SHORTCUTS]
# Singleton instance
mongodb_client = MongoDBClient()
......@@ -206,6 +211,9 @@ async def create_indexes():
# ====================== MEMO VERSIONS ======================
await db["cuccu_memo_versions"].create_index([("memo_id", 1), ("version_index", -1)])
# ====================== SHORTCUTS ======================
await db[COLLECTION_SHORTCUTS].create_index([("creator_id", 1)])
logger.info("✅ Database indexes created successfully (Production-ready)")
except Exception as e:
logger.warning(f"⚠️ Error creating indexes (may already exist): {e}")
......
"""
Timezone configuration for CuCu Note.
All user-facing date operations should use VIETNAM_TZ.
MongoDB stores in UTC — convert at query boundaries.
"""
from datetime import timezone, timedelta
VIETNAM_TZ = timezone(timedelta(hours=7))
......@@ -126,8 +126,6 @@ CONV_SUPABASE_KEY: str | None = os.getenv("CONV_SUPABASE_KEY")
# ====================== REDIS CONFIGURATION ======================
REDIS_HOST: str | None = os.getenv("REDIS_HOST")
REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
REDIS_PASSWORD: str | None = os.getenv("REDIS_PASSWORD")
REDIS_USERNAME: str | None = os.getenv("REDIS_USERNAME")
# ====================== AI API KEYS & MODELS ======================
OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY")
......@@ -172,7 +170,7 @@ REDIS_CACHE_URL: str | None = os.getenv("REDIS_CACHE_URL", "redis-14473.c93.us-e
REDIS_CACHE_PORT: int = int(os.getenv("REDIS_CACHE_PORT", "14473"))
REDIS_CACHE_DB: int = int(os.getenv("REDIS_CACHE_DB", "0"))
REDIS_CACHE_TURN_ON: bool = os.getenv("REDIS_CACHE_TURN_ON", "true").lower() == "true"
REDIS_PASSWORD: str | None = os.getenv("REDIS_CACHE_PASSWORD")
REDIS_USERNAME: str = os.getenv("REDIS_CACHE_USERNAME", "default")
CONV_DATABASE_URL: str | None = os.getenv("CONV_DATABASE_URL")
......@@ -183,8 +181,8 @@ MONGODB_DB_NAME: str | None = os.getenv("MONGODB_DB_NAME", "cucu_note")
USE_MONGO_CONVERSATION: bool = os.getenv("USE_MONGO_CONVERSATION", "true").lower() == "true"
# MongoDB Connection Pooling
MONGODB_MAX_POOL_SIZE: int = int(os.getenv("MONGODB_MAX_POOL_SIZE", "5"))
MONGODB_MIN_POOL_SIZE: int = int(os.getenv("MONGODB_MIN_POOL_SIZE", "1"))
MONGODB_MAX_IDLE_TIME_MS: int = int(os.getenv("MONGODB_MAX_IDLE_TIME_MS", "45000"))
# ====================== CANIFA INTERNAL POSTGRES ======================
......
......@@ -43,18 +43,11 @@ asyncio.run(setup())
" || echo "⚠️ Could not set up indexes (will retry on first request)"
# Start the server
echo "🌟 Starting Uvicorn server (hot reload enabled)..."
exec uvicorn server:app \
--host 0.0.0.0 \
--port 5000 \
--reload \
--reload-dir /app \
--log-level info
# Core FastAPI
fastapi==0.124.4
uvicorn==0.38.0
uvloop>=0.21.0
starlette==0.50.0
pydantic==2.12.5
pydantic_core==2.41.5
......@@ -51,13 +52,10 @@ google-auth==2.45.0
# Tokenization
tiktoken==0.12.0
# Observability (minimal - only trace API used)
opentelemetry-api==1.39.1
# Removed: otel exporters/proto/semantic-conventions (not configured)
# Utilities
python-dotenv==1.2.1
......@@ -71,7 +69,7 @@ tenacity==9.1.2
backoff==2.2.1
regex==2025.11.3
Unidecode==1.4.0
# pillow==12.0.0 # Removed: not directly imported
# WebSocket
websockets==15.0.1
......@@ -104,4 +102,5 @@ cachetools==6.2.4
pytest==9.0.2
# Production server
# gunicorn==23.0.0 # Removed: using uvicorn instead
aiosqlite
......@@ -2,6 +2,7 @@ import asyncio
import os
import platform
import logging
from contextlib import asynccontextmanager
import uvicorn
from fastapi import FastAPI
......@@ -10,6 +11,7 @@ from fastapi.responses import RedirectResponse
from api.chatbot import router as chatbot_router
from api.memos import router as memos_router
from api.test_chat_route import router as test_router
from common.cache import redis_cache
from common.langfuse_client import get_langfuse_client
from common.middleware import middleware_manager
......@@ -29,21 +31,14 @@ logging.basicConfig(
)
logger = logging.getLogger(__name__)
# =============================================================================
# LIFESPAN - Initialize & cleanup resources (replaces deprecated on_event)
# =============================================================================
@asynccontextmanager
async def lifespan(app: FastAPI):
"""Startup & shutdown lifecycle manager."""
# --- STARTUP ---
# Initialize Redis (optional - will continue without cache if unavailable)
redis_client = await redis_cache.initialize()
if redis_client:
......@@ -56,19 +51,17 @@ async def startup_event():
await init_mongodb()
logger.info("✅ MongoDB connection initialized")
# Langfuse initialization (optional)
langfuse_client = get_langfuse_client()
if langfuse_client:
logger.info("✅ Langfuse client ready")
else:
logger.warning("⚠️ Langfuse client not available (missing keys or disabled)")
yield # App is running
# --- SHUTDOWN ---
try:
redis_client = redis_cache.get_client()
if redis_client:
await redis_client.aclose()
......@@ -76,7 +69,6 @@ async def shutdown_event():
except Exception as e:
logger.debug(f"Error closing Redis: {e}")
try:
from common.mongo_client import close_mongodb
await close_mongodb()
......@@ -85,6 +77,14 @@ async def shutdown_event():
logger.debug(f"Error closing MongoDB: {e}")
app = FastAPI(
title="Contract AI Service",
description="API for Contract AI Service",
version="1.0.0",
lifespan=lifespan,
)
# =============================================================================
# MIDDLEWARE SETUP - Auth + RateLimit + CORS in one place
# =============================================================================
......@@ -96,6 +96,7 @@ middleware_manager.setup(
cors_origins=CORS_ORIGINS,  # from environment variable
)
app.include_router(test_router) # No-auth test endpoints
app.include_router(chatbot_router)
app.include_router(memos_router)
......
......@@ -6,6 +6,10 @@ services:
build:
context: ./backend
dockerfile: Dockerfile.prod
cache_from:
- type=local,src=.docker/cache/backend
cache_to:
- type=local,dest=.docker/cache/backend
container_name: cuccu_backend
restart: unless-stopped
ports:
......@@ -30,37 +34,38 @@ services:
deploy:
resources:
limits:
memory: 2G
memory: 512M
cpus: '1.0'
reservations:
memory: 512M
memory: 128M
cpus: '0.5'
# Frontend
# Frontend (Production build with nginx - saves ~190MB RAM)
frontend:
build:
context: ./frontend
dockerfile: Dockerfile.prod
args:
# Build-time envs for Vite
VITE_API_BASE_URL: ${VITE_API_BASE_URL:-http://localhost:5000}
VITE_CLERK_PUBLISHABLE_KEY: ${VITE_CLERK_PUBLISHABLE_KEY:-}
container_name: cuccu_frontend
restart: unless-stopped
ports:
- "3001:80"
env_file:
- ./frontend/.env
depends_on:
- backend
networks:
- cuccu_network
healthcheck:
test: ["CMD", "wget", "-q", "--spider", "http://localhost:80"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 32M
volumes:
backend_data:
......
......@@ -2,4 +2,5 @@
VITE_CLERK_PUBLISHABLE_KEY=pk_test_Y29tbXVuYWwtc3VuYmVhbS0wLmNsZXJrLmFjY291bnRzLmRldiQ
# ====================== API URL ======================
# Dev mode: point directly to backend (no nginx proxy)
VITE_API_BASE_URL=http://160.191.50.138:5000
\ No newline at end of file
FROM node:22-alpine
WORKDIR /app
......
# Multi-stage build for production
FROM node:22-alpine AS builder
WORKDIR /app
......
......@@ -15,6 +15,17 @@ server {
add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always;
# Proxy /api requests to backend container
location /api/ {
proxy_pass http://cuccu_backend:5000;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 120s;
}
# SPA routing - serve index.html for all routes
location / {
try_files $uri $uri/ /index.html;
......
This diff is collapsed.
import { MessageCircleIcon, XIcon, Maximize2Icon, Minimize2Icon, Trash2Icon } from "lucide-react";
import { useState, useRef, useCallback, useEffect } from "react";
import { cn } from "@/lib/utils";
import ChatbotPanel, { type ChatbotPanelHandle } from "./ChatbotPanel";
type Position = {
x: number;
......@@ -44,6 +44,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
const hasDraggedRef = useRef(false);
const dragRef = useRef<{ startX: number; startY: number; startPosX: number; startPosY: number } | null>(null);
const animationFrameRef = useRef<number | null>(null);
const chatbotPanelRef = useRef<ChatbotPanelHandle>(null);
// Save position to localStorage when it changes
useEffect(() => {
......@@ -112,11 +113,16 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
}, []);
const handleMouseUp = useCallback(() => {
setIsDragging(false);
dragRef.current = null;
}, []);
// Toggle via onClick — only if user didn't drag
const handleClick = useCallback((e: React.MouseEvent) => {
e.stopPropagation();
if (!hasDraggedRef.current) {
setIsOpen((prev) => !prev);
}
setIsDragging(false);
dragRef.current = null;
}, []);
useEffect(() => {
......@@ -189,7 +195,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
useEffect(() => {
if (isDragging) {
window.addEventListener("touchmove", handleTouchMove, { passive: true });
window.addEventListener("touchend", handleTouchEnd);
return () => {
window.removeEventListener("touchmove", handleTouchMove);
window.removeEventListener("touchend", handleTouchEnd);
......@@ -267,11 +273,21 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
<div className="flex items-center gap-2">
<span className="inline-flex size-2 rounded-full bg-emerald-500" />
<div>
<p className="text-sm font-semibold leading-none">CuCu Assistant</p>
<p className="text-xs text-muted-foreground">Notes chat</p>
</div>
</div>
<div className="flex items-center gap-1">
{/* Clear messages button */}
<button
type="button"
className="rounded-full p-1.5 text-muted-foreground transition-colors hover:bg-destructive/10 hover:text-destructive"
onClick={() => chatbotPanelRef.current?.clearMessages()}
aria-label="Clear messages"
title="Xóa lịch sử chat"
>
<Trash2Icon className="size-4" />
</button>
{/* Expand/collapse button */}
<button
type="button"
......@@ -301,7 +317,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
{/* Chat content - no separate header anymore */}
<div className="h-[calc(100%-52px)]">
<ChatbotPanel variant="widget" className="border-0 rounded-none" hideHeader />
<ChatbotPanel ref={chatbotPanelRef} variant="widget" className="border-0 rounded-none" hideHeader />
</div>
</div>
)}
......@@ -323,6 +339,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
: "bg-primary text-primary-foreground",
)}
onMouseDown={handleMouseDown}
onClick={handleClick}
onTouchStart={handleTouchStart}
role="button"
tabIndex={0}
......
......@@ -8,9 +8,9 @@ import type { MonthNavigatorProps } from "@/types/statistics";
export const MonthNavigator = ({ visibleMonth, onMonthChange, activityStats }: MonthNavigatorProps) => {
const [isOpen, setIsOpen] = useState(false);
const currentYear = getYearFromDate(visibleMonth);
const currentMonthNum = getMonthFromDate(visibleMonth);
const currentMonth = new Date(currentYear, currentMonthNum - 1, 1);
const handlePrevMonth = () => {
onMonthChange(addMonths(visibleMonth, -1));
......
import { useEffect } from "react";
import { useSearchParams } from "react-router-dom";
import { MemoRenderContext } from "@/components/MasonryView";
import MemoView from "@/components/MemoView";
......@@ -53,7 +53,7 @@ const Home = () => {
renderer={(memo: Memo, context?: MemoRenderContext) => (
<MemoView key={`${memo.name}-${memo.displayTime}`} memo={memo} showVisibility showPinned compact={context?.compact} />
)}
listSort={listSort}
orderBy={orderBy}
filter={memoFilter}
/>
......
......@@ -2,9 +2,10 @@ import { redirectOnAuthFailure } from "@/utils/auth-redirect";
import { getClerkSessionToken } from "@/utils/clerk";
import type { RequestOptions } from "./types";
// API origin - empty string = relative URLs (proxied via nginx in Docker).
// Override via VITE_API_BASE_URL, e.g. "http://localhost:5000" for local dev without Docker.
const _envOrigin = import.meta.env.VITE_API_BASE_URL as string | undefined;
export const API_ORIGIN: string = (_envOrigin !== undefined && _envOrigin !== "") ? _envOrigin : "";
export const API_BASE = `${API_ORIGIN}/api/v1`;
const parseBody = async (response: Response): Promise<unknown> => {
......