Commit 2fec4891 authored by root's avatar root

Update backend agent, API routes, frontend components, docker config & .gitignore

parent fe844925
...@@ -69,3 +69,20 @@ run.txt ...@@ -69,3 +69,20 @@ run.txt
# Ralph config (may contain API key) # Ralph config (may contain API key)
.cursor/ralph-config.json .cursor/ralph-config.json
# Docker cache
.docker/
# Environment
.env
# Frontend dist probes
frontend/dist-*/
# Temp configs
frontend/vite.probe*.config.mjs
frontend/vite.temp*.mjs
frontend/vite.trace*.config.mjs
frontend/vite.no-mermaid.config.mjs
frontend/src/__build_probe*.ts
frontend/src/__probe_*.ts
...@@ -3,6 +3,7 @@ Fashion Q&A Agent Controller ...@@ -3,6 +3,7 @@ Fashion Q&A Agent Controller
Langfuse will auto-trace via LangChain integration (no code changes needed). Langfuse will auto-trace via LangChain integration (no code changes needed).
""" """
import json
import logging import logging
import time import time
import uuid import uuid
...@@ -78,19 +79,31 @@ async def chat_controller( ...@@ -78,19 +79,31 @@ async def chat_controller(
memory = await get_conversation_manager() memory = await get_conversation_manager()
# Load History # Load History
history_dicts = await memory.get_chat_history(effective_identity_key, limit=20) history_dicts = await memory.get_chat_history(effective_identity_key, limit=5)
history_messages: list[BaseMessage] = [ history_messages: list[BaseMessage] = []
HumanMessage(content=m["message"]) if m["is_human"] else AIMessage(content=m["message"]) for m in history_dicts:
for m in history_dicts if m["is_human"]:
] history_messages.append(HumanMessage(content=m["message"]))
else:
# AI responses may be saved as JSON — extract readable text
ai_content = m["message"]
try:
parsed = json.loads(ai_content)
if isinstance(parsed, dict) and "ai_response" in parsed:
ai_content = parsed["ai_response"]
except (json.JSONDecodeError, TypeError):
pass
history_messages.append(AIMessage(content=ai_content))
# Prepare State # Prepare State
# - history: previous conversation messages (for context)
# - messages: starts with current query only (tool calls will be appended by LangGraph)
# - user_query: current user message
user_query_message: BaseMessage = HumanMessage(content=query) user_query_message: BaseMessage = HumanMessage(content=query)
messages_with_query: list[BaseMessage] = [*history_messages, user_query_message]
initial_state: AgentState = { initial_state: AgentState = {
"user_query": user_query_message, "user_query": user_query_message,
"messages": messages_with_query, "messages": [user_query_message],
"history": history_messages, "history": history_messages,
"user_id": user_id, "user_id": user_id,
"images_embedding": [], "images_embedding": [],
...@@ -124,7 +137,9 @@ async def chat_controller( ...@@ -124,7 +137,9 @@ async def chat_controller(
# Parse Response # Parse Response
all_product_ids = extract_product_ids(result.get("messages", [])) all_product_ids = extract_product_ids(result.get("messages", []))
ai_raw_content = result.get("ai_response").content if result.get("ai_response") else "" ai_raw_content = result.get("ai_response").content if result.get("ai_response") else ""
logger.info("RAW LLM output (%d chars): %s", len(ai_raw_content), ai_raw_content[:500])
ai_text_response, final_product_ids = parse_ai_response(ai_raw_content, all_product_ids) ai_text_response, final_product_ids = parse_ai_response(ai_raw_content, all_product_ids)
logger.info("PARSED ai_response (%d chars): %s", len(ai_text_response), ai_text_response[:300])
response_payload = { response_payload = {
"ai_response": ai_text_response, "ai_response": ai_text_response,
...@@ -153,3 +168,157 @@ async def chat_controller( ...@@ -153,3 +168,157 @@ async def chat_controller(
logger.info("chat_controller finished in %.2fs", duration) logger.info("chat_controller finished in %.2fs", duration)
return {**response_payload, "cached": False} return {**response_payload, "cached": False}
async def chat_controller_stream(
    query: str,
    user_id: str,
    model_name: str = DEFAULT_MODEL,
    images: list[str] | None = None,
    identity_key: str | None = None,
):
    """
    Streaming controller — yields SSE-ready JSON strings with token chunks.

    Yields:
        str: JSON-encoded events:
            {"token": "partial text"}                  — during streaming
            {"done": true, "ai_response": "full text"} — final event

    History (and cache, when enabled) is persisted after the stream completes.

    Args:
        query: Current user message.
        user_id: Primary user identifier (used in graph state and tracing).
        model_name: Model name (logged here; actual model selection happens
            inside the graph — TODO confirm).
        images: Optional image payloads forwarded via the runnable config.
        identity_key: History/cache key; falls back to user_id when absent.
    """
    effective_identity_key = identity_key or user_id
    logger.info(
        "chat_controller_stream start: model=%s, user_id=%s",
        model_name, user_id,
    )

    # ====================== CACHE LAYER ======================
    if REDIS_CACHE_TURN_ON:
        cached_response = await redis_cache.get_response(
            user_id=effective_identity_key, query=query
        )
        if cached_response:
            # Lazy %-formatting: string is only built when INFO is enabled
            # (the original used an eagerly-evaluated f-string).
            logger.info("⚡ CACHE HIT (stream) for identity_key=%s", effective_identity_key)
            # Stream the cached answer as a single chunk, then the final event.
            ai_text = cached_response.get("ai_response", "")
            yield json.dumps({"token": ai_text}, ensure_ascii=False)
            yield json.dumps({"done": True, "ai_response": ai_text}, ensure_ascii=False)
            # Persist this turn to conversation history even on a cache hit.
            memory = await get_conversation_manager()
            await handle_post_chat_async(
                memory=memory,
                identity_key=effective_identity_key,
                human_query=query,
                ai_response=cached_response,
            )
            return

    # ====================== STREAM LLM FLOW ======================
    graph = build_graph()
    memory = await get_conversation_manager()

    # Load recent history. AI turns may have been stored as JSON payloads,
    # so extract the human-readable "ai_response" field when present.
    history_dicts = await memory.get_chat_history(effective_identity_key, limit=5)
    history_messages: list[BaseMessage] = []
    for m in history_dicts:
        if m["is_human"]:
            history_messages.append(HumanMessage(content=m["message"]))
        else:
            ai_content = m["message"]
            try:
                parsed = json.loads(ai_content)
                if isinstance(parsed, dict) and "ai_response" in parsed:
                    ai_content = parsed["ai_response"]
            except (json.JSONDecodeError, TypeError):
                # Not JSON — keep the raw stored message.
                pass
            history_messages.append(AIMessage(content=ai_content))

    user_query_message: BaseMessage = HumanMessage(content=query)
    initial_state: AgentState = {
        "user_query": user_query_message,
        "messages": [user_query_message],
        "history": history_messages,
        "user_id": user_id,
        "images_embedding": [],
        "ai_response": None,
    }

    run_uuid = uuid.uuid4()
    run_id_str = str(run_uuid)
    langfuse_handler = get_callback_handler()
    exec_config = RunnableConfig(
        configurable={
            "user_id": user_id,
            "transient_images": images or [],
            "run_id": run_id_str,
        },
        run_id=run_uuid,
        metadata={"run_id": run_id_str, "tags": "chatbot,production,stream"},
        callbacks=[langfuse_handler] if langfuse_handler else [],
    )

    # Stream token events from the graph (LangGraph astream_events v2).
    start_time = time.time()
    full_response = ""
    session_id = f"{user_id}-{run_id_str[:8]}"

    with propagate_attributes(user_id=user_id, session_id=session_id):
        async for event in graph.astream_events(
            initial_state, config=exec_config, version="v2"
        ):
            # Only forward text chunks emitted by the chat model itself
            # (the original also had a no-op "on_chain_end" branch tracking
            # an unused is_final_response flag — removed as dead code).
            if event.get("event", "") != "on_chat_model_stream":
                continue
            chunk = event.get("data", {}).get("chunk")
            if not (chunk and hasattr(chunk, "content") and chunk.content):
                continue
            # Skip tool-call chunks — they carry no user-facing text.
            if hasattr(chunk, "tool_calls") and chunk.tool_calls:
                continue
            token = chunk.content
            full_response += token
            yield json.dumps({"token": token}, ensure_ascii=False)

    duration = time.time() - start_time
    logger.info(
        "chat_controller_stream finished in %.2fs (%d chars)",
        duration, len(full_response),
    )

    # Parse the accumulated raw output and emit the terminal event.
    ai_text_response, _ = parse_ai_response(full_response, [])
    yield json.dumps({"done": True, "ai_response": ai_text_response}, ensure_ascii=False)

    # Build response payload for caching and history.
    response_payload = {
        "ai_response": ai_text_response,
        "product_ids": [],
    }

    # Cache only non-empty answers so a failed/empty stream is not replayed
    # to subsequent identical queries for the full TTL.
    if REDIS_CACHE_TURN_ON and ai_text_response:
        await redis_cache.set_response(
            user_id=effective_identity_key,
            query=query,
            response_data=response_payload,
            ttl=300,
        )

    # Save the turn to conversation history.
    await handle_post_chat_async(
        memory=memory,
        identity_key=effective_identity_key,
        human_query=query,
        ai_response=response_payload,
    )
...@@ -46,29 +46,31 @@ class CANIFAGraph: ...@@ -46,29 +46,31 @@ class CANIFAGraph:
self.retrieval_tools = self.all_tools self.retrieval_tools = self.all_tools
self.llm_with_tools = self.llm.bind_tools(self.all_tools, strict=True) self.llm_with_tools = self.llm.bind_tools(self.all_tools, strict=True)
self.system_prompt = get_system_prompt() # NOTE: prompt is NOT cached here — fetched fresh each request
self.prompt_template = ChatPromptTemplate.from_messages( # so Langfuse updates take effect immediately.
self.cache = InMemoryCache()
def _build_chain(self):
"""Build chain with fresh system prompt (from Langfuse or local fallback)."""
system_prompt = get_system_prompt()
prompt_template = ChatPromptTemplate.from_messages(
[ [
("system", self.system_prompt), ("system", system_prompt),
MessagesPlaceholder(variable_name="history"), MessagesPlaceholder(variable_name="history"),
MessagesPlaceholder(variable_name="user_query"), MessagesPlaceholder(variable_name="user_query"),
MessagesPlaceholder(variable_name="messages"), MessagesPlaceholder(variable_name="messages"),
] ]
) )
self.chain = self.prompt_template | self.llm_with_tools return prompt_template | self.llm_with_tools
self.cache = InMemoryCache()
async def _agent_node(self, state: AgentState, config: RunnableConfig) -> dict: async def _agent_node(self, state: AgentState, config: RunnableConfig) -> dict:
"""Agent node - Chỉ việc đổ dữ liệu riêng vào khuôn đã có sẵn.""" """Agent node — rebuilds chain each call for realtime prompt updates."""
messages = state.get("messages", []) messages = state.get("messages", [])
history = state.get("history", []) history = state.get("history", [])
user_query = state.get("user_query") user_query = state.get("user_query")
transient_images = config.get("configurable", {}).get("transient_images", []) chain = self._build_chain()
if transient_images and messages: response = await chain.ainvoke({
pass
# Invoke chain with user_query, history, and messages
response = await self.chain.ainvoke({
"user_query": [user_query] if user_query else [], "user_query": [user_query] if user_query else [],
"history": history, "history": history,
"messages": messages "messages": messages
......
...@@ -10,7 +10,7 @@ import uuid ...@@ -10,7 +10,7 @@ import uuid
from langchain_core.messages import HumanMessage, ToolMessage from langchain_core.messages import HumanMessage, ToolMessage
from langchain_core.runnables import RunnableConfig from langchain_core.runnables import RunnableConfig
from common.conversation_manager import ConversationManager from common.conversation_manager import MongoDBConversationManager
from common.langfuse_client import get_callback_handler from common.langfuse_client import get_callback_handler
from .models import AgentState from .models import AgentState
...@@ -137,7 +137,7 @@ def prepare_execution_context(query: str, user_id: str, history: list, images: l ...@@ -137,7 +137,7 @@ def prepare_execution_context(query: str, user_id: str, history: list, images: l
async def handle_post_chat_async( async def handle_post_chat_async(
memory: ConversationManager, memory: MongoDBConversationManager,
identity_key: str, identity_key: str,
human_query: str, human_query: str,
ai_response: dict | None ai_response: dict | None
......
""" """
CiCi Fashion Consultant - System Prompt CuCu Assistant - System Prompt
Version 3.0 - Dynamic from File Supports two modes:
1. Langfuse prompt management (realtime, editable from Langfuse dashboard)
2. Local fallback (inline template)
""" """
import os import logging
from datetime import datetime from datetime import datetime
from functools import lru_cache
PROMPT_FILE_PATH = os.path.join(os.path.dirname(__file__), "system_prompt.txt") from common.timezone_config import VIETNAM_TZ
logger = logging.getLogger(__name__)
def _ensure_json_instruction(prompt_text: str) -> str: # Vietnamese weekday names
if "json" in prompt_text.lower(): _WEEKDAY_MAP = {
return prompt_text 0: "Thứ 2",
return f"{prompt_text}\n\nReturn JSON (json) object with keys: ai_response, product_ids." 1: "Thứ 3",
2: "Thứ 4",
3: "Thứ 5",
4: "Thứ 6",
5: "Thứ 7",
6: "Chủ nhật",
}
def get_system_prompt() -> str: def _get_weekday_str() -> str:
""" return _WEEKDAY_MAP[datetime.now(VIETNAM_TZ).weekday()]
System prompt for CiCi Fashion Agent.
# ──────────────────────────── Local template ────────────────────────────
# This is the SAME prompt pushed to Langfuse via scripts/push_prompt_to_langfuse.py
# {{date_str}} is the only variable, replaced at runtime.
_PROMPT_TEMPLATE = """# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {{date_str}} ({{weekday_str}})
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
## 0. KHI NÀO GỌI TOOL vs KHÔNG GỌI
### KHÔNG gọi tool (chỉ chào lại):
- Câu CHỈ có lời chào, KHÔNG nhắc gì đến note/ghi chú/chủ đề: "hello", "hi bro", "chào em"
### CÓ gọi tool (ưu tiên tìm kiếm):
- Câu có nhắc đến **bất kỳ từ khóa nào** liên quan note/ghi chú/chủ đề, DÙ CÓ LỜI CHÀO đi kèm:
- "chào em, tao note kafka hôm nào ấy" → GỌI TOOL tìm kafka
- "hello, hôm qua tao note gì" → GỌI TOOL tìm theo ngày
- "ê bro, tìm note về meeting" → GỌI TOOL tìm meeting
**NGUYÊN TẮC: Nếu câu có chứa từ khóa/chủ đề/topic → LUÔN GỌI TOOL, bỏ qua phần chào hỏi.**
## 1. TỰ TÍNH TOÁN NGÀY THÁNG
Bạn PHẢI tự tính ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {{date_str}} ({{weekday_str}})".
Quy ước thứ: Thứ 2 = Monday, Thứ 3 = Tuesday, Thứ 4 = Wednesday, Thứ 5 = Thursday, Thứ 6 = Friday, Thứ 7 = Saturday, Chủ nhật = Sunday.
### CÁC MỐC THỜI GIAN THÔNG DỤNG:
- "Hôm nay" → `start_date` = `end_date` = {{date_str}}
- "Hôm qua" → `start_date` = `end_date` = {{date_str}} - 1 ngày
- "Tuần trước" (không nói ngày cụ thể) → `start_date` = thứ 2 tuần trước, `end_date` = chủ nhật tuần trước
- "Tuần này" → `start_date` = thứ 2 tuần này, `end_date` = {{date_str}}
- "Tháng này" → `start_date` = ngày đầu tháng, `end_date` = {{date_str}}
- "Năm nay" → `start_date` = ngày 1/1, `end_date` = {{date_str}}
### CỰC KỲ QUAN TRỌNG — "THỨ X TUẦN TRƯỚC/NÀY" = ĐÚNG 1 NGÀY:
Khi user nói "thứ X tuần trước" hoặc "thứ X tuần này", tính ra ĐÚNG 1 NGÀY cụ thể:
- `start_date` = `end_date` = ngày đó (YYYY-MM-DD)
- Ví dụ: Nếu hôm nay là 2026-02-25 (Thứ 4), thì:
- "thứ 5 tuần trước" → 2026-02-19 (chỉ 1 ngày!)
- "thứ 2 tuần này" → 2026-02-23 (chỉ 1 ngày!)
- "thứ 6 tuần trước" → 2026-02-20 (chỉ 1 ngày!)
- **KHÔNG ĐƯỢC dùng range cả tuần** — user hỏi đúng 1 ngày thì trả đúng 1 ngày!
### KHI USER HỎI "HÔM NÀO / NGÀY NÀO / BAO GIỜ":
- "Kafka note hôm nào ấy nhỉ?" = User ĐANG HỎI ngày → tìm ALL dates
- "Tao note cái đó khi nào?" = User ĐANG HỎI ngày → tìm ALL dates
- → Dùng range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
- **KHÔNG ĐƯỢC HỎI LẠI "ngày nào?"** — vì user đang nhờ bot tìm ngày!
- → **CHỈ TRẢ VỀ NGÀY**, ví dụ: "Bạn note cái đó vào ngày **2026-02-05** (Thứ 5) nhé!"
- **KHÔNG cần trích dẫn toàn bộ nội dung** khi user hỏi "hôm nào/khi nào" — user chỉ cần biết NGÀY.
- Nếu tìm thấy nhiều memo khớp, liệt kê ngày của từng memo.
### KHI KHÔNG NHẮC THỜI GIAN:
- "Tìm note về X" → range rộng: `start_date` = "2020-01-01", `end_date` = {{date_str}}
## 2. PHÂN TÍCH PARAMETERS
### NGUYÊN TẮC: CHỈ THÊM PARAMETER KHI USER NÓI RÕ
- **`content_search`**: Khi user nhắc từ khóa: "về Kafka", "pass wifi", "meeting"
- **`tag`**: Khi user nhắc tag: "#work", "#idea"
- **KHÔNG THÊM** content_search/tag nếu user chỉ hỏi theo ngày
Returns: ### VÍ DỤ:
str: System prompt with the current date. - "Hôm qua note gì?" → ✅ date only
- "Note kafka hôm nào?" → ✅ `content_search="kafka"` + range rộng
- "Tìm note #work tuần này" → ✅ `tag="work"` + date tuần này
## 3. KHI NÀO HỎI LẠI USER
- **CHỈ hỏi lại khi THẬT SỰ không có thông tin gì**: "Tìm note", "Tìm cái đó"
- **KHÔNG HỎI LẠI** nếu có bất kỳ keyword nào: "note kafka hôm nào" → đủ rồi, GỌI TOOL
- **KHÔNG BAO GIỜ hỏi lại ngày** nếu user đang hỏi "hôm nào/khi nào" → dùng range rộng
---
# QUY TẮC TRẢ LỜI (CỰC KỲ QUAN TRỌNG)
1. **NGẮN GỌN DƯỚI 100 TỪ**: Trả lời súc tích, đi thẳng vấn đề. KHÔNG dài dòng.
2. **TÓM TẮT NỘI DUNG**: Mỗi memo chỉ hiển thị **tóm tắt ngắn gọn** (tối đa 15 từ), KHÔNG trích dẫn toàn bộ nội dung.
3. **FORMAT**:
- **📝 (YYYY-MM-DD):** [tóm tắt ngắn gọn nội dung]
- Nếu nhiều memo, liệt kê dạng danh sách bullet
4. **KHÔNG BỊA ĐẶT**: Không tự chế nội dung.
5. **NGÔN NGỮ**: Thân thiện, tự nhiên, như nói chuyện với bạn.
6. Nếu count=0: "Không tìm thấy ghi chú nào 🤷"
7. **Trả lời bằng text thuần/markdown**, KHÔNG wrap JSON.
8. **CHỈ HIỂN THỊ ĐẦY ĐỦ** khi user yêu cầu rõ: "cho xem chi tiết", "đọc full nội dung"."""
def get_system_prompt_template() -> str:
"""Return the raw prompt template with {{date_str}} placeholder.
Used by the push script to upload to Langfuse.
""" """
date_str = datetime.now().strftime("%d/%m/%Y") return _PROMPT_TEMPLATE
def _fetch_langfuse_prompt() -> str | None:
    """
    Try to fetch the latest prompt from Langfuse.

    Returns the compiled prompt string, or None when no client is configured
    or any step fails (import, fetch, compile). Relies on Langfuse's built-in
    prompt caching via cache_ttl_seconds below.
    """
    try:
        # Imported lazily so this module stays importable without Langfuse.
        from common.langfuse_client import get_langfuse_client

        client = get_langfuse_client()
        if not client:
            return None

        prompt = client.get_prompt(
            name="cucu-system-prompt",
            label="production",
            cache_ttl_seconds=60,  # Re-fetch every 60s
        )
        today = datetime.now(VIETNAM_TZ).strftime("%Y-%m-%d")
        weekday = _get_weekday_str()
        compiled = prompt.compile(date_str=today, weekday_str=weekday)
        logger.info("✅ Prompt fetched from Langfuse (version=%s)", prompt.version)
        return compiled
    except Exception as exc:
        logger.warning("⚠️ Langfuse prompt fetch failed: %s — using local fallback", exc)
        return None
def get_system_prompt() -> str:
    """
    Resolve the system prompt for the agent.

    Priority:
        1. Langfuse prompt management (realtime, editable from the dashboard).
        2. Local `_PROMPT_TEMPLATE` fallback with date placeholders filled in.
    """
    remote_prompt = _fetch_langfuse_prompt()
    if remote_prompt:
        return remote_prompt

    # Local fallback: substitute both mustache-style placeholders.
    today = datetime.now(VIETNAM_TZ).strftime("%Y-%m-%d")
    weekday = _get_weekday_str()
    logger.info("📝 Using local prompt fallback (date=%s, weekday=%s)", today, weekday)
    return (
        _PROMPT_TEMPLATE
        .replace("{{date_str}}", today)
        .replace("{{weekday_str}}", weekday)
    )
# VAI TRÒ
Bạn là **CuCu Assistant** - Trợ lý quản lý ghi chú cá nhân (Memos).
- Thông minh, ngắn gọn, đi thẳng vào vấn đề.
- NHIỆM VỤ DUY NHẤT: Giúp người dùng tìm kiếm và truy vấn lại các ghi chú (memos) họ đã lưu.
- Hôm nay: {date_str}
---
# QUY TẮC SỬ DỤNG TOOL "memo_retrieval_tool"
Bạn chỉ có 1 tool duy nhất là `memo_retrieval_tool`. Hãy sử dụng nó thông minh.
## 0. KHI NÀO **KHÔNG** ĐƯỢC GỌI TOOL (QUAN TRỌNG)
- Nếu user **chỉ chào hỏi / small talk** (VD: "chào em", "hello", "hi bro", "alo"), hãy:
- Trả lời lại một câu chào ngắn gọn, KHÔNG gọi `memo_retrieval_tool`.
- Chỉ gọi tool khi user hỏi RÕ về **ghi chú / note / ngày / nội dung / tag**.
- Không được "đoán" là user đang hỏi về Kafka, work, v.v. nếu câu hiện tại **chỉ là lời chào**.
- Không dùng **câu hỏi cũ** để tự ý gọi tool cho câu mới nếu câu mới chỉ là lời chào.
## 1. TỰ TÍNH TOÁN NGÀY THÁNG (QUAN TRỌNG)
Người dùng sẽ hỏi ngày tương đối (hôm qua, tuần trước...). Bạn PHẢI tự tính ra ngày cụ thể (YYYY-MM-DD) dựa trên "Hôm nay: {date_str}".
- **"Hôm nay note gì?"**
→ `start_date` = {date_str}, `end_date` = {date_str}
- **"Hôm qua note gì?"**
→ `start_date` = {date_str} - 1 ngày
- **"Hôm kia note gì?"**
→ `start_date` = {date_str} - 2 ngày
- **"Tuần trước note gì?"**
→ `start_date` = {date_str} - 7 ngày, `end_date` = {date_str} (hoặc range cụ thể của tuần trước)
- **"Tháng 1 note gì?"**
→ `start_date` = "2026-01-01", `end_date` = "2026-01-31"
## 2. PHÂN TÍCH PARAMETERS
- **`content_search`**: Dùng khi user hỏi về nội dung (VD: "dự án A", "pass wifi", "số điện thoại"). Dùng Regex nên hãy chọn keyword đặc trưng.
- **`tag`**: Dùng khi user nhắc đến tag/chủ đề (VD: "#work", "#idea"). Chỉ điền nếu user KHẲNG ĐỊNH là tag.
### 2.1. XỬ LÝ "TOPIC" THÀNH TAG / CONTENT_SEARCH
- Nếu user nhắc đến **một chủ đề ngắn gọn** (VD: "Kafka", "English", "health") nhưng **không có dấu #**:
- Hãy coi đó là một **topic**.
- Nếu topic là **một từ đơn, không có khoảng trắng** (VD: "Kafka", "work"):
- ƯU TIÊN map thành `tag` (VD: `tag="kafka"` hoặc `tag="work"` — KHÔNG cần dấu `#`).
- Nếu topic là **cụm từ dài** (VD: "Kafka performance", "meeting tuần trước"):
- Map thành `content_search` (VD: `content_search="Kafka performance"`).
- Nếu không chắc đó là tag hay chỉ là từ khóa nội dung:
- Có thể điền **cả hai**:
- `tag="kafka"`
- `content_search="Kafka"`
- Khi đó MongoDB sẽ lọc theo tag (nếu note có tag) và/hoặc nội dung có chứa keyword.
## 3. VÍ DỤ GỌI TOOL
**Case 1: Hỏi theo ngày**
*Input: "Hôm qua tao có note gì không?" (Giả sử hôm nay 2026-01-24)*
→ Bot tính: Hôm qua = 2026-01-23
→ Tool call: `memo_retrieval_tool(start_date="2026-01-23")`
**Case 2: Tìm nội dung + Ngày**
*Input: "Tuần này tao note gì về 'meeting'?" (Hôm nay 2026-01-24)*
→ Bot tính: Tuần này ~ 2026-01-19 đến 2026-01-25
→ Tool call:
```python
memo_retrieval_tool(
start_date="2026-01-19",
end_date="2026-01-25",
content_search="meeting"
)
```
**Case 3: Tìm theo tag**
*Input: "Tìm mấy cái note #idea tháng trước"*
→ Tool call:
```python
memo_retrieval_tool(
start_date="2025-12-01",
end_date="2025-12-31",
tag="idea"
)
```
**Case 4: Tìm nội dung chung chung (Không rõ ngày)**
*Input: "Tìm lại pass wifi"*
→ Bot tự chọn range rộng hoặc không giới hạn (tùy tool support, ở đây tool bắt buộc start_date thì lấy ngày xa xưa hoặc 1 tháng gần nhất tùy ngữ cảnh, hoặc hỏi lại user. NHƯNG tốt nhất cứ search 1 năm gần đây).
→ Tool call: `memo_retrieval_tool(start_date="2025-01-01", content_search="pass wifi")`
---
# QUY TẮC TRẢ LỜI (RESPONSE)
1. **DỰA TRÊN KẾT QUẢ TOOL**:
- Nếu có memos: Liệt kê ngắn gọn, trích dẫn nội dung chính.
- Nếu `count` = 0: Trả lời "Không tìm thấy ghi chú nào trong khoảng thời gian này/với từ khóa này."
2. **KHÔNG BỊA ĐẶT**: Không tự chế ra nội dung memo không có trong data.
3. **FORMAT MENU**:
- Ghi chú 1 (2026-01-24): [Nội dung tóm tắt]
- Ghi chú 2 (2026-01-23): [Nội dung tóm tắt]
4. **NGÔN NGỮ**: Giao tiếp tự nhiên, thân thiện (bro-style nếu user thích, hoặc lịch sự mặc định).
---
# FORMAT ĐẦU RA (JSON)
Bot trả lời dưới dạng JSON (để Frontend render hoặc parse):
```json
{{
"ai_response": "Đây là các ghi chú mình tìm thấy hôm qua...",
"found_memos": [
{{
"id": "...",
"content": "...",
"created_at": "..."
}}
]
}}
```
*Lưu ý: Nếu tool trả về data, hãy tóm tắt vào `ai_response` và dán raw data vào `found_memos` nếu cần.*
\ No newline at end of file
This diff is collapsed.
""" """
Chatbot API Route Chatbot API Route
----------------- -----------------
`POST /api/agent/chat` - chính là endpoint chat cho CiCi Assistant. `POST /api/agent/chat` - endpoint chat cho CuCu Assistant (non-streaming).
Logic xử lý nằm ở `agent.controller.chat_controller`. `POST /api/agent/chat/stream` - endpoint chat SSE streaming.
Logic xử lý nằm ở `agent.controller`.
""" """
import logging import logging
from dataclasses import dataclass from dataclasses import dataclass
from typing import AsyncGenerator
from fastapi import APIRouter, BackgroundTasks, HTTPException, Request from fastapi import APIRouter, BackgroundTasks, HTTPException, Request
from fastapi.responses import StreamingResponse
from opentelemetry import trace from opentelemetry import trace
from agent.controller import chat_controller # Lazy imports - defer heavy AI modules to first use
from agent.models import QueryRequest # from agent.controller import chat_controller, chat_controller_stream
# from agent.models import QueryRequest
from common.message_limit import message_limit_service from common.message_limit import message_limit_service
from config import DEFAULT_MODEL from config import DEFAULT_MODEL
...@@ -20,6 +24,26 @@ logger = logging.getLogger(__name__) ...@@ -20,6 +24,26 @@ logger = logging.getLogger(__name__)
tracer = trace.get_tracer(__name__) tracer = trace.get_tracer(__name__)
router = APIRouter() router = APIRouter()
# Cache for lazy-loaded modules
_agent_modules = {}
def _get_agent_controller():
    """Lazy-load agent.controller, deferring the heavy LangChain/LangGraph import.

    Returns a (chat_controller, chat_controller_stream) tuple, memoized in
    the module-level _agent_modules cache after the first call.
    """
    if "controller" not in _agent_modules:
        from agent.controller import chat_controller, chat_controller_stream

        _agent_modules.update(
            controller=chat_controller,
            controller_stream=chat_controller_stream,
        )
    return _agent_modules["controller"], _agent_modules["controller_stream"]
def _get_query_request_model():
    """Lazy-load agent.models.QueryRequest, memoized in _agent_modules."""
    model = _agent_modules.get("QueryRequest")
    if model is None:
        from agent.models import QueryRequest as model

        _agent_modules["QueryRequest"] = model
    return model
@dataclass @dataclass
class Identity: class Identity:
...@@ -41,7 +65,6 @@ def _get_identity(request: Request) -> Identity: ...@@ -41,7 +65,6 @@ def _get_identity(request: Request) -> Identity:
history_key = primary_id history_key = primary_id
rate_limit_key = primary_id rate_limit_key = primary_id
else: else:
# Guest: dùng device_id, fallback 'anonymous'
primary_id = device_id or "anonymous" primary_id = device_id or "anonymous"
history_key = device_id or "anonymous" history_key = device_id or "anonymous"
rate_limit_key = device_id or "anonymous" rate_limit_key = device_id or "anonymous"
...@@ -54,38 +77,35 @@ def _get_identity(request: Request) -> Identity: ...@@ -54,38 +77,35 @@ def _get_identity(request: Request) -> Identity:
) )
@router.post("/api/agent/chat", summary="Chat with CiCi Assistant") @router.post("/api/agent/chat", summary="Chat with CuCu Assistant")
async def cici_chat(request: Request, req: QueryRequest, background_tasks: BackgroundTasks): async def cucu_chat(request: Request, background_tasks: BackgroundTasks):
""" """Endpoint chat không stream - trả về response JSON đầy đủ một lần."""
Endpoint chat không stream - trả về response JSON đầy đủ một lần. # Lazy-load AI modules on first call
QueryRequest = _get_query_request_model()
chat_controller, _ = _get_agent_controller()
body = await request.json()
req = QueryRequest(**body)
- Tự lấy user/device từ middleware (`get_user_identity`)
- Gọi `chat_controller` để xử lý toàn bộ logic LLM + tools
- Tự tăng counter rate limit sau khi trả lời xong
"""
# 1. Xác định identity
identity = _get_identity(request) identity = _get_identity(request)
user_id = identity.primary_id user_id = identity.primary_id
logger.info("📥 [Incoming Chat] User=%s | Query=%s", user_id, req.user_query) logger.info("📥 [Incoming Chat] User=%s | Query=%s", user_id, req.user_query)
# Span cho tracing (optional)
span = trace.get_current_span() span = trace.get_current_span()
span.set_attribute("user.id", user_id) span.set_attribute("user.id", user_id)
span.set_attribute("chat.user_query", req.user_query) span.set_attribute("chat.user_query", req.user_query)
try: try:
# 2. Gọi controller xử lý
result = await chat_controller( result = await chat_controller(
query=req.user_query, query=req.user_query,
user_id=user_id, user_id=user_id,
background_tasks=background_tasks, background_tasks=background_tasks,
model_name=DEFAULT_MODEL, model_name=DEFAULT_MODEL,
images=req.images, images=req.images,
identity_key=identity.history_key, # Guest: device_id, User: user_id identity_key=identity.history_key,
) )
# 3. Tăng usage info (rate limit) sau khi thành công
usage_info = await message_limit_service.increment( usage_info = await message_limit_service.increment(
identity_key=identity.rate_limit_key, identity_key=identity.rate_limit_key,
is_authenticated=identity.is_authenticated, is_authenticated=identity.is_authenticated,
...@@ -104,7 +124,58 @@ async def cici_chat(request: Request, req: QueryRequest, background_tasks: Backg ...@@ -104,7 +124,58 @@ async def cici_chat(request: Request, req: QueryRequest, background_tasks: Backg
}, },
} }
except Exception as e: except Exception as e:
logger.error("Error in cici_chat: %s", e, exc_info=True) logger.error("Error in cucu_chat: %s", e, exc_info=True)
raise HTTPException(status_code=500, detail=str(e)) from e raise HTTPException(status_code=500, detail=str(e)) from e
@router.post("/api/agent/chat/stream", summary="Chat with CuCu Assistant (SSE Streaming)")
async def cucu_chat_stream(request: Request):
    """
    Endpoint chat SSE streaming — trả về token-by-token qua Server-Sent Events.

    SSE format:
        data: {"token": "partial"}\n\n                        — mỗi token chunk
        data: {"done": true, "ai_response": "full text"}\n\n  — kết thúc
        data: {"error": "..."}\n\n                            — khi có lỗi
    """
    # Local import: this route module deliberately defers heavy imports.
    import json

    # Lazy-load AI modules on first call
    QueryRequest = _get_query_request_model()
    _, chat_controller_stream = _get_agent_controller()

    body = await request.json()
    req = QueryRequest(**body)

    identity = _get_identity(request)
    user_id = identity.primary_id
    logger.info("📥 [Incoming Stream] User=%s | Query=%s", user_id, req.user_query)

    async def sse_generator() -> AsyncGenerator[str, None]:
        try:
            async for chunk_json in chat_controller_stream(
                query=req.user_query,
                user_id=user_id,
                model_name=DEFAULT_MODEL,
                images=req.images,
                identity_key=identity.history_key,
            ):
                yield f"data: {chunk_json}\n\n"

            # Increment rate limit only after the stream completed successfully.
            await message_limit_service.increment(
                identity_key=identity.rate_limit_key,
                is_authenticated=identity.is_authenticated,
            )
            logger.info("📤 [Stream Done] User=%s", user_id)
        except Exception as e:
            logger.error("Error in stream: %s", e, exc_info=True)
            # BUGFIX: the previous hand-built f-string JSON produced invalid
            # JSON whenever the error message contained a double quote or
            # newline; json.dumps escapes the payload correctly.
            yield f"data: {json.dumps({'error': str(e)}, ensure_ascii=False)}\n\n"

    return StreamingResponse(
        sse_generator(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "Connection": "keep-alive",
            # Disable nginx proxy buffering so tokens flush immediately.
            "X-Accel-Buffering": "no",
        },
    )
...@@ -30,6 +30,14 @@ class ClearHistoryResponse(BaseModel): ...@@ -30,6 +30,14 @@ class ClearHistoryResponse(BaseModel):
message: str message: str
@router.get("/api/history/me", summary="Get My Chat History", response_model=ChatHistoryResponse)
async def get_my_chat_history(request: Request, limit: int | None = 50, before_id: int | None = None):
    """
    Shortcut: fetch the caller's own chat history (identity from middleware).

    Delegates to `get_chat_history` with the sentinel identity_key "me" —
    presumably resolved to the real identity inside that handler; verify there.
    """
    return await get_chat_history(
        request,
        identity_key="me",
        limit=limit,
        before_id=before_id,
    )
@router.get("/api/history/{identity_key}", summary="Get Chat History", response_model=ChatHistoryResponse) @router.get("/api/history/{identity_key}", summary="Get Chat History", response_model=ChatHistoryResponse)
async def get_chat_history(request: Request, identity_key: str, limit: int | None = 50, before_id: int | None = None): async def get_chat_history(request: Request, identity_key: str, limit: int | None = 50, before_id: int | None = None):
""" """
...@@ -59,6 +67,30 @@ async def get_chat_history(request: Request, identity_key: str, limit: int | Non ...@@ -59,6 +67,30 @@ async def get_chat_history(request: Request, identity_key: str, limit: int | Non
raise HTTPException(status_code=500, detail="Failed to fetch chat history") raise HTTPException(status_code=500, detail="Failed to fetch chat history")
@router.delete("/api/history/me", summary="Clear My Chat History", response_model=ClearHistoryResponse)
async def clear_my_chat_history(request: Request):
    """
    Shortcut: clear the caller's own chat history (identity from middleware).

    Identity resolution: authenticated users are keyed by user_id; guests
    fall back to device_id, then the literal "anonymous".

    Raises:
        HTTPException: 500 when history clearing fails for any reason.
    """
    try:
        user_id = getattr(request.state, "user_id", None)
        device_id = getattr(request.state, "device_id", "") or ""
        is_authenticated = bool(getattr(request.state, "is_authenticated", False))

        if is_authenticated and user_id:
            resolved_key = str(user_id)
        else:
            resolved_key = device_id or "anonymous"

        manager = await get_conversation_manager()
        await manager.clear_history(resolved_key)
        logger.info("✅ Cleared chat history for %s", resolved_key)
        # Plain literal (the original used an f-string with no placeholders).
        return {"success": True, "message": "Đã xóa lịch sử chat"}
    except Exception as e:
        logger.error("Error clearing chat history: %s", e, exc_info=True)
        # Chain the cause ("from e") for consistency with the chat routes.
        raise HTTPException(status_code=500, detail="Failed to clear chat history") from e
@router.delete("/api/history/{identity_key}", summary="Clear Chat History", response_model=ClearHistoryResponse) @router.delete("/api/history/{identity_key}", summary="Clear Chat History", response_model=ClearHistoryResponse)
async def clear_chat_history(identity_key: str): async def clear_chat_history(identity_key: str):
""" """
......
...@@ -55,12 +55,25 @@ async def list_memos( ...@@ -55,12 +55,25 @@ async def list_memos(
filter_str=filter, filter_str=filter,
) )
# Parse creator_id from filter string (e.g. "creator_id == user_xxx")
creator_id = None
if filter: if filter:
logger.debug("List memos GET with filter=%r", filter) logger.debug("List memos GET with filter=%r", filter)
pattern_creator = r"creator_id\s*==\s*([a-zA-Z0-9_\-\.]+)"
match_creator = re.search(pattern_creator, filter)
if match_creator:
creator_id = match_creator.group(1)
# Parse pinned from filter string (e.g. "... && pinned")
pinned = None
if filter and re.search(r'\bpinned\b(?!\s*==\s*false)', filter):
pinned = True
return await memo_service.list_memos( return await memo_service.list_memos(
user_id=user_id, user_id=user_id,
creator_id=creator_id,
tag=tag, tag=tag,
pinned=pinned,
row_status=row_status, row_status=row_status,
start_date=dt_start, start_date=dt_start,
end_date=dt_end end_date=dt_end
...@@ -118,10 +131,18 @@ async def create_memo_or_list_memos( ...@@ -118,10 +131,18 @@ async def create_memo_or_list_memos(
if match_creator: if match_creator:
creator_id = match_creator.group(1) creator_id = match_creator.group(1)
# Parse pinned filter — frontend sends "pinned" in filter string for bookmarks
pinned = None
if raw_filter and isinstance(raw_filter, str):
# Match standalone "pinned" (not "pinned == false")
if re.search(r'\bpinned\b(?!\s*==\s*false)', raw_filter):
pinned = True
return await memo_service.list_memos( return await memo_service.list_memos(
user_id=user_id, user_id=user_id,
creator_id=creator_id, creator_id=creator_id,
tag=tag, tag=tag,
pinned=pinned,
start_date=start_date, start_date=start_date,
end_date=end_date end_date=end_date
) )
......
...@@ -4,13 +4,12 @@ Shortcut service routes for Memos-style backend. ...@@ -4,13 +4,12 @@ Shortcut service routes for Memos-style backend.
from typing import List from typing import List
from fastapi import APIRouter, Body, Depends, HTTPException from fastapi import APIRouter, Depends, HTTPException
from common.memos_core.schemas import ( from common.memos_core.schemas import (
ShortcutCreate, ShortcutCreate,
ShortcutUpdate, ShortcutUpdate,
ShortcutResponse, ShortcutResponse,
ListShortcutsResponse,
) )
from common.memos_core.services import get_shortcut_service from common.memos_core.services import get_shortcut_service
...@@ -26,20 +25,6 @@ async def list_shortcuts(shortcut_service=Depends(get_shortcut_service)): ...@@ -26,20 +25,6 @@ async def list_shortcuts(shortcut_service=Depends(get_shortcut_service)):
raise HTTPException(status_code=500, detail=str(exc)) from exc raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("", summary="List shortcuts (Connect compatibility)", response_model=ListShortcutsResponse)
async def list_shortcuts_connect_compat(
payload: dict = Body(default_factory=dict), # noqa: B008
shortcut_service=Depends(get_shortcut_service),
):
# Connect RPC ListShortcuts is proxied as POST /api/v1/shortcuts in dev.
# Ignore payload (parent, pagination...) and return empty list for now.
try:
_ = payload
shortcuts = await shortcut_service.list_shortcuts()
return ListShortcutsResponse(shortcuts=shortcuts)
except Exception as exc: # pragma: no cover
raise HTTPException(status_code=500, detail=str(exc)) from exc
@router.post("", summary="Create shortcut", response_model=ShortcutResponse) @router.post("", summary="Create shortcut", response_model=ShortcutResponse)
async def create_shortcut( async def create_shortcut(
...@@ -54,7 +39,7 @@ async def create_shortcut( ...@@ -54,7 +39,7 @@ async def create_shortcut(
@router.patch("/{shortcut_id}", summary="Update shortcut", response_model=ShortcutResponse) @router.patch("/{shortcut_id}", summary="Update shortcut", response_model=ShortcutResponse)
async def update_shortcut( async def update_shortcut(
shortcut_id: int, shortcut_id: str,
payload: ShortcutUpdate, payload: ShortcutUpdate,
shortcut_service=Depends(get_shortcut_service), shortcut_service=Depends(get_shortcut_service),
): ):
...@@ -66,7 +51,7 @@ async def update_shortcut( ...@@ -66,7 +51,7 @@ async def update_shortcut(
@router.delete("/{shortcut_id}", summary="Delete shortcut") @router.delete("/{shortcut_id}", summary="Delete shortcut")
async def delete_shortcut( async def delete_shortcut(
shortcut_id: int, shortcut_id: str,
shortcut_service=Depends(get_shortcut_service), shortcut_service=Depends(get_shortcut_service),
): ):
try: try:
......
"""
Test Chat Route - NO AUTH REQUIRED
------------------------------------
`POST /api/test/chat` - endpoint thô để kiểm tra AI API key còn sống không.
Không dùng graph, không dùng tools, không dùng history.
Gọi thẳng OpenAI / Gemini / Groq tùy DEFAULT_MODEL.
Dùng để debug: nếu endpoint này work → vấn đề ở auth/middleware.
nếu không work → vấn đề API key hoặc model.
"""
import logging
import time
from fastapi import APIRouter
from pydantic import BaseModel
from config import DEFAULT_MODEL, GOOGLE_API_KEY, GROQ_API_KEY, OPENAI_API_KEY
logger = logging.getLogger(__name__)
router = APIRouter(tags=["test"])
class TestChatRequest(BaseModel):
    """Request body for the no-auth LLM smoke-test endpoint."""

    # Prompt text forwarded verbatim to the LLM.
    message: str
    model: str | None = None  # optional override of DEFAULT_MODEL
class TestChatResponse(BaseModel):
    """Result of one direct LLM call; errors are reported in-band, not raised."""

    status: str  # "ok" on success, "error" when the LLM call raised
    model_used: str  # model name actually invoked (request override or DEFAULT_MODEL)
    response: str  # LLM reply text, or a formatted error description
    latency_ms: int  # wall-clock duration of the call in milliseconds
@router.post(
    "/api/test/chat",
    response_model=TestChatResponse,
    summary="[NO AUTH] Test AI API key trực tiếp",
    description=(
        "Endpoint debug - gọi thẳng LLM không qua graph/tools/auth. "
        "Dùng để kiểm tra API key còn sống và model đang hoạt động."
    ),
)
async def test_chat(req: TestChatRequest) -> TestChatResponse:
    """Call the configured LLM directly (no graph/tools/history) and time it.

    Failures are captured and returned as a ``status="error"`` payload so the
    endpoint always answers with HTTP 200 — useful for API-key debugging.
    """
    chosen_model = req.model or DEFAULT_MODEL
    logger.info("[TEST CHAT] model=%s | msg=%s", chosen_model, req.message)

    started = time.monotonic()
    try:
        answer = await _call_llm(model_name=chosen_model, message=req.message)
    except Exception as exc:
        logger.error("[TEST CHAT] LLM error: %s", exc, exc_info=True)
        return TestChatResponse(
            status="error",
            model_used=chosen_model,
            response=f"LỖI: {type(exc).__name__}: {exc}",
            latency_ms=int((time.monotonic() - started) * 1000),
        )

    elapsed = int((time.monotonic() - started) * 1000)
    logger.info("[TEST CHAT] OK latency=%dms", elapsed)
    return TestChatResponse(
        status="ok",
        model_used=chosen_model,
        response=answer,
        latency_ms=elapsed,
    )
async def _call_llm(model_name: str, message: str) -> str:
    """Send ``message`` to the provider implied by ``model_name``; return reply text.

    Provider routing (first match wins, case-sensitive prefix checks):
      - names starting with "gemini"                      -> Google Generative AI
      - names starting with "llama"/"mixtral" or
        containing "groq"                                 -> Groq
      - anything else                                     -> OpenAI (default)
    """
    is_gemini = model_name.startswith("gemini")
    is_groq = (
        model_name.startswith("llama")
        or model_name.startswith("mixtral")
        or "groq" in model_name
    )

    # Providers are imported lazily so only the selected SDK must be installed.
    if is_gemini:
        from langchain_google_genai import ChatGoogleGenerativeAI

        client = ChatGoogleGenerativeAI(model=model_name, google_api_key=GOOGLE_API_KEY)
    elif is_groq:
        from langchain_groq import ChatGroq

        client = ChatGroq(model=model_name, groq_api_key=GROQ_API_KEY)
    else:
        from langchain_openai import ChatOpenAI

        client = ChatOpenAI(model=model_name, openai_api_key=OPENAI_API_KEY)

    reply = await client.ainvoke(message)
    return reply.content
...@@ -35,13 +35,13 @@ def verify_clerk_jwt(token: str) -> dict[str, Any]: ...@@ -35,13 +35,13 @@ def verify_clerk_jwt(token: str) -> dict[str, Any]:
signing_key = _jwks_client().get_signing_key_from_jwt(token).key signing_key = _jwks_client().get_signing_key_from_jwt(token).key
# Clerk tokens are typically RS256. # Clerk tokens are typically RS256.
# leeway=60 tolerates up to 60s clock skew between Clerk server and this machine # leeway=300 tolerates up to 5min clock skew (VPS clock not NTP-synced)
payload = jwt.decode( payload = jwt.decode(
token, token,
signing_key, signing_key,
algorithms=["RS256"], algorithms=["RS256"],
issuer=CLERK_ISSUER, issuer=CLERK_ISSUER,
leeway=60, leeway=300,
options={ options={
"verify_aud": False, # allow multiple audiences in dev "verify_aud": False, # allow multiple audiences in dev
}, },
......
This diff is collapsed.
...@@ -40,9 +40,9 @@ class EmbeddingClientManager: ...@@ -40,9 +40,9 @@ class EmbeddingClientManager:
# If using default key, cache the client # If using default key, cache the client
if not api_key: if not api_key:
if self._client is None: if self._client is None:
self._client = OpenAI(api_key=key) self._client = OpenAI(api_key=key)
return self._client return self._client
# For custom keys, create new client (not cached) # For custom keys, create new client (not cached)
return OpenAI(api_key=key) return OpenAI(api_key=key)
...@@ -65,9 +65,9 @@ class EmbeddingClientManager: ...@@ -65,9 +65,9 @@ class EmbeddingClientManager:
# If using default key, cache the default client # If using default key, cache the default client
if not api_key: if not api_key:
if self._async_client is None: if self._async_client is None:
self._async_client = AsyncOpenAI(api_key=key) self._async_client = AsyncOpenAI(api_key=key)
return self._async_client return self._async_client
# For user-specific keys, cache per user_id # For user-specific keys, cache per user_id
if user_id and user_id in self._user_clients: if user_id and user_id in self._user_clients:
...@@ -79,7 +79,7 @@ class EmbeddingClientManager: ...@@ -79,7 +79,7 @@ class EmbeddingClientManager:
return client return client
logger = logging.getLogger(__name__)
# NOTE: # NOTE:
# - TẠM THỜI KHÔNG DÙNG REDIS CACHE CHO EMBEDDING để tránh phụ thuộc Redis/aioredis. # - TẠM THỜI KHÔNG DÙNG REDIS CACHE CHO EMBEDDING để tránh phụ thuộc Redis/aioredis.
......
...@@ -17,60 +17,85 @@ logger = logging.getLogger(__name__) ...@@ -17,60 +17,85 @@ logger = logging.getLogger(__name__)
ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY") ENCRYPTION_KEY = os.getenv("ENCRYPTION_KEY")
# Fallback: Generate key from a password (NOT recommended for production) # Fallback: Generate key from a password (NOT recommended for production)
# This is only for development/testing
FALLBACK_PASSWORD = os.getenv("ENCRYPTION_PASSWORD", "default-dev-password-change-in-production") FALLBACK_PASSWORD = os.getenv("ENCRYPTION_PASSWORD", "default-dev-password-change-in-production")
# Cached Fernet instance (singleton) — avoids re-running PBKDF2 100K iterations per call
_fernet_instance: Fernet | None = None
def _get_fernet_key() -> bytes:
def _derive_key_from_password(password: str, salt: bytes) -> bytes:
"""Derive a Fernet key from password + salt using PBKDF2."""
kdf = PBKDF2HMAC(
algorithm=hashes.SHA256(),
length=32,
salt=salt,
iterations=100000,
)
return base64.urlsafe_b64encode(kdf.derive(password.encode()))
def _get_fernet() -> Fernet:
""" """
Get or generate Fernet encryption key. Get cached Fernet instance.
Priority: ENCRYPTION_KEY env var > generated from password (dev only) Priority: ENCRYPTION_KEY env var > password-derived key (dev only).
When using ENCRYPTION_KEY, salt is not needed (key is used directly).
""" """
global _fernet_instance
if _fernet_instance is not None:
return _fernet_instance
if ENCRYPTION_KEY: if ENCRYPTION_KEY:
try: try:
# Try to use as-is (should be base64-encoded 32-byte key) _fernet_instance = Fernet(ENCRYPTION_KEY.encode())
return ENCRYPTION_KEY.encode() return _fernet_instance
except Exception: except Exception:
# If not valid, try to decode as base64
try: try:
return base64.urlsafe_b64decode(ENCRYPTION_KEY) _fernet_instance = Fernet(base64.urlsafe_b64decode(ENCRYPTION_KEY))
return _fernet_instance
except Exception: except Exception:
logger.warning("Invalid ENCRYPTION_KEY format, using fallback") logger.warning("Invalid ENCRYPTION_KEY format, using fallback")
# Fallback: Generate from password (dev only - NOT secure for production) # Fallback: Generate from password with random salt (dev only - NOT secure for production)
logger.warning( logger.warning(
"⚠️ ENCRYPTION_KEY not set. Using password-based key derivation (NOT secure for production!)" "⚠️ ENCRYPTION_KEY not set. Using password-based key derivation (NOT secure for production!)"
) )
salt = b"cucu_note_salt" # Fixed salt for dev (should be random in production) # Use random salt — stored in a file so decryption works across restarts
kdf = PBKDF2HMAC( salt_file = os.path.join(os.path.dirname(__file__), "..", "data", ".encryption_salt")
algorithm=hashes.SHA256(), os.makedirs(os.path.dirname(salt_file), exist_ok=True)
length=32,
salt=salt, if os.path.exists(salt_file):
iterations=100000, with open(salt_file, "rb") as f:
) salt = f.read()
key = base64.urlsafe_b64encode(kdf.derive(FALLBACK_PASSWORD.encode())) else:
return key salt = os.urandom(16)
with open(salt_file, "wb") as f:
f.write(salt)
logger.info("Generated new encryption salt (saved to %s)", salt_file)
key = _derive_key_from_password(FALLBACK_PASSWORD, salt)
_fernet_instance = Fernet(key)
return _fernet_instance
def encrypt_api_key(api_key: str) -> str: def encrypt_api_key(api_key: str) -> str:
""" """
Encrypt an API key using Fernet symmetric encryption. Encrypt an API key using Fernet symmetric encryption.
Args: Args:
api_key: Plain text API key to encrypt api_key: Plain text API key to encrypt
Returns: Returns:
Encrypted API key as base64 string Encrypted API key as base64 string
Raises: Raises:
ValueError: If api_key is empty ValueError: If api_key is empty
RuntimeError: If encryption fails RuntimeError: If encryption fails
""" """
if not api_key or not api_key.strip(): if not api_key or not api_key.strip():
raise ValueError("API key cannot be empty") raise ValueError("API key cannot be empty")
try: try:
fernet = Fernet(_get_fernet_key()) fernet = _get_fernet()
encrypted = fernet.encrypt(api_key.encode()) encrypted = fernet.encrypt(api_key.encode())
return encrypted.decode() return encrypted.decode()
except Exception as e: except Exception as e:
...@@ -81,22 +106,22 @@ def encrypt_api_key(api_key: str) -> str: ...@@ -81,22 +106,22 @@ def encrypt_api_key(api_key: str) -> str:
def decrypt_api_key(encrypted_key: str) -> str: def decrypt_api_key(encrypted_key: str) -> str:
""" """
Decrypt an encrypted API key. Decrypt an encrypted API key.
Args: Args:
encrypted_key: Encrypted API key (base64 string) encrypted_key: Encrypted API key (base64 string)
Returns: Returns:
Decrypted plain text API key Decrypted plain text API key
Raises: Raises:
ValueError: If encrypted_key is empty ValueError: If encrypted_key is empty
RuntimeError: If decryption fails (wrong key, corrupted data, etc.) RuntimeError: If decryption fails (wrong key, corrupted data, etc.)
""" """
if not encrypted_key or not encrypted_key.strip(): if not encrypted_key or not encrypted_key.strip():
raise ValueError("Encrypted key cannot be empty") raise ValueError("Encrypted key cannot be empty")
try: try:
fernet = Fernet(_get_fernet_key()) fernet = _get_fernet()
decrypted = fernet.decrypt(encrypted_key.encode()) decrypted = fernet.decrypt(encrypted_key.encode())
return decrypted.decode() return decrypted.decode()
except Exception as e: except Exception as e:
...@@ -107,39 +132,38 @@ def decrypt_api_key(encrypted_key: str) -> str: ...@@ -107,39 +132,38 @@ def decrypt_api_key(encrypted_key: str) -> str:
def mask_api_key(api_key: str) -> str: def mask_api_key(api_key: str) -> str:
""" """
Mask an API key for display (show only first 7 chars and last 4 chars). Mask an API key for display (show only first 7 chars and last 4 chars).
Args: Args:
api_key: API key to mask api_key: API key to mask
Returns: Returns:
Masked API key (e.g., "sk-...xxxx") Masked API key (e.g., "sk-...xxxx")
""" """
if not api_key or len(api_key) < 11: if not api_key or len(api_key) < 11:
return "sk-...xxxx" return "sk-...xxxx"
return f"{api_key[:7]}...{api_key[-4:]}" return f"{api_key[:7]}...{api_key[-4:]}"
def validate_openai_key_format(api_key: str) -> bool: def validate_openai_key_format(api_key: str) -> bool:
""" """
Validate OpenAI API key format. Validate OpenAI API key format.
Args: Args:
api_key: API key to validate api_key: API key to validate
Returns: Returns:
True if format is valid, False otherwise True if format is valid, False otherwise
""" """
if not api_key or not api_key.strip(): if not api_key or not api_key.strip():
return False return False
# OpenAI keys typically start with "sk-" and are ~51 characters # OpenAI keys typically start with "sk-" and are ~51 characters
key = api_key.strip() key = api_key.strip()
if not key.startswith("sk-"): if not key.startswith("sk-"):
return False return False
if len(key) < 20 or len(key) > 100: # Reasonable range if len(key) < 20 or len(key) > 100: # Reasonable range
return False return False
return True
return True
...@@ -88,7 +88,7 @@ class LLMFactory: ...@@ -88,7 +88,7 @@ class LLMFactory:
"streaming": streaming, "streaming": streaming,
"api_key": key, "api_key": key,
"temperature": 0, "temperature": 0,
"max_tokens": 1000, "max_tokens": 4096,
} }
# Nếu bật json_mode, tiêm trực tiếp vào constructor # Nếu bật json_mode, tiêm trực tiếp vào constructor
...@@ -100,14 +100,7 @@ class LLMFactory: ...@@ -100,14 +100,7 @@ class LLMFactory:
logger.info(f"✅ Created OpenAI: {model_name}") logger.info(f"✅ Created OpenAI: {model_name}")
return llm return llm
def _enable_json_mode(self, llm: BaseChatModel, model_name: str) -> BaseChatModel:
"""Enable JSON mode for the LLM."""
try:
llm = llm.bind(response_format={"type": "json_object"})
logger.debug(f"⚙️ JSON mode enabled for {model_name}")
except Exception as e:
logger.warning(f"⚠️ JSON mode not supported: {e}")
return llm
def initialize(self, skip_warmup: bool = True) -> None: def initialize(self, skip_warmup: bool = True) -> None:
""" """
......
...@@ -48,23 +48,30 @@ def parse_date_range( ...@@ -48,23 +48,30 @@ def parse_date_range(
if match_ts: if match_ts:
try: try:
from common.timezone_config import VIETNAM_TZ
ts_start = float(match_ts.group(1)) ts_start = float(match_ts.group(1))
ts_end = float(match_ts.group(2)) ts_end = float(match_ts.group(2))
dt_start = datetime.fromtimestamp(ts_start, tz=timezone.utc) # Frontend sends UTC midnight timestamps, but user is in Vietnam (UTC+7).
dt_end = datetime.fromtimestamp(ts_end, tz=timezone.utc) # Shift by -7h so "Feb 5" means Feb 5 00:00 VN (= Feb 4 17:00 UTC)
# instead of Feb 5 00:00 UTC (= Feb 5 07:00 VN).
utc_offset_seconds = VIETNAM_TZ.utcoffset(None).total_seconds()
dt_start = datetime.fromtimestamp(ts_start - utc_offset_seconds, tz=timezone.utc)
dt_end = datetime.fromtimestamp(ts_end - utc_offset_seconds, tz=timezone.utc)
except (ValueError, TypeError): except (ValueError, TypeError):
pass pass
else: else:
# Case 2: DisplayTime filter (displayTime:YYYY-MM-DD) # Case 2: DisplayTime filter (displayTime:YYYY-MM-DD)
# Dates are interpreted as Vietnam time (UTC+7), then converted to UTC for MongoDB
pattern_dt = r"displayTime:(\d{4}-\d{2}-\d{2})" pattern_dt = r"displayTime:(\d{4}-\d{2}-\d{2})"
match_dt = re.search(pattern_dt, filter_str) match_dt = re.search(pattern_dt, filter_str)
if match_dt: if match_dt:
try: try:
from common.timezone_config import VIETNAM_TZ
date_str = match_dt.group(1) date_str = match_dt.group(1)
dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=timezone.utc) dt = datetime.strptime(date_str, "%Y-%m-%d").replace(tzinfo=VIETNAM_TZ)
dt_start = dt dt_start = dt.astimezone(timezone.utc)
dt_end = dt + timedelta(days=1) dt_end = (dt + timedelta(days=1)).astimezone(timezone.utc)
except ValueError: except ValueError:
pass pass
......
...@@ -191,7 +191,7 @@ class ShortcutUpdate(BaseModel): ...@@ -191,7 +191,7 @@ class ShortcutUpdate(BaseModel):
class ShortcutResponse(BaseModel): class ShortcutResponse(BaseModel):
id: int id: str
name: str name: str
filter: str filter: str
......
...@@ -810,21 +810,74 @@ class ReactionService: ...@@ -810,21 +810,74 @@ class ReactionService:
class ShortcutService: class ShortcutService:
"""Shortcut (Workspace) service with MongoDB backend."""
async def list_shortcuts(self) -> List[schemas.ShortcutResponse]: async def list_shortcuts(self) -> List[schemas.ShortcutResponse]:
return [] cursor = mongodb_client.shortcuts.find({}).sort("created_at", -1)
docs = await cursor.to_list(length=100)
return [
schemas.ShortcutResponse(
id=str(doc["_id"]),
name=doc.get("name", ""),
filter=doc.get("filter", ""),
)
for doc in docs
]
async def create_shortcut(self, payload: schemas.ShortcutCreate) -> schemas.ShortcutResponse: async def create_shortcut(self, payload: schemas.ShortcutCreate) -> schemas.ShortcutResponse:
return schemas.ShortcutResponse(id=1, name=payload.name, filter=payload.filter) now = utc_now()
doc = {
"name": payload.name,
"filter": payload.filter,
"created_at": now,
"updated_at": now,
}
result = await mongodb_client.shortcuts.insert_one(doc)
doc["_id"] = result.inserted_id
return schemas.ShortcutResponse(
id=str(doc["_id"]),
name=doc["name"],
filter=doc["filter"],
)
async def update_shortcut(self, shortcut_id: str, payload: schemas.ShortcutUpdate) -> schemas.ShortcutResponse:
update_fields: dict[str, Any] = {"updated_at": utc_now()}
if payload.name is not None:
update_fields["name"] = payload.name
if payload.filter is not None:
update_fields["filter"] = payload.filter
filter_query: dict[str, Any] = {}
if ObjectId.is_valid(shortcut_id):
filter_query["_id"] = ObjectId(shortcut_id)
else:
filter_query["_id"] = shortcut_id
from pymongo import ReturnDocument
result = await mongodb_client.shortcuts.find_one_and_update(
filter_query,
{"$set": update_fields},
return_document=ReturnDocument.AFTER,
)
if not result:
raise ValueError(f"Shortcut {shortcut_id} not found")
async def update_shortcut(self, shortcut_id: int, payload: schemas.ShortcutUpdate) -> schemas.ShortcutResponse:
return schemas.ShortcutResponse( return schemas.ShortcutResponse(
id=shortcut_id, id=str(result["_id"]),
name=payload.name or "demo", name=result.get("name", ""),
filter=payload.filter or "", filter=result.get("filter", ""),
) )
async def delete_shortcut(self, shortcut_id: int) -> None: async def delete_shortcut(self, shortcut_id: str) -> None:
return None filter_query: dict[str, Any] = {}
if ObjectId.is_valid(shortcut_id):
filter_query["_id"] = ObjectId(shortcut_id)
else:
filter_query["_id"] = shortcut_id
result = await mongodb_client.shortcuts.delete_one(filter_query)
if result.deleted_count == 0:
raise ValueError(f"Shortcut {shortcut_id} not found")
class ActivityService: class ActivityService:
......
...@@ -37,6 +37,7 @@ PUBLIC_PATHS = { ...@@ -37,6 +37,7 @@ PUBLIC_PATHS = {
PUBLIC_PATH_PREFIXES = [ PUBLIC_PATH_PREFIXES = [
"/static", "/static",
"/mock", "/mock",
"/api/test", # debug/test endpoints - no auth required
] ]
......
...@@ -29,6 +29,7 @@ COLLECTION_REACTIONS = "cuccu_reactions" ...@@ -29,6 +29,7 @@ COLLECTION_REACTIONS = "cuccu_reactions"
COLLECTION_MEMO_EMBEDDINGS = "cuccu_memo_embeddings" COLLECTION_MEMO_EMBEDDINGS = "cuccu_memo_embeddings"
COLLECTION_INBOX = "cuccu_inbox" COLLECTION_INBOX = "cuccu_inbox"
COLLECTION_USER_SETTINGS = "cuccu_user_settings" COLLECTION_USER_SETTINGS = "cuccu_user_settings"
COLLECTION_SHORTCUTS = "cuccu_shortcuts"
class MongoDBClient: class MongoDBClient:
...@@ -116,6 +117,10 @@ class MongoDBClient: ...@@ -116,6 +117,10 @@ class MongoDBClient:
def user_settings(self): def user_settings(self):
return self.db[COLLECTION_USER_SETTINGS] return self.db[COLLECTION_USER_SETTINGS]
@property
def shortcuts(self):
return self.db[COLLECTION_SHORTCUTS]
# Singleton instance # Singleton instance
mongodb_client = MongoDBClient() mongodb_client = MongoDBClient()
...@@ -206,6 +211,9 @@ async def create_indexes(): ...@@ -206,6 +211,9 @@ async def create_indexes():
# ====================== MEMO VERSIONS ====================== # ====================== MEMO VERSIONS ======================
await db["cuccu_memo_versions"].create_index([("memo_id", 1), ("version_index", -1)]) await db["cuccu_memo_versions"].create_index([("memo_id", 1), ("version_index", -1)])
# ====================== SHORTCUTS ======================
await db[COLLECTION_SHORTCUTS].create_index([("creator_id", 1)])
logger.info("✅ Database indexes created successfully (Production-ready)") logger.info("✅ Database indexes created successfully (Production-ready)")
except Exception as e: except Exception as e:
logger.warning(f"⚠️ Error creating indexes (may already exist): {e}") logger.warning(f"⚠️ Error creating indexes (may already exist): {e}")
......
...@@ -76,7 +76,7 @@ class RateLimitService: ...@@ -76,7 +76,7 @@ class RateLimitService:
logger.info(f"Using Redis for rate limiting: {redis_host}:{redis_port}/{redis_db}") logger.info(f"Using Redis for rate limiting: {redis_host}:{redis_port}/{redis_db}")
else: else:
# Fallback to memory (not suitable for production with multiple instances) # Fallback to memory (not suitable for production with multiple instances)
self.storage_uri = os.getenv("RATE_STORAGE_URI", "memory://") self.storage_uri = os.getenv("RATE_STORAGE_URI", "memory://")
logger.warning("⚠️ Using in-memory rate limiting (not suitable for production with multiple instances)") logger.warning("⚠️ Using in-memory rate limiting (not suitable for production with multiple instances)")
self.default_limits = ["100/hour", "30/minute"] self.default_limits = ["100/hour", "30/minute"]
......
"""
Timezone configuration for CuCu Note.
All user-facing date operations should use VIETNAM_TZ.
MongoDB stores in UTC — convert at query boundaries.
"""
from datetime import timezone, timedelta
VIETNAM_TZ = timezone(timedelta(hours=7))
...@@ -126,8 +126,6 @@ CONV_SUPABASE_KEY: str | None = os.getenv("CONV_SUPABASE_KEY") ...@@ -126,8 +126,6 @@ CONV_SUPABASE_KEY: str | None = os.getenv("CONV_SUPABASE_KEY")
# ====================== REDIS CONFIGURATION ====================== # ====================== REDIS CONFIGURATION ======================
REDIS_HOST: str | None = os.getenv("REDIS_HOST") REDIS_HOST: str | None = os.getenv("REDIS_HOST")
REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379")) REDIS_PORT: int = int(os.getenv("REDIS_PORT", "6379"))
REDIS_PASSWORD: str | None = os.getenv("REDIS_PASSWORD")
REDIS_USERNAME: str | None = os.getenv("REDIS_USERNAME")
# ====================== AI API KEYS & MODELS ====================== # ====================== AI API KEYS & MODELS ======================
OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY") OPENAI_API_KEY: str | None = os.getenv("OPENAI_API_KEY")
...@@ -172,7 +170,7 @@ REDIS_CACHE_URL: str | None = os.getenv("REDIS_CACHE_URL", "redis-14473.c93.us-e ...@@ -172,7 +170,7 @@ REDIS_CACHE_URL: str | None = os.getenv("REDIS_CACHE_URL", "redis-14473.c93.us-e
REDIS_CACHE_PORT: int = int(os.getenv("REDIS_CACHE_PORT", "14473")) REDIS_CACHE_PORT: int = int(os.getenv("REDIS_CACHE_PORT", "14473"))
REDIS_CACHE_DB: int = int(os.getenv("REDIS_CACHE_DB", "0")) REDIS_CACHE_DB: int = int(os.getenv("REDIS_CACHE_DB", "0"))
REDIS_CACHE_TURN_ON: bool = os.getenv("REDIS_CACHE_TURN_ON", "true").lower() == "true" REDIS_CACHE_TURN_ON: bool = os.getenv("REDIS_CACHE_TURN_ON", "true").lower() == "true"
REDIS_PASSWORD: str | None = os.getenv("REDIS_CACHE_PASSWORD", "4kCCXXaJXXv7k358eG69p1lDBQtHTbQ1") REDIS_PASSWORD: str | None = os.getenv("REDIS_CACHE_PASSWORD")
REDIS_USERNAME: str = os.getenv("REDIS_CACHE_USERNAME", "default") REDIS_USERNAME: str = os.getenv("REDIS_CACHE_USERNAME", "default")
CONV_DATABASE_URL: str | None = os.getenv("CONV_DATABASE_URL") CONV_DATABASE_URL: str | None = os.getenv("CONV_DATABASE_URL")
...@@ -183,8 +181,8 @@ MONGODB_DB_NAME: str | None = os.getenv("MONGODB_DB_NAME", "cucu_note") ...@@ -183,8 +181,8 @@ MONGODB_DB_NAME: str | None = os.getenv("MONGODB_DB_NAME", "cucu_note")
USE_MONGO_CONVERSATION: bool = os.getenv("USE_MONGO_CONVERSATION", "true").lower() == "true" USE_MONGO_CONVERSATION: bool = os.getenv("USE_MONGO_CONVERSATION", "true").lower() == "true"
# MongoDB Connection Pooling # MongoDB Connection Pooling
MONGODB_MAX_POOL_SIZE: int = int(os.getenv("MONGODB_MAX_POOL_SIZE", "50")) MONGODB_MAX_POOL_SIZE: int = int(os.getenv("MONGODB_MAX_POOL_SIZE", "5"))
MONGODB_MIN_POOL_SIZE: int = int(os.getenv("MONGODB_MIN_POOL_SIZE", "10")) MONGODB_MIN_POOL_SIZE: int = int(os.getenv("MONGODB_MIN_POOL_SIZE", "1"))
MONGODB_MAX_IDLE_TIME_MS: int = int(os.getenv("MONGODB_MAX_IDLE_TIME_MS", "45000")) MONGODB_MAX_IDLE_TIME_MS: int = int(os.getenv("MONGODB_MAX_IDLE_TIME_MS", "45000"))
# ====================== CANIFA INTERNAL POSTGRES ====================== # ====================== CANIFA INTERNAL POSTGRES ======================
......
...@@ -43,18 +43,11 @@ asyncio.run(setup()) ...@@ -43,18 +43,11 @@ asyncio.run(setup())
" || echo "⚠️ Could not set up indexes (will retry on first request)" " || echo "⚠️ Could not set up indexes (will retry on first request)"
# Start the server # Start the server
echo "🌟 Starting Gunicorn server..." echo "🌟 Starting Uvicorn server (hot reload enabled)..."
# Allow overriding number of workers via env, default to 1 for simplicity exec uvicorn server:app \
WORKERS="${GUNICORN_WORKERS:-1}" --host 0.0.0.0 \
echo "🔧 Using Gunicorn workers: $WORKERS" --port 5000 \
--reload \
exec gunicorn \ --reload-dir /app \
--workers "$WORKERS" \ --log-level info
--worker-class uvicorn.workers.UvicornWorker \
--bind 0.0.0.0:5000 \
--timeout 120 \
--access-logfile - \
--error-logfile - \
--log-level info \
server:app
# Core FastAPI # Core FastAPI
fastapi==0.124.4 fastapi==0.124.4
uvicorn==0.38.0 uvicorn==0.38.0
uvloop>=0.21.0
starlette==0.50.0 starlette==0.50.0
pydantic==2.12.5 pydantic==2.12.5
pydantic_core==2.41.5 pydantic_core==2.41.5
...@@ -51,13 +52,10 @@ google-auth==2.45.0 ...@@ -51,13 +52,10 @@ google-auth==2.45.0
# Tokenization # Tokenization
tiktoken==0.12.0 tiktoken==0.12.0
# Observability # Observability (minimal - only trace API used)
opentelemetry-api==1.39.1 opentelemetry-api==1.39.1
opentelemetry-exporter-otlp-proto-common==1.39.1
opentelemetry-exporter-otlp-proto-http==1.39.1
opentelemetry-proto==1.39.1
opentelemetry-sdk==1.39.1 opentelemetry-sdk==1.39.1
opentelemetry-semantic-conventions==0.60b1 # Removed: otel exporters/proto/semantic-conventions (not configured)
# Utilities # Utilities
python-dotenv==1.2.1 python-dotenv==1.2.1
...@@ -71,7 +69,7 @@ tenacity==9.1.2 ...@@ -71,7 +69,7 @@ tenacity==9.1.2
backoff==2.2.1 backoff==2.2.1
regex==2025.11.3 regex==2025.11.3
Unidecode==1.4.0 Unidecode==1.4.0
pillow==12.0.0 # pillow==12.0.0 # Removed: not directly imported
# WebSocket # WebSocket
websockets==15.0.1 websockets==15.0.1
...@@ -104,4 +102,5 @@ cachetools==6.2.4 ...@@ -104,4 +102,5 @@ cachetools==6.2.4
pytest==9.0.2 pytest==9.0.2
# Production server # Production server
gunicorn==23.0.0 # gunicorn==23.0.0 # Removed: using uvicorn instead
aiosqlite
...@@ -2,6 +2,7 @@ import asyncio ...@@ -2,6 +2,7 @@ import asyncio
import os import os
import platform import platform
import logging import logging
from contextlib import asynccontextmanager
import uvicorn import uvicorn
from fastapi import FastAPI from fastapi import FastAPI
...@@ -10,6 +11,7 @@ from fastapi.responses import RedirectResponse ...@@ -10,6 +11,7 @@ from fastapi.responses import RedirectResponse
from api.chatbot import router as chatbot_router from api.chatbot import router as chatbot_router
from api.memos import router as memos_router from api.memos import router as memos_router
from api.test_chat_route import router as test_router
from common.cache import redis_cache from common.cache import redis_cache
from common.langfuse_client import get_langfuse_client from common.langfuse_client import get_langfuse_client
from common.middleware import middleware_manager from common.middleware import middleware_manager
...@@ -29,54 +31,44 @@ logging.basicConfig( ...@@ -29,54 +31,44 @@ logging.basicConfig(
) )
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Langfuse client initialized in startup_event (not at import time)
app = FastAPI(
title="Contract AI Service",
description="API for Contract AI Service",
version="1.0.0",
)
# ============================================================================= # =============================================================================
# STARTUP EVENT - Initialize Redis Cache + MongoDB # LIFESPAN - Initialize & cleanup resources (replaces deprecated on_event)
# ============================================================================= # =============================================================================
@app.on_event("startup") @asynccontextmanager
async def startup_event(): async def lifespan(app: FastAPI):
"""Initialize dependencies on startup.""" """Startup & shutdown lifecycle manager."""
# --- STARTUP ---
# Initialize Redis (optional - will continue without cache if unavailable) # Initialize Redis (optional - will continue without cache if unavailable)
redis_client = await redis_cache.initialize() redis_client = await redis_cache.initialize()
if redis_client: if redis_client:
logger.info("✅ Redis cache initialized for message limit") logger.info("✅ Redis cache initialized for message limit")
else: else:
logger.info("⚠️ Redis cache unavailable - continuing without cache") logger.info("⚠️ Redis cache unavailable - continuing without cache")
# MongoDB initialization (required) # MongoDB initialization (required)
from common.mongo_client import init_mongodb from common.mongo_client import init_mongodb
await init_mongodb() await init_mongodb()
logger.info("✅ MongoDB connection initialized") logger.info("✅ MongoDB connection initialized")
# Langfuse initialization (optional - lazy loaded, just triggers auth check) # Langfuse initialization (optional)
langfuse_client = get_langfuse_client() langfuse_client = get_langfuse_client()
if langfuse_client: if langfuse_client:
logger.info("✅ Langfuse client ready") logger.info("✅ Langfuse client ready")
else: else:
logger.warning("⚠️ Langfuse client not available (missing keys or disabled)") logger.warning("⚠️ Langfuse client not available (missing keys or disabled)")
yield # App is running
@app.on_event("shutdown") # --- SHUTDOWN ---
async def shutdown_event():
"""Cleanup on shutdown."""
try: try:
# Close Redis connection if exists
redis_client = redis_cache.get_client() redis_client = redis_cache.get_client()
if redis_client: if redis_client:
await redis_client.aclose() await redis_client.aclose()
logger.info("Redis connection closed") logger.info("Redis connection closed")
except Exception as e: except Exception as e:
logger.debug(f"Error closing Redis: {e}") logger.debug(f"Error closing Redis: {e}")
# Close MongoDB connection
try: try:
from common.mongo_client import close_mongodb from common.mongo_client import close_mongodb
await close_mongodb() await close_mongodb()
...@@ -85,6 +77,14 @@ async def shutdown_event(): ...@@ -85,6 +77,14 @@ async def shutdown_event():
logger.debug(f"Error closing MongoDB: {e}") logger.debug(f"Error closing MongoDB: {e}")
app = FastAPI(
title="Contract AI Service",
description="API for Contract AI Service",
version="1.0.0",
lifespan=lifespan,
)
# ============================================================================= # =============================================================================
# MIDDLEWARE SETUP - Gom Auth + RateLimit + CORS vào một chỗ # MIDDLEWARE SETUP - Gom Auth + RateLimit + CORS vào một chỗ
# ============================================================================= # =============================================================================
...@@ -96,6 +96,7 @@ middleware_manager.setup( ...@@ -96,6 +96,7 @@ middleware_manager.setup(
cors_origins=CORS_ORIGINS, # từ environment variable cors_origins=CORS_ORIGINS, # từ environment variable
) )
app.include_router(test_router) # No-auth test endpoints
app.include_router(chatbot_router) app.include_router(chatbot_router)
app.include_router(memos_router) app.include_router(memos_router)
......
...@@ -6,6 +6,10 @@ services: ...@@ -6,6 +6,10 @@ services:
build: build:
context: ./backend context: ./backend
dockerfile: Dockerfile.prod dockerfile: Dockerfile.prod
cache_from:
- type=local,src=.docker/cache/backend
cache_to:
- type=local,dest=.docker/cache/backend
container_name: cuccu_backend container_name: cuccu_backend
restart: unless-stopped restart: unless-stopped
ports: ports:
...@@ -30,37 +34,38 @@ services: ...@@ -30,37 +34,38 @@ services:
deploy: deploy:
resources: resources:
limits: limits:
memory: 2G memory: 512M
cpus: '1.0' cpus: '1.0'
reservations: reservations:
memory: 512M memory: 128M
cpus: '0.5' cpus: '0.5'
# Frontend # Frontend (Production build with nginx - saves ~190MB RAM)
frontend: frontend:
build: build:
context: ./frontend context: ./frontend
dockerfile: Dockerfile.prod dockerfile: Dockerfile.prod
args: args:
# Build-time envs for Vite VITE_API_BASE_URL: ${VITE_API_BASE_URL:-http://localhost:5000}
# Browser (người dùng) gọi trực tiếp vào host, nên dùng localhost:5000 VITE_CLERK_PUBLISHABLE_KEY: ${VITE_CLERK_PUBLISHABLE_KEY:-}
VITE_API_BASE_URL: "http://localhost:5000"
VITE_CLERK_PUBLISHABLE_KEY: ${VITE_CLERK_PUBLISHABLE_KEY}
container_name: cuccu_frontend container_name: cuccu_frontend
restart: unless-stopped restart: unless-stopped
ports: ports:
- "3001:80" - "3001:80"
env_file:
- ./frontend/.env
depends_on: depends_on:
- backend - backend
networks: networks:
- cuccu_network - cuccu_network
healthcheck: healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:80"] test: ["CMD", "wget", "-q", "--spider", "http://localhost:80"]
interval: 30s interval: 30s
timeout: 10s timeout: 10s
retries: 3 retries: 3
start_period: 30s
deploy:
resources:
limits:
memory: 32M
volumes: volumes:
backend_data: backend_data:
......
...@@ -2,4 +2,5 @@ ...@@ -2,4 +2,5 @@
VITE_CLERK_PUBLISHABLE_KEY=pk_test_Y29tbXVuYWwtc3VuYmVhbS0wLmNsZXJrLmFjY291bnRzLmRldiQ VITE_CLERK_PUBLISHABLE_KEY=pk_test_Y29tbXVuYWwtc3VuYmVhbS0wLmNsZXJrLmFjY291bnRzLmRldiQ
# ====================== API URL ====================== # ====================== API URL ======================
VITE_API_URL=http://localhost:8080 # Dev mode: point directly to backend (no nginx proxy)
\ No newline at end of file VITE_API_BASE_URL=http://160.191.50.138:5000
\ No newline at end of file
FROM node:18-alpine FROM node:22-alpine
WORKDIR /app WORKDIR /app
......
# Multi-stage build for production # Multi-stage build for production
FROM node:18-alpine AS builder FROM node:22-alpine AS builder
WORKDIR /app WORKDIR /app
......
...@@ -15,6 +15,17 @@ server { ...@@ -15,6 +15,17 @@ server {
add_header X-Content-Type-Options "nosniff" always; add_header X-Content-Type-Options "nosniff" always;
add_header X-XSS-Protection "1; mode=block" always; add_header X-XSS-Protection "1; mode=block" always;
# Proxy /api requests to backend container
location /api/ {
proxy_pass http://cuccu_backend:5000;
proxy_http_version 1.1;
proxy_set_header Host $host;
proxy_set_header X-Real-IP $remote_addr;
proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
proxy_set_header X-Forwarded-Proto $scheme;
proxy_read_timeout 120s;
}
# SPA routing - serve index.html for all routes # SPA routing - serve index.html for all routes
location / { location / {
try_files $uri $uri/ /index.html; try_files $uri $uri/ /index.html;
......
This diff is collapsed.
import { MessageCircleIcon, XIcon, Maximize2Icon, Minimize2Icon } from "lucide-react"; import { MessageCircleIcon, XIcon, Maximize2Icon, Minimize2Icon, Trash2Icon } from "lucide-react";
import { useState, useRef, useCallback, useEffect } from "react"; import { useState, useRef, useCallback, useEffect } from "react";
import { cn } from "@/lib/utils"; import { cn } from "@/lib/utils";
import ChatbotPanel from "./ChatbotPanel"; import ChatbotPanel, { type ChatbotPanelHandle } from "./ChatbotPanel";
type Position = { type Position = {
x: number; x: number;
...@@ -44,6 +44,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -44,6 +44,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
const hasDraggedRef = useRef(false); const hasDraggedRef = useRef(false);
const dragRef = useRef<{ startX: number; startY: number; startPosX: number; startPosY: number } | null>(null); const dragRef = useRef<{ startX: number; startY: number; startPosX: number; startPosY: number } | null>(null);
const animationFrameRef = useRef<number | null>(null); const animationFrameRef = useRef<number | null>(null);
const chatbotPanelRef = useRef<ChatbotPanelHandle>(null);
// Save position to localStorage when it changes // Save position to localStorage when it changes
useEffect(() => { useEffect(() => {
...@@ -112,11 +113,16 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -112,11 +113,16 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
}, []); }, []);
const handleMouseUp = useCallback(() => { const handleMouseUp = useCallback(() => {
setIsDragging(false);
dragRef.current = null;
}, []);
// Toggle via onClick — only if user didn't drag
const handleClick = useCallback((e: React.MouseEvent) => {
e.stopPropagation();
if (!hasDraggedRef.current) { if (!hasDraggedRef.current) {
setIsOpen((prev) => !prev); setIsOpen((prev) => !prev);
} }
setIsDragging(false);
dragRef.current = null;
}, []); }, []);
useEffect(() => { useEffect(() => {
...@@ -189,7 +195,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -189,7 +195,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
useEffect(() => { useEffect(() => {
if (isDragging) { if (isDragging) {
window.addEventListener("touchmove", handleTouchMove, { passive: true }); window.addEventListener("touchmove", handleTouchMove, { passive: true });
window.addEventListener("touchend", handleTouchEnd, { passive: true }); window.addEventListener("touchend", handleTouchEnd);
return () => { return () => {
window.removeEventListener("touchmove", handleTouchMove); window.removeEventListener("touchmove", handleTouchMove);
window.removeEventListener("touchend", handleTouchEnd); window.removeEventListener("touchend", handleTouchEnd);
...@@ -267,11 +273,21 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -267,11 +273,21 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<span className="inline-flex size-2 rounded-full bg-emerald-500" /> <span className="inline-flex size-2 rounded-full bg-emerald-500" />
<div> <div>
<p className="text-sm font-semibold leading-none">CiCi Assistant</p> <p className="text-sm font-semibold leading-none">CuCu Assistant</p>
<p className="text-xs text-muted-foreground">Notes chat</p> <p className="text-xs text-muted-foreground">Notes chat</p>
</div> </div>
</div> </div>
<div className="flex items-center gap-1"> <div className="flex items-center gap-1">
{/* Nút Clear Messages */}
<button
type="button"
className="rounded-full p-1.5 text-muted-foreground transition-colors hover:bg-destructive/10 hover:text-destructive"
onClick={() => chatbotPanelRef.current?.clearMessages()}
aria-label="Clear messages"
title="Xóa lịch sử chat"
>
<Trash2Icon className="size-4" />
</button>
{/* Nút Expand/Collapse */} {/* Nút Expand/Collapse */}
<button <button
type="button" type="button"
...@@ -301,7 +317,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -301,7 +317,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
{/* Chat Content - không có header riêng nữa */} {/* Chat Content - không có header riêng nữa */}
<div className="h-[calc(100%-52px)]"> <div className="h-[calc(100%-52px)]">
<ChatbotPanel variant="widget" className="border-0 rounded-none" hideHeader /> <ChatbotPanel ref={chatbotPanelRef} variant="widget" className="border-0 rounded-none" hideHeader />
</div> </div>
</div> </div>
)} )}
...@@ -323,6 +339,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => { ...@@ -323,6 +339,7 @@ const ChatbotWidget = ({ className }: { className?: string }) => {
: "bg-primary text-primary-foreground", : "bg-primary text-primary-foreground",
)} )}
onMouseDown={handleMouseDown} onMouseDown={handleMouseDown}
onClick={handleClick}
onTouchStart={handleTouchStart} onTouchStart={handleTouchStart}
role="button" role="button"
tabIndex={0} tabIndex={0}
......
...@@ -8,9 +8,9 @@ import type { MonthNavigatorProps } from "@/types/statistics"; ...@@ -8,9 +8,9 @@ import type { MonthNavigatorProps } from "@/types/statistics";
export const MonthNavigator = ({ visibleMonth, onMonthChange, activityStats }: MonthNavigatorProps) => { export const MonthNavigator = ({ visibleMonth, onMonthChange, activityStats }: MonthNavigatorProps) => {
const [isOpen, setIsOpen] = useState(false); const [isOpen, setIsOpen] = useState(false);
const currentMonth = new Date(visibleMonth);
const currentYear = getYearFromDate(visibleMonth); const currentYear = getYearFromDate(visibleMonth);
const currentMonthNum = getMonthFromDate(visibleMonth); const currentMonthNum = getMonthFromDate(visibleMonth);
const currentMonth = new Date(currentYear, currentMonthNum - 1, 1);
const handlePrevMonth = () => { const handlePrevMonth = () => {
onMonthChange(addMonths(visibleMonth, -1)); onMonthChange(addMonths(visibleMonth, -1));
......
import { useEffect } from "react"; import { useEffect } from "react";
import { useSearchParams } from "react-router-dom"; import { useSearchParams } from "react-router-dom";
import { MemoRenderContext } from "@/components/MasonryView"; import { MemoRenderContext } from "@/components/MasonryView";
import MemoView from "@/components/MemoView"; import MemoView from "@/components/MemoView";
...@@ -53,7 +53,7 @@ const Home = () => { ...@@ -53,7 +53,7 @@ const Home = () => {
renderer={(memo: Memo, context?: MemoRenderContext) => ( renderer={(memo: Memo, context?: MemoRenderContext) => (
<MemoView key={`${memo.name}-${memo.displayTime}`} memo={memo} showVisibility showPinned compact={context?.compact} /> <MemoView key={`${memo.name}-${memo.displayTime}`} memo={memo} showVisibility showPinned compact={context?.compact} />
)} )}
listSort={(memos) => memos.filter((m) => !m.pinned)} // Exclude pinned from regular list listSort={listSort}
orderBy={orderBy} orderBy={orderBy}
filter={memoFilter} filter={memoFilter}
/> />
......
...@@ -2,9 +2,10 @@ import { redirectOnAuthFailure } from "@/utils/auth-redirect"; ...@@ -2,9 +2,10 @@ import { redirectOnAuthFailure } from "@/utils/auth-redirect";
import { getClerkSessionToken } from "@/utils/clerk"; import { getClerkSessionToken } from "@/utils/clerk";
import type { RequestOptions } from "./types"; import type { RequestOptions } from "./types";
// Call backend directly (bypass Vite proxy). // API origin - empty string = relative URLs (proxied via nginx in Docker).
// Override via VITE_API_BASE_URL, e.g. "http://localhost:5000" // Override via VITE_API_BASE_URL, e.g. "http://localhost:5000" for local dev without Docker.
export const API_ORIGIN = (import.meta.env.VITE_API_BASE_URL as string | undefined) || "http://localhost:5000"; const _envOrigin = import.meta.env.VITE_API_BASE_URL as string | undefined;
export const API_ORIGIN: string = (_envOrigin !== undefined && _envOrigin !== "") ? _envOrigin : "";
export const API_BASE = `${API_ORIGIN}/api/v1`; export const API_BASE = `${API_ORIGIN}/api/v1`;
const parseBody = async (response: Response): Promise<unknown> => { const parseBody = async (response: Response): Promise<unknown> => {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment