Commit 17077cdf authored by Vũ Hoàng Anh's avatar Vũ Hoàng Anh

feat: skip response cache for /chat-dev endpoint

parent 4e384443
...@@ -42,6 +42,7 @@ async def chat_controller( ...@@ -42,6 +42,7 @@ async def chat_controller(
return_user_insight: bool = False, return_user_insight: bool = False,
is_authenticated: bool = False, # New: để track user vs guest is_authenticated: bool = False, # New: để track user vs guest
device_id: str | None = None, # Device ID từ request (có cả khi authenticated) device_id: str | None = None, # Device ID từ request (có cả khi authenticated)
skip_cache: bool = False, # Skip response cache (for dev endpoint)
) -> dict: ) -> dict:
""" """
Controller main logic for non-streaming chat requests. Controller main logic for non-streaming chat requests.
...@@ -63,7 +64,7 @@ async def chat_controller( ...@@ -63,7 +64,7 @@ async def chat_controller(
logger.info("chat_controller start: model=%s, key=%s", model_name, identity_key) logger.info("chat_controller start: model=%s, key=%s", model_name, identity_key)
# ====================== CACHE LAYER ====================== # ====================== CACHE LAYER ======================
if REDIS_CACHE_TURN_ON: if REDIS_CACHE_TURN_ON and not skip_cache:
cached_response = await redis_cache.get_response(user_id=identity_key, query=query) cached_response = await redis_cache.get_response(user_id=identity_key, query=query)
if cached_response: if cached_response:
logger.info("CACHE HIT key=%s", identity_key) logger.info("CACHE HIT key=%s", identity_key)
...@@ -303,7 +304,7 @@ async def chat_controller( ...@@ -303,7 +304,7 @@ async def chat_controller(
if user_insight_dict is not None: if user_insight_dict is not None:
response_payload["user_insight"] = user_insight_dict response_payload["user_insight"] = user_insight_dict
if REDIS_CACHE_TURN_ON: if REDIS_CACHE_TURN_ON and not skip_cache:
await redis_cache.set_response(user_id=identity_key, query=query, response_data=response_payload, ttl=300) await redis_cache.set_response(user_id=identity_key, query=query, response_data=response_payload, ttl=300)
background_tasks.add_task( background_tasks.add_task(
......
...@@ -64,6 +64,7 @@ PRODUCT_LINE_MAP: dict[str, list[str]] = { ...@@ -64,6 +64,7 @@ PRODUCT_LINE_MAP: dict[str, list[str]] = {
"Bộ quần áo": ["bộ quần áo", "đồ bộ"], "Bộ quần áo": ["bộ quần áo", "đồ bộ"],
"Bộ mặc nhà": ["bộ mặc nhà", "đồ ngủ", "đồ mặc nhà"], "Bộ mặc nhà": ["bộ mặc nhà", "đồ ngủ", "đồ mặc nhà"],
"Blazer": ["blazer"], "Blazer": ["blazer"],
"Tất": ["tất", "vớ", "bao chân", "vớ chân", "tất chân"],
} }
# ============================================================================== # ==============================================================================
......
...@@ -107,6 +107,7 @@ async def fashion_qa_chat_dev(request: Request, req: QueryRequest, background_ta ...@@ -107,6 +107,7 @@ async def fashion_qa_chat_dev(request: Request, req: QueryRequest, background_ta
return_user_insight=False, return_user_insight=False,
is_authenticated=is_authenticated, # Pass auth status for Langfuse metadata is_authenticated=is_authenticated, # Pass auth status for Langfuse metadata
device_id=device_id, # Luôn truyền device_id để lưu vào Langfuse metadata device_id=device_id, # Luôn truyền device_id để lưu vào Langfuse metadata
skip_cache=True, # Dev endpoint: luôn gọi LLM mới, không cache
) )
usage_info = await message_limit_service.increment( usage_info = await message_limit_service.increment(
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment