Commit 3e2fa498 authored by Vũ Hoàng Anh's avatar Vũ Hoàng Anh

Merge branch 'refactor/one-big-table' into feature/change-architect-agent

parents ffe458a6 a3c02b90
import json import json
import logging import logging
import os as _os
import re import re
import sqlite3
import time import time
import httpx import httpx
import asyncio import asyncio
...@@ -17,7 +15,6 @@ from .pattern_detector import HardPatternDetector ...@@ -17,7 +15,6 @@ from .pattern_detector import HardPatternDetector
from .size_message_builder import build_size_message from .size_message_builder import build_size_message
# Constants # Constants
from common.constants import SQLITE_DB_PATH
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
...@@ -46,7 +43,10 @@ SELECT_COLUMNS = """ ...@@ -46,7 +43,10 @@ SELECT_COLUMNS = """
COALESCE(quantity_sold, 0) AS quantity_sold, COALESCE(quantity_sold, 0) AS quantity_sold,
COALESCE(is_new_product, 0) AS is_new_product, COALESCE(is_new_product, 0) AS is_new_product,
size_scale, size_scale,
description_text description_text,
tags,
outfit_recommendations,
description_text_full
""" """
# ============================================================================== # ==============================================================================
...@@ -332,30 +332,18 @@ class SearchEngine: ...@@ -332,30 +332,18 @@ class SearchEngine:
return [], 0, "No results found" return [], 0, "No results found"
async def _enrich_with_outfit(self, products: list) -> list: @staticmethod
if not products or not _os.path.exists(SQLITE_DB_PATH): return products def _parse_outfit_recommendations(products: list) -> list:
top_codes = [p.get("internal_ref_code") for p in products[:3] if p.get("internal_ref_code")] """Parse outfit_recommendations JSON string from One Big Table into list[dict]."""
if not top_codes: return products for p in products:
try: raw = p.get("outfit_recommendations")
conn = sqlite3.connect(SQLITE_DB_PATH) if raw and isinstance(raw, str):
conn.row_factory = sqlite3.Row try:
cursor = conn.cursor() p["outfit_recommendations"] = json.loads(raw)
placeholders = ",".join(["?"] * len(top_codes)) except (json.JSONDecodeError, TypeError):
outfits = cursor.execute(f"SELECT * FROM pg__dashboard_canifa__ai_outfit_product_matches WHERE anchor_product_code IN ({placeholders})", top_codes).fetchall() p["outfit_recommendations"] = []
conn.close() elif not raw:
p["outfit_recommendations"] = []
outfit_map = {}
for row in outfits:
anchor = row["anchor_product_code"]
outfit_map.setdefault(anchor, []).append({
"code": row["match_product_code"],
"name": row["match_product_name"],
"role": row["match_role"],
"reason": row["ai_reason"]
})
for p in products:
p["outfit_recommendations"] = outfit_map.get(p.get("internal_ref_code"), [])
except Exception as e: logger.error(f"Outfit error: {e}")
return products return products
async def search(self, literal: str, inferred: Dict[str, Any], check_stock: bool = True) -> Dict[str, Any]: async def search(self, literal: str, inferred: Dict[str, Any], check_stock: bool = True) -> Dict[str, Any]:
...@@ -375,7 +363,7 @@ class SearchEngine: ...@@ -375,7 +363,7 @@ class SearchEngine:
if products: if products:
if check_stock: products = await enrich_with_stock(products) if check_stock: products = await enrich_with_stock(products)
products = await self._enrich_with_outfit(products) products = self._parse_outfit_recommendations(products)
for p in products: for p in products:
raw_size = p.get("size_scale", "") raw_size = p.get("size_scale", "")
parsed = [s.strip() for s in str(raw_size).replace("[", "").replace("]", "").replace('"', '').split(",") if s.strip()] parsed = [s.strip() for s in str(raw_size).replace("[", "").replace("]", "").replace('"', '').split(",") if s.strip()]
......
# Refactor: One Big Table — Loại bỏ multi-table joins
## Scope
- **What changes:** `search_engine.py` (core), `__init__.py` (exports)
- **What does NOT change:** `db_connector.py`, `stock_provider.py`, `pattern_detector.py`, `product_mapping.py`, `size_message_builder.py`, `data_retrieval_tool.py` (interface giữ nguyên)
- **Blast radius:** 2 files (LOW risk)
## Current State
- `SearchEngine._enrich_with_outfit()` mở kết nối SQLite riêng, query bảng `pg__dashboard_canifa__ai_outfit_product_matches` để lấy outfit recommendations.
- `SELECT_COLUMNS` chỉ lấy 19 cột cơ bản, thiếu `tags`, `outfit_recommendations`, `description_data_cut`.
- Python phải gánh logic JOIN data từ nhiều bảng.
## Target State
- `SELECT_COLUMNS` mở rộng lấy thêm `tags`, `outfit_recommendations`, `description_text_full` (đã rename từ `description_data_cut`) trực tiếp từ bảng chính.
- **XÓA BỎ** hàm `_enrich_with_outfit()` — data đã có sẵn trong cột `outfit_recommendations` dạng JSON.
- Python chỉ cần `json.loads()` cột `outfit_recommendations` để parse ra list[dict].
- Output trả ra cho Agent **giữ nguyên 100% format cũ**.
## Risk Assessment
- [x] Blast radius ≤ 10 files: LOW risk
## No-Touch Zones
- `db_connector.py` — không sửa
- `stock_provider.py` — không sửa
- `data_retrieval_tool.py` — không sửa (interface SearchEngine.search() giữ nguyên)
- `pattern_detector.py` — không sửa
- `product_mapping.py` — không sửa
## Commit Plan
### Commit 1: refactor: expand SELECT_COLUMNS to include One Big Table columns
- Files: `search_engine.py`
- Change: Thêm `tags`, `outfit_recommendations`, `description_text_full` vào `SELECT_COLUMNS`
- Test: Chạy search query, verify output có thêm 3 cột mới
- Rollback: `git revert HEAD`
### Commit 2: refactor: replace _enrich_with_outfit with inline JSON parse
- Files: `search_engine.py`
- Change: Xóa hàm `_enrich_with_outfit()`, thay bằng static method `_parse_outfit_recommendations()` (dùng `json.loads()`), được gọi trong method `search()`
- Test: Chạy search, verify `outfit_recommendations` vẫn trả về list[dict]
- Rollback: `git revert HEAD`
### Commit 3: refactor: cleanup unused imports and constants
- Files: `search_engine.py`
- Change: Xóa import `sqlite3`, `os` và `SQLITE_DB_PATH` nếu không dùng nữa. Cleanup comment blocks.
- Test: Chạy search, verify no regression
- Rollback: `git revert HEAD`
"""
Characterization Test: One Big Table Refactor
Self-contained tests - no external dependencies needed.
"""
import json
import sys
import os
# Put the "backend" directory that sits next to this script at the front of
# sys.path. BACKEND_DIR is also the base for the search_engine.py path that the
# file-parsing tests below open.
BACKEND_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "backend")
sys.path.insert(0, BACKEND_DIR)
# ═══════════════════════════════════════════════════════════════
# Test 1: Verify SELECT_COLUMNS via file parsing (no import needed)
# ═══════════════════════════════════════════════════════════════
def test_select_columns_contain_new_fields():
    """Verify SELECT_COLUMNS includes the 3 new One Big Table columns.

    The check is scoped to the triple-quoted ``SELECT_COLUMNS`` literal in
    ``search_engine.py`` and matches whole identifiers only. Searching the
    entire file by substring would pass trivially: ``description_text`` is a
    prefix of ``description_text_full``, and ``outfit_recommendations`` also
    appears in method names outside the column list.

    Raises:
        AssertionError: if the ``SELECT_COLUMNS`` literal cannot be located or
            any expected column name is absent from it.
    """
    import re  # local import: only this test needs regex matching

    search_engine_path = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
    with open(search_engine_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Extract the SELECT_COLUMNS string (text between the triple quotes)
    match = re.search(r'SELECT_COLUMNS\s*=\s*"""(.*?)"""', content, re.DOTALL)
    assert match is not None, "SELECT_COLUMNS definition not found in search_engine.py"
    select_columns = match.group(1)

    def has_column(name):
        # \b treats "_" as a word character, so "description_text" does not
        # match inside "description_text_full".
        return re.search(r"\b" + re.escape(name) + r"\b", select_columns) is not None

    # New One Big Table columns
    assert has_column("tags"), "Missing 'tags' in SELECT_COLUMNS"
    assert has_column("outfit_recommendations"), "Missing 'outfit_recommendations' in SELECT_COLUMNS"
    assert has_column("description_text_full"), "Missing 'description_text_full' in SELECT_COLUMNS"

    # Old columns still present
    assert has_column("internal_ref_code"), "Missing 'internal_ref_code'"
    assert has_column("sale_price"), "Missing 'sale_price'"
    assert has_column("description_text"), "Missing 'description_text'"

    print("✅ test_select_columns_contain_new_fields PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 2: Verify _enrich_with_outfit is REMOVED
# ═══════════════════════════════════════════════════════════════
def test_enrich_with_outfit_removed():
    """Check that the legacy multi-table outfit lookup is gone from search_engine.py.

    Reads the source file as text and asserts that none of the markers of the
    old implementation (the async method, the outfit-matches table name, the
    SQLite connection call, the DB path constant) appear in it.
    """
    source_file = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(source_file, "r", encoding="utf-8") as handle:
        source_text = handle.read()

    # (forbidden marker, failure message) pairs, checked in order
    forbidden = (
        ("async def _enrich_with_outfit", "_enrich_with_outfit should be REMOVED"),
        ("pg__dashboard_canifa__ai_outfit_product_matches", "Old table reference should be REMOVED"),
        ("sqlite3.connect", "sqlite3.connect should be REMOVED from search_engine"),
        ("SQLITE_DB_PATH", "SQLITE_DB_PATH import should be REMOVED"),
    )
    for marker, message in forbidden:
        assert marker not in source_text, message

    print("✅ test_enrich_with_outfit_removed PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 3: Verify _parse_outfit_recommendations EXISTS
# ═══════════════════════════════════════════════════════════════
def test_parse_method_exists():
    """Check that search_engine.py defines the replacement JSON-parsing method.

    Reads the source file as text and asserts that it contains the
    ``_parse_outfit_recommendations`` definition, a ``json.loads(raw)`` call
    and a ``json.JSONDecodeError`` reference.
    """
    source_file = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(source_file, "r", encoding="utf-8") as handle:
        source_text = handle.read()

    # (required marker, failure message) pairs, checked in order
    required = (
        ("def _parse_outfit_recommendations", "_parse_outfit_recommendations method should exist"),
        ("json.loads(raw)", "Should use json.loads to parse outfit JSON"),
        ("json.JSONDecodeError", "Should handle JSONDecodeError gracefully"),
    )
    for marker, message in required:
        assert marker in source_text, message

    print("✅ test_parse_method_exists PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 4: JSON parse logic (pure unit test, no imports)
# ═══════════════════════════════════════════════════════════════
def test_json_parse_logic():
    """Exercise a local replica of the outfit_recommendations parsing rules.

    The replica mirrors the logic under test without importing it: a non-empty
    string is decoded with ``json.loads`` (falling back to ``[]`` on a decode
    error), any falsy or missing value becomes ``[]``, and any other truthy
    value is left as-is.
    """

    def normalise(rows):
        # Stand-in for _parse_outfit_recommendations; mutates rows in place.
        for row in rows:
            value = row.get("outfit_recommendations")
            if not value:
                row["outfit_recommendations"] = []
            elif isinstance(value, str):
                try:
                    row["outfit_recommendations"] = json.loads(value)
                except (json.JSONDecodeError, TypeError):
                    row["outfit_recommendations"] = []
        return rows

    # Case 1: a JSON-encoded list is decoded into a list of dicts
    encoded = json.dumps(
        [{"match_product_code": "A001", "role": "top", "reason": "Test"}]
    )
    decoded = normalise([{"outfit_recommendations": encoded}])[0]["outfit_recommendations"]
    assert isinstance(decoded, list)
    assert len(decoded) == 1
    assert decoded[0]["role"] == "top"

    # Cases 2-4: None, a missing key and malformed JSON all collapse to []
    empty_inputs = (
        {"outfit_recommendations": None},
        {"internal_ref_code": "X"},
        {"outfit_recommendations": "NOT JSON {{{"},
    )
    for row in empty_inputs:
        assert normalise([row])[0]["outfit_recommendations"] == []

    # Case 5: a value that is already a list must pass through untouched
    already_parsed = normalise([{"outfit_recommendations": [{"role": "top"}]}])
    assert already_parsed[0]["outfit_recommendations"] == [{"role": "top"}]

    print("✅ test_json_parse_logic PASSED (5 sub-cases)")
# ═══════════════════════════════════════════════════════════════
# Test 5: Verify search() still calls _parse_outfit_recommendations
# ═══════════════════════════════════════════════════════════════
def test_search_calls_parse():
    """Check that search_engine.py calls the new parser and not the old enricher.

    Reads the source file as text; it must contain a
    ``_parse_outfit_recommendations(products)`` call and must not contain
    ``await self._enrich_with_outfit``.
    """
    path_parts = ("agent", "tools", "tool_module", "search_engine.py")
    source_file = os.path.join(BACKEND_DIR, *path_parts)
    with open(source_file, "r", encoding="utf-8") as handle:
        source_text = handle.read()

    uses_new_parser = "_parse_outfit_recommendations(products)" in source_text
    assert uses_new_parser, "search() should call _parse_outfit_recommendations"

    uses_old_enricher = "await self._enrich_with_outfit" in source_text
    assert not uses_old_enricher, "search() should NOT call _enrich_with_outfit"

    print("✅ test_search_calls_parse PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 6: Verify os and sqlite3 are no longer imported
# ═══════════════════════════════════════════════════════════════
def test_clean_imports():
    """Verify os and sqlite3 are not imported.

    The source of ``search_engine.py`` is parsed with ``ast`` and its actual
    import statements are inspected. This avoids the pitfalls of substring
    matching: text in comments or strings and look-alike modules (such as
    ``import ossaudiodev``) no longer trigger a failure, while
    ``from os import path`` and ``import json, os`` are now detected.

    Raises:
        AssertionError: if ``os`` or ``sqlite3`` (or a submodule of either) is
            imported anywhere in the file.
        SyntaxError: if ``search_engine.py`` cannot be parsed.
    """
    import ast  # local import: only this test needs the parser

    search_engine_path = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
    with open(search_engine_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Collect the top-level package name of every absolute import in the file.
    imported = set()
    for node in ast.walk(ast.parse(content, filename=search_engine_path)):
        if isinstance(node, ast.Import):
            imported.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            # level == 0 skips relative imports such as "from .os import x"
            imported.add(node.module.split(".")[0])

    assert "sqlite3" not in imported, "sqlite3 should not be imported"
    assert "os" not in imported, "os should not be imported"
    print("✅ test_clean_imports PASSED")
# ═══════════════════════════════════════════════════════════════
# RUNNER
# ═══════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Script entry point: run every characterization test in a fixed order.
    # A failing assert propagates and stops the run before the summary prints.
    rule = "=" * 60
    print(rule)
    print("🧪 ONE BIG TABLE REFACTOR — CHARACTERIZATION TESTS")
    print(rule)
    for run_check in (
        test_select_columns_contain_new_fields,
        test_enrich_with_outfit_removed,
        test_parse_method_exists,
        test_json_parse_logic,
        test_search_calls_parse,
        test_clean_imports,
    ):
        run_check()
    print(rule)
    print("🏆 ALL 6 TESTS PASSED!")
    print(rule)
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment