Merge branch 'refactor/one-big-table' into feature/change-architect-agent

3e2fa498 · Vũ Hoàng Anh · ffe458a6 · a3c02b90 · 3e2fa498 · 3e2fa498
Commit 3e2fa498 authored May 11, 2026 by Vũ Hoàng Anh
Showing with 223 additions and 29 deletions

search_engine.py backend/agent/tools/tool_module/search_engine.py +17 -29

01_refactor_one_big_table.md plan/doings/01_refactor_one_big_table.md +47 -0

test_one_big_table.py test_one_big_table.py +159 -0

No files found.
--- a/backend/agent/tools/tool_module/search_engine.py
+++ b/backend/agent/tools/tool_module/search_engine.py
 import json
 import logging
-import os as _os
 import re
-import sqlite3
 import time
 import httpx
 import asyncio
@@ -17,7 +15,6 @@ from .pattern_detector import HardPatternDetector
 from .size_message_builder import build_size_message
 # Constants
-from common.constants import SQLITE_DB_PATH
 logger = logging.getLogger(__name__)
@@ -46,7 +43,10 @@ SELECT_COLUMNS = """
    COALESCE(quantity_sold, 0) AS quantity_sold,
    COALESCE(is_new_product, 0) AS is_new_product,
    size_scale,
-    description_text
+    description_text,
+    tags,
+    outfit_recommendations,
+    description_text_full
 """
 # ==============================================================================
@@ -332,30 +332,18 @@ class SearchEngine:
        return [], 0, "No results found"
-    async def _enrich_with_outfit(self, products: list) -> list:
-        if not products or not _os.path.exists(SQLITE_DB_PATH): return products
-        top_codes = [p.get("internal_ref_code") for p in products[:3] if p.get("internal_ref_code")]
-        if not top_codes: return products
-        try:
-            conn = sqlite3.connect(SQLITE_DB_PATH)
-            conn.row_factory = sqlite3.Row
-            cursor = conn.cursor()
-            placeholders = ",".join(["?"] * len(top_codes))
-            outfits = cursor.execute(f"SELECT * FROM pg__dashboard_canifa__ai_outfit_product_matches WHERE anchor_product_code IN ({placeholders})", top_codes).fetchall()
-            conn.close()
-            outfit_map = {}
-            for row in outfits:
-                anchor = row["anchor_product_code"]
-                outfit_map.setdefault(anchor, []).append({
-                    "code": row["match_product_code"],
-                    "name": row["match_product_name"],
-                    "role": row["match_role"],
-                    "reason": row["ai_reason"]
-                })
-            for p in products:
-                p["outfit_recommendations"] = outfit_map.get(p.get("internal_ref_code"), [])
-        except Exception as e: logger.error(f"Outfit error: {e}")
+    @staticmethod
+    def _parse_outfit_recommendations(products: list) -> list:
+        """Normalize each product's ``outfit_recommendations`` to a list.
+
+        A string value is decoded as JSON. Missing, empty, malformed, or
+        non-array values all become ``[]`` so callers can always iterate the
+        field; a value that is already a list is kept as-is.
+
+        Args:
+            products: Product dicts; each one is updated in place.
+
+        Returns:
+            The same ``products`` list that was passed in.
+        """
+        for p in products:
+            raw = p.get("outfit_recommendations")
+            if isinstance(raw, str) and raw:
+                try:
+                    raw = json.loads(raw)
+                except json.JSONDecodeError:
+                    raw = None
+            # Valid JSON can still be an object, a scalar, or null; only a
+            # list satisfies the contract, so everything else collapses to [].
+            p["outfit_recommendations"] = raw if isinstance(raw, list) else []
        return products
    async def search(self, literal: str, inferred: Dict[str, Any], check_stock: bool = True) -> Dict[str, Any]:
@@ -375,7 +363,7 @@ class SearchEngine:
        if products:
            if check_stock: products = await enrich_with_stock(products)
-            products = await self._enrich_with_outfit(products)
+            products = self._parse_outfit_recommendations(products)
            for p in products:
                raw_size = p.get("size_scale", "")
                parsed = [s.strip() for s in str(raw_size).replace("[", "").replace("]", "").replace('"', '').split(",") if s.strip()]

--- a/plan/doings/01_refactor_one_big_table.md
+++ b/plan/doings/01_refactor_one_big_table.md
+# Refactor: One Big Table — Loại bỏ multi-table joins
+## Scope
+- **What changes:** `search_engine.py` (core), `__init__.py` (exports)
+- **What does NOT change:** `db_connector.py`, `stock_provider.py`, `pattern_detector.py`, `product_mapping.py`, `size_message_builder.py`, `data_retrieval_tool.py` (interface giữ nguyên)
+- **Blast radius:** 2 files (LOW risk)
+## Current State
+- `SearchEngine._enrich_with_outfit()` mở kết nối SQLite riêng, query bảng `pg__dashboard_canifa__ai_outfit_product_matches` để lấy outfit recommendations.
+- `SELECT_COLUMNS` chỉ lấy 19 cột cơ bản, thiếu `tags`, `outfit_recommendations`, `description_data_cut`.
+- Python phải gánh logic JOIN data từ nhiều bảng.
+## Target State
+- `SELECT_COLUMNS` mở rộng lấy thêm `tags`, `outfit_recommendations`, `description_text_full` (đã rename từ `description_data_cut`) trực tiếp từ bảng chính.
+- **XÓA BỎ** hàm `_enrich_with_outfit()` — data đã có sẵn trong cột `outfit_recommendations` dạng JSON.
+- Python chỉ cần `json.loads()` cột `outfit_recommendations` để parse ra list[dict].
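+- Output trả ra cho Agent **giữ nguyên 100% format cũ**, với điều kiện JSON trong cột `outfit_recommendations` dùng đúng các key `code`, `name`, `role`, `reason` mà `_enrich_with_outfit()` từng trả về.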
+## Risk Assessment
+- [x] Blast radius ≤ 10 files: LOW risk
+## No-Touch Zones
+- `db_connector.py` — không sửa
+- `stock_provider.py` — không sửa
+- `data_retrieval_tool.py` — không sửa (interface SearchEngine.search() giữ nguyên)
+- `pattern_detector.py` — không sửa
+- `product_mapping.py` — không sửa
+## Commit Plan
+### Commit 1: refactor: expand SELECT_COLUMNS to include One Big Table columns
+- Files: `search_engine.py`
+- Change: Thêm `tags`, `outfit_recommendations`, `description_text_full` vào `SELECT_COLUMNS`
+- Test: Chạy search query, verify output có thêm 3 cột mới
+- Rollback: `git revert HEAD`
+### Commit 2: refactor: replace _enrich_with_outfit with JSON parse helper
+- Files: `search_engine.py`
+- Change: Xóa hàm `_enrich_with_outfit()`, thay bằng static method `_parse_outfit_recommendations()` (dùng `json.loads()`) được gọi trong method `search()`
+- Test: Chạy search, verify `outfit_recommendations` vẫn trả về list[dict]
+- Rollback: `git revert HEAD`
+### Commit 3: refactor: cleanup unused imports and constants
+- Files: `search_engine.py`
+- Change: Xóa import `sqlite3`, `os` và constant `SQLITE_DB_PATH` nếu không dùng nữa. Cleanup comment blocks.
+- Test: Chạy search, verify no regression
+- Rollback: `git revert HEAD`
--- a/test_one_big_table.py
+++ b/test_one_big_table.py
+"""
+Characterization Test: One Big Table Refactor
+Self-contained tests - no external dependencies needed.
+"""
+import json
+import sys
+import os
+# Add backend to path
+BACKEND_DIR = os.path.join(os.path.dirname(os.path.abspath(__file__)), "backend")
+sys.path.insert(0, BACKEND_DIR)
+# ═══════════════════════════════════════════════════════════════
+# Test 1: Verify SELECT_COLUMNS via file parsing (no import needed)
+# ═══════════════════════════════════════════════════════════════
+def test_select_columns_contain_new_fields():
+    """Verify SELECT_COLUMNS keeps ``description_text`` and adds the 3 new columns.
+
+    Those four names are looked up as whole words inside the text of the
+    ``SELECT_COLUMNS`` literal, so ``description_text_full`` cannot satisfy the
+    ``description_text`` check and a mention elsewhere in the module cannot
+    satisfy any of them. ``internal_ref_code`` and ``sale_price`` are checked
+    as whole words anywhere in the file.
+    """
+    import re  # local import: the module header only brings in json/sys/os
+
+    search_engine_path = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
+    with open(search_engine_path, "r", encoding="utf-8") as f:
+        content = f.read()
+    # Extract the SELECT_COLUMNS string
+    match = re.search(r'SELECT_COLUMNS\s*=\s*"""(.*?)"""', content, re.DOTALL)
+    assert match, "SELECT_COLUMNS triple-quoted literal not found"
+    columns = set(re.findall(r"\w+", match.group(1)))
+    for name in ("tags", "outfit_recommendations", "description_text_full"):
+        assert name in columns, f"Missing '{name}' in SELECT_COLUMNS"
+    # Old columns still present
+    file_words = set(re.findall(r"\w+", content))
+    for name in ("internal_ref_code", "sale_price"):
+        assert name in file_words, f"Missing '{name}'"
+    assert "description_text" in columns, "Missing 'description_text'"
+    print("✅ test_select_columns_contain_new_fields PASSED")
+# ═══════════════════════════════════════════════════════════════
+# Test 2: Verify _enrich_with_outfit is REMOVED
+# ═══════════════════════════════════════════════════════════════
+def test_enrich_with_outfit_removed():
+    """Verify the old multi-table outfit lookup is gone from search_engine.py.
+
+    The definition check matches ``def _enrich_with_outfit`` so that both an
+    ``async def`` and a plain ``def`` reintroduction are caught.
+    """
+    search_engine_path = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
+    with open(search_engine_path, "r", encoding="utf-8") as f:
+        content = f.read()
+    assert "def _enrich_with_outfit" not in content, "_enrich_with_outfit should be REMOVED"
+    assert "pg__dashboard_canifa__ai_outfit_product_matches" not in content, "Old table reference should be REMOVED"
+    assert "sqlite3.connect" not in content, "sqlite3.connect should be REMOVED from search_engine"
+    assert "SQLITE_DB_PATH" not in content, "SQLITE_DB_PATH import should be REMOVED"
+    print("✅ test_enrich_with_outfit_removed PASSED")
+# ═══════════════════════════════════════════════════════════════
+# Test 3: Verify _parse_outfit_recommendations EXISTS
+# ═══════════════════════════════════════════════════════════════
+def test_parse_method_exists():
+    """Check that search_engine.py contains the JSON-parsing replacement method."""
+    source_file = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
+    with open(source_file, "r", encoding="utf-8") as handle:
+        source_text = handle.read()
+    # Each required snippet is paired with the message shown when it is absent.
+    required = (
+        ("def _parse_outfit_recommendations", "_parse_outfit_recommendations method should exist"),
+        ("json.loads(raw)", "Should use json.loads to parse outfit JSON"),
+        ("json.JSONDecodeError", "Should handle JSONDecodeError gracefully"),
+    )
+    for snippet, message in required:
+        assert snippet in source_text, message
+    print("✅ test_parse_method_exists PASSED")
+# ═══════════════════════════════════════════════════════════════
+# Test 4: JSON parse logic (pure unit test, no imports)
+# ═══════════════════════════════════════════════════════════════
+def test_json_parse_logic():
+    """Exercise a local replica of the outfit JSON-parsing rules on five inputs."""
+    # Local stand-in that mirrors _parse_outfit_recommendations
+    def parse_outfit(products):
+        for item in products:
+            value = item.get("outfit_recommendations")
+            if not value:
+                item["outfit_recommendations"] = []
+            elif isinstance(value, str):
+                try:
+                    item["outfit_recommendations"] = json.loads(value)
+                except (json.JSONDecodeError, TypeError):
+                    item["outfit_recommendations"] = []
+        return products
+    # Case 1: Valid JSON
+    encoded = json.dumps([
+        {"match_product_code": "A001", "role": "top", "reason": "Test"}
+    ])
+    parsed = parse_outfit([{"outfit_recommendations": encoded}])[0]["outfit_recommendations"]
+    assert isinstance(parsed, list)
+    assert len(parsed) == 1
+    assert parsed[0]["role"] == "top"
+    # Cases 2-5: each input product is paired with the expected parsed value
+    remaining_cases = (
+        # Case 2: None
+        ({"outfit_recommendations": None}, []),
+        # Case 3: Missing key
+        ({"internal_ref_code": "X"}, []),
+        # Case 4: Invalid JSON
+        ({"outfit_recommendations": "NOT JSON {{{"}, []),
+        # Case 5: Already a list (should remain untouched)
+        ({"outfit_recommendations": [{"role": "top"}]}, [{"role": "top"}]),
+    )
+    for product, expected in remaining_cases:
+        assert parse_outfit([product])[0]["outfit_recommendations"] == expected
+    print("✅ test_json_parse_logic PASSED (5 sub-cases)")
+# ═══════════════════════════════════════════════════════════════
+# Test 5: Verify search() still calls _parse_outfit_recommendations
+# ═══════════════════════════════════════════════════════════════
+def test_search_calls_parse():
+    """Check search_engine.py calls the new parser and no longer awaits the old enricher.
+
+    Both checks are plain substring searches over the whole file.
+    """
+    source_file = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
+    with open(source_file, "r", encoding="utf-8") as handle:
+        source_text = handle.read()
+    uses_new_parser = "_parse_outfit_recommendations(products)" in source_text
+    uses_old_enricher = "await self._enrich_with_outfit" in source_text
+    assert uses_new_parser, "search() should call _parse_outfit_recommendations"
+    assert not uses_old_enricher, "search() should NOT call _enrich_with_outfit"
+    print("✅ test_search_calls_parse PASSED")
+# ═══════════════════════════════════════════════════════════════
+# Test 6: Verify no unused imports
+# ═══════════════════════════════════════════════════════════════
+def test_clean_imports():
+    """Verify search_engine.py has no import statement for ``os`` or ``sqlite3``.
+
+    Lines are matched as real ``import X`` / ``from X`` statements with a word
+    boundary after the module name, so names that merely start with ``os``
+    (for example ``from pkg import os_utils``) do not cause a false failure.
+    """
+    import re  # local import: the module header only brings in json/sys/os
+
+    search_engine_path = os.path.join(BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py")
+    with open(search_engine_path, "r", encoding="utf-8") as f:
+        content = f.read()
+    for module in ("sqlite3", "os"):
+        statement = re.compile(rf"^[ \t]*(?:import|from)[ \t]+{module}\b", re.MULTILINE)
+        assert not statement.search(content), f"{module} should not be imported"
+    print("✅ test_clean_imports PASSED")
+# ═══════════════════════════════════════════════════════════════
+# RUNNER
+# ═══════════════════════════════════════════════════════════════
+if __name__ == "__main__":
+    # Script entry point: print a banner, run every check in order, then report.
+    banner = "=" * 60
+    print(banner)
+    print("🧪 ONE BIG TABLE REFACTOR — CHARACTERIZATION TESTS")
+    print(banner)
+    checks = (
+        test_select_columns_contain_new_fields,
+        test_enrich_with_outfit_removed,
+        test_parse_method_exists,
+        test_json_parse_logic,
+        test_search_calls_parse,
+        test_clean_imports,
+    )
+    for check in checks:
+        check()
+    print(banner)
+    print("🏆 ALL 6 TESTS PASSED!")
+    print(banner)