Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
chatbot canifa
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Vũ Hoàng Anh
chatbot canifa
Commits
3e2fa498
Commit
3e2fa498
authored
May 11, 2026
by
Vũ Hoàng Anh
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'refactor/one-big-table' into feature/change-architect-agent
parents
ffe458a6
a3c02b90
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
223 additions
and
29 deletions
+223
-29
search_engine.py
backend/agent/tools/tool_module/search_engine.py
+17
-29
01_refactor_one_big_table.md
plan/doings/01_refactor_one_big_table.md
+47
-0
test_one_big_table.py
test_one_big_table.py
+159
-0
No files found.
backend/agent/tools/tool_module/search_engine.py
View file @
3e2fa498
import
json
import
logging
import
os
as
_os
import
re
import
sqlite3
import
time
import
httpx
import
asyncio
...
...
@@ -17,7 +15,6 @@ from .pattern_detector import HardPatternDetector
from
.size_message_builder
import
build_size_message
# Constants
from
common.constants
import
SQLITE_DB_PATH
logger
=
logging
.
getLogger
(
__name__
)
...
...
@@ -46,7 +43,10 @@ SELECT_COLUMNS = """
COALESCE(quantity_sold, 0) AS quantity_sold,
COALESCE(is_new_product, 0) AS is_new_product,
size_scale,
description_text
description_text,
tags,
outfit_recommendations,
description_text_full
"""
# ==============================================================================
...
...
@@ -332,30 +332,18 @@ class SearchEngine:
return
[],
0
,
"No results found"
async
def
_enrich_with_outfit
(
self
,
products
:
list
)
->
list
:
if
not
products
or
not
_os
.
path
.
exists
(
SQLITE_DB_PATH
):
return
products
top_codes
=
[
p
.
get
(
"internal_ref_code"
)
for
p
in
products
[:
3
]
if
p
.
get
(
"internal_ref_code"
)]
if
not
top_codes
:
return
products
try
:
conn
=
sqlite3
.
connect
(
SQLITE_DB_PATH
)
conn
.
row_factory
=
sqlite3
.
Row
cursor
=
conn
.
cursor
()
placeholders
=
","
.
join
([
"?"
]
*
len
(
top_codes
))
outfits
=
cursor
.
execute
(
f
"SELECT * FROM pg__dashboard_canifa__ai_outfit_product_matches WHERE anchor_product_code IN ({placeholders})"
,
top_codes
)
.
fetchall
()
conn
.
close
()
outfit_map
=
{}
for
row
in
outfits
:
anchor
=
row
[
"anchor_product_code"
]
outfit_map
.
setdefault
(
anchor
,
[])
.
append
({
"code"
:
row
[
"match_product_code"
],
"name"
:
row
[
"match_product_name"
],
"role"
:
row
[
"match_role"
],
"reason"
:
row
[
"ai_reason"
]
})
for
p
in
products
:
p
[
"outfit_recommendations"
]
=
outfit_map
.
get
(
p
.
get
(
"internal_ref_code"
),
[])
except
Exception
as
e
:
logger
.
error
(
f
"Outfit error: {e}"
)
@staticmethod
def _parse_outfit_recommendations(products: list) -> list:
    """Normalize every product's ``outfit_recommendations`` value to a list.

    The value is read from each product dict and rewritten in place:

    * a ``str`` is decoded with ``json.loads``; undecodable text becomes ``[]``;
    * a value that already is a ``list`` is kept as-is (same object);
    * anything else -- a missing key, ``None``, or JSON that decodes to a
      non-list such as ``null``, an object or a number -- becomes ``[]`` so
      callers can always iterate the field.

    Args:
        products: Product dicts; each one is mutated in place.

    Returns:
        The same ``products`` list that was passed in.
    """
    for p in products:
        raw = p.get("outfit_recommendations")
        if isinstance(raw, str):
            try:
                raw = json.loads(raw)
            except (json.JSONDecodeError, TypeError):
                raw = []
        # Valid JSON is not necessarily a list ("null", "{}", "5"); enforce the
        # list contract instead of leaking None/dict/int to the caller.
        p["outfit_recommendations"] = raw if isinstance(raw, list) else []
    return products
async
def
search
(
self
,
literal
:
str
,
inferred
:
Dict
[
str
,
Any
],
check_stock
:
bool
=
True
)
->
Dict
[
str
,
Any
]:
...
...
@@ -375,7 +363,7 @@ class SearchEngine:
if
products
:
if
check_stock
:
products
=
await
enrich_with_stock
(
products
)
products
=
await
self
.
_enrich_with_outfit
(
products
)
products
=
self
.
_parse_outfit_recommendations
(
products
)
for
p
in
products
:
raw_size
=
p
.
get
(
"size_scale"
,
""
)
parsed
=
[
s
.
strip
()
for
s
in
str
(
raw_size
)
.
replace
(
"["
,
""
)
.
replace
(
"]"
,
""
)
.
replace
(
'"'
,
''
)
.
split
(
","
)
if
s
.
strip
()]
...
...
plan/doings/01_refactor_one_big_table.md
0 → 100644
View file @
3e2fa498
# Refactor: One Big Table — Loại bỏ multi-table joins
## Scope
-
**What changes:**
`search_engine.py`
(core),
`__init__.py`
(exports)
-
**What does NOT change:**
`db_connector.py`
,
`stock_provider.py`
,
`pattern_detector.py`
,
`product_mapping.py`
,
`size_message_builder.py`
,
`data_retrieval_tool.py`
(interface giữ nguyên)
-
**Blast radius:**
2 files (LOW risk)
## Current State
-
`SearchEngine._enrich_with_outfit()`
mở kết nối SQLite riêng, query bảng
`pg__dashboard_canifa__ai_outfit_product_matches`
để lấy outfit recommendations.
-
`SELECT_COLUMNS`
chỉ lấy 19 cột cơ bản, thiếu
`tags`
,
`outfit_recommendations`
,
`description_data_cut`
.
-
Python phải gánh logic JOIN data từ nhiều bảng.
## Target State
-
`SELECT_COLUMNS`
mở rộng lấy thêm
`tags`
,
`outfit_recommendations`
,
`description_text_full`
(đã rename từ
`description_data_cut`
) trực tiếp từ bảng chính.
-
**XÓA BỎ**
hàm
`_enrich_with_outfit()`
— data đã có sẵn trong cột
`outfit_recommendations`
dạng JSON.
- Python chỉ cần `json.loads()` cột `outfit_recommendations` để parse ra `list[dict]`.
-
Output trả ra cho Agent
**giữ nguyên 100% format cũ**
.
## Risk Assessment
- [x] Blast radius ≤ 10 files: LOW risk
## No-Touch Zones
-
`db_connector.py`
— không sửa
-
`stock_provider.py`
— không sửa
-
`data_retrieval_tool.py`
— không sửa (interface SearchEngine.search() giữ nguyên)
-
`pattern_detector.py`
— không sửa
-
`product_mapping.py`
— không sửa
## Commit Plan
### Commit 1: refactor: expand SELECT_COLUMNS to include One Big Table columns
-
Files:
`search_engine.py`
-
Change: Thêm
`tags`
,
`outfit_recommendations`
,
`description_text_full`
vào
`SELECT_COLUMNS`
-
Test: Chạy search query, verify output có thêm 3 cột mới
-
Rollback:
`git revert HEAD`
### Commit 2: refactor: replace _enrich_with_outfit with inline JSON parse
-
Files:
`search_engine.py`
- Change: Xóa hàm `_enrich_with_outfit()`, thay bằng static method `_parse_outfit_recommendations()` (dùng `json.loads()`) được gọi trong method `search()`
- Test: Chạy search, verify `outfit_recommendations` vẫn trả về `list[dict]`
-
Rollback:
`git revert HEAD`
### Commit 3: refactor: cleanup unused imports and constants
-
Files:
`search_engine.py`
-
Change: Xóa import
`sqlite3`
,
`os`
nếu không dùng nữa. Cleanup comment blocks.
-
Test: Chạy search, verify no regression
-
Rollback:
`git revert HEAD`
test_one_big_table.py
0 → 100644
View file @
3e2fa498
"""
Characterization Test: One Big Table Refactor
Self-contained tests - no external dependencies needed.
"""
import
json
import
sys
import
os
# Put the "backend" directory that sits next to this file at the front of the
# import path.
_HERE = os.path.dirname(os.path.abspath(__file__))
BACKEND_DIR = os.path.join(_HERE, "backend")
sys.path.insert(0, BACKEND_DIR)
# ═══════════════════════════════════════════════════════════════
# Test 1: Verify SELECT_COLUMNS via file parsing (no import needed)
# ═══════════════════════════════════════════════════════════════
def test_select_columns_contain_new_fields():
    """Verify SELECT_COLUMNS includes the 3 new One Big Table columns.

    The ``SELECT_COLUMNS = \"\"\"...\"\"\"`` literal is extracted from the text of
    search_engine.py and each column is matched as a whole word inside it.
    Searching the whole file would be vacuous: ``description_text`` is a
    prefix of ``description_text_full`` and ``outfit_recommendations`` also
    occurs in a method name.

    Raises:
        AssertionError: if the literal cannot be found or a column is missing.
    """
    import re  # local import: not part of this file's top-level imports

    search_engine_path = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(search_engine_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Extract the SELECT_COLUMNS string
    match = re.search(r'SELECT_COLUMNS\s*=\s*"""(.*?)"""', content, re.DOTALL)
    assert match, "SELECT_COLUMNS definition not found in search_engine.py"
    select_columns = match.group(1)

    # \b keeps "description_text" from matching inside "description_text_full".
    for column in ("tags", "outfit_recommendations", "description_text_full"):
        assert re.search(rf"\b{column}\b", select_columns), (
            f"Missing '{column}' in SELECT_COLUMNS"
        )
    assert re.search(r"\bdescription_text\b", select_columns), "Missing 'description_text'"

    # Old columns still present. NOTE(review): checked against the whole file as
    # before -- whether these two live inside SELECT_COLUMNS is not confirmed here.
    assert "internal_ref_code" in content, "Missing 'internal_ref_code'"
    assert "sale_price" in content, "Missing 'sale_price'"
    print("✅ test_select_columns_contain_new_fields PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 2: Verify _enrich_with_outfit is REMOVED
# ═══════════════════════════════════════════════════════════════
def test_enrich_with_outfit_removed():
    """Check that search_engine.py no longer contains the old outfit lookup.

    Reads the text of search_engine.py and asserts that none of the markers of
    the previous multi-table implementation occur anywhere in it.
    """
    source_path = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(source_path, "r", encoding="utf-8") as handle:
        source_text = handle.read()

    # (substring that must be absent, assertion message), checked in order.
    forbidden = (
        ("async def _enrich_with_outfit", "_enrich_with_outfit should be REMOVED"),
        (
            "pg__dashboard_canifa__ai_outfit_product_matches",
            "Old table reference should be REMOVED",
        ),
        ("sqlite3.connect", "sqlite3.connect should be REMOVED from search_engine"),
        ("SQLITE_DB_PATH", "SQLITE_DB_PATH import should be REMOVED"),
    )
    for marker, message in forbidden:
        assert marker not in source_text, message
    print("✅ test_enrich_with_outfit_removed PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 3: Verify _parse_outfit_recommendations EXISTS
# ═══════════════════════════════════════════════════════════════
def test_parse_method_exists():
    """Check that search_engine.py contains the JSON-parsing replacement method.

    Reads the text of search_engine.py and asserts that the method definition,
    the ``json.loads(raw)`` call and the ``json.JSONDecodeError`` handler all
    appear in it.
    """
    source_path = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(source_path, "r", encoding="utf-8") as handle:
        source_text = handle.read()

    # (substring that must be present, assertion message), checked in order.
    required = (
        (
            "def _parse_outfit_recommendations",
            "_parse_outfit_recommendations method should exist",
        ),
        ("json.loads(raw)", "Should use json.loads to parse outfit JSON"),
        ("json.JSONDecodeError", "Should handle JSONDecodeError gracefully"),
    )
    for marker, message in required:
        assert marker in source_text, message
    print("✅ test_parse_method_exists PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 4: JSON parse logic (pure unit test, no imports)
# ═══════════════════════════════════════════════════════════════
def test_json_parse_logic():
    """Test the outfit JSON parse rules on a local stand-in.

    ``parse_outfit`` below mirrors ``_parse_outfit_recommendations`` from
    search_engine.py; it is a hand-kept copy (nothing is imported from the
    project), so it has to be updated together with the real method.

    Covered inputs: valid JSON list, ``None``, missing key, invalid JSON,
    an already-parsed list, and an empty string.
    """

    # Simulate _parse_outfit_recommendations
    def parse_outfit(products):
        for p in products:
            raw = p.get("outfit_recommendations")
            if raw and isinstance(raw, str):
                try:
                    p["outfit_recommendations"] = json.loads(raw)
                except (json.JSONDecodeError, TypeError):
                    p["outfit_recommendations"] = []
            elif not raw:
                p["outfit_recommendations"] = []
        return products

    # Case 1: Valid JSON
    products = [{"outfit_recommendations": json.dumps([
        {"match_product_code": "A001", "role": "top", "reason": "Test"}
    ])}]
    result = parse_outfit(products)
    assert isinstance(result[0]["outfit_recommendations"], list)
    assert len(result[0]["outfit_recommendations"]) == 1
    assert result[0]["outfit_recommendations"][0]["role"] == "top"

    # Case 2: None
    products = [{"outfit_recommendations": None}]
    result = parse_outfit(products)
    assert result[0]["outfit_recommendations"] == []

    # Case 3: Missing key
    products = [{"internal_ref_code": "X"}]
    result = parse_outfit(products)
    assert result[0]["outfit_recommendations"] == []

    # Case 4: Invalid JSON
    products = [{"outfit_recommendations": "NOT JSON {{{"}]
    result = parse_outfit(products)
    assert result[0]["outfit_recommendations"] == []

    # Case 5: Already a list (should remain untouched)
    products = [{"outfit_recommendations": [{"role": "top"}]}]
    result = parse_outfit(products)
    assert result[0]["outfit_recommendations"] == [{"role": "top"}]

    # Case 6: Empty string (falsy str -> takes the "not raw" branch, never json.loads)
    products = [{"outfit_recommendations": ""}]
    result = parse_outfit(products)
    assert result[0]["outfit_recommendations"] == []

    print("✅ test_json_parse_logic PASSED (6 sub-cases)")
# ═══════════════════════════════════════════════════════════════
# Test 5: Verify search() still calls _parse_outfit_recommendations
# ═══════════════════════════════════════════════════════════════
def test_search_calls_parse():
    """Check which outfit helper search_engine.py calls.

    Reads the text of search_engine.py and asserts that a
    ``_parse_outfit_recommendations(products)`` call is present and that no
    ``await self._enrich_with_outfit`` call remains.
    """
    source_path = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(source_path, "r", encoding="utf-8") as handle:
        source_text = handle.read()

    calls_new_parser = "_parse_outfit_recommendations(products)" in source_text
    assert calls_new_parser, "search() should call _parse_outfit_recommendations"

    calls_old_enricher = "await self._enrich_with_outfit" in source_text
    assert not calls_old_enricher, "search() should NOT call _enrich_with_outfit"

    print("✅ test_search_calls_parse PASSED")
# ═══════════════════════════════════════════════════════════════
# Test 6: Verify no unused imports
# ═══════════════════════════════════════════════════════════════
def test_clean_imports():
    """Verify os and sqlite3 are not imported by search_engine.py.

    The file is parsed with ``ast`` and every ``import`` / ``from ... import``
    statement is inspected, so all spellings are caught (``import os as _os``,
    ``import json, os``, ``from os import path``, ``import os.path``) while
    look-alike text (``import ossaudiodev``, comments, strings) is ignored.

    Raises:
        AssertionError: if ``os`` or ``sqlite3`` is imported.
        SyntaxError: if search_engine.py cannot be parsed.
    """
    import ast  # local import: not part of this file's top-level imports

    search_engine_path = os.path.join(
        BACKEND_DIR, "agent", "tools", "tool_module", "search_engine.py"
    )
    with open(search_engine_path, "r", encoding="utf-8") as f:
        content = f.read()

    # Collect the top-level package name of every absolute import in the file.
    imported = set()
    for node in ast.walk(ast.parse(content)):
        if isinstance(node, ast.Import):
            imported.update(alias.name.split(".")[0] for alias in node.names)
        elif isinstance(node, ast.ImportFrom) and node.level == 0 and node.module:
            # level > 0 is a relative import (e.g. "from .os import x"); skip it.
            imported.add(node.module.split(".")[0])

    assert "sqlite3" not in imported, "sqlite3 should not be imported"
    assert "os" not in imported, "os should not be imported"
    print("✅ test_clean_imports PASSED")
# ═══════════════════════════════════════════════════════════════
# RUNNER
# ═══════════════════════════════════════════════════════════════
if __name__ == "__main__":
    # Script entry point: run every test in order; the first failing assert
    # aborts the run with its AssertionError.
    banner = "=" * 60
    print(banner)
    print("🧪 ONE BIG TABLE REFACTOR — CHARACTERIZATION TESTS")
    print(banner)
    for run_test in (
        test_select_columns_contain_new_fields,
        test_enrich_with_outfit_removed,
        test_parse_method_exists,
        test_json_parse_logic,
        test_search_calls_parse,
        test_clean_imports,
    ):
        run_test()
    print(banner)
    print("🏆 ALL 6 TESTS PASSED!")
    print(banner)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment