Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
C
chatbot-canifa-feedback
Project
Project
Details
Activity
Releases
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
1
Merge Requests
1
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Vũ Hoàng Anh
chatbot-canifa-feedback
Commits
eea82e60
Commit
eea82e60
authored
Apr 23, 2026
by
Vũ Hoàng Anh
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
feat: refactor AI Tagging to output semantic natural language tags and fix unicode logging
parent
b3888b0b
Changes
5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
936 additions
and
400 deletions
+936
-400
bulk_ops_route.py
backend/api/product_desc/bulk_ops_route.py
+14
-13
tags_direct_route.py
backend/api/product_desc/tags_direct_route.py
+369
-0
ultra_desc_db.py
backend/common/ultra_desc_db.py
+62
-53
extract_seasons.py
backend/database/starrocks/extract_seasons.py
+36
-0
product-desc.html
backend/static/product-desc/product-desc.html
+455
-334
No files found.
backend/api/product_desc/bulk_ops_route.py
View file @
eea82e60
...
...
@@ -285,18 +285,19 @@ CANIFA_LINES = [
]
# Attempt to dynamically fetch product lines from DB to override the static list
try
:
_conn
=
get_pooled_connection_compat
()
_cur
=
_conn
.
cursor
()
_cur
.
execute
(
f
"SELECT DISTINCT product_line FROM {PG_TABLE} WHERE product_line IS NOT NULL AND product_line != ''"
)
_db_lines
=
[
r
[
0
]
for
r
in
_cur
.
fetchall
()]
_cur
.
close
()
_conn
.
close
()
if
_db_lines
:
CANIFA_LINES
=
_db_lines
logger
.
info
(
f
"✅ Loaded {len(CANIFA_LINES)} product lines dynamically from DB"
)
except
Exception
as
e
:
logger
.
warning
(
f
"⚠️ Could not load product lines from DB, using fallback. Error: {e}"
)
# Removed dynamic loading on import to prevent blocking thread when DB is slow
# try:
# _conn = get_pooled_connection_compat()
# _cur = _conn.cursor()
# _cur.execute(f"SELECT DISTINCT product_line FROM {PG_TABLE} WHERE product_line IS NOT NULL AND product_line != ''")
# _db_lines = [r[0] for r in _cur.fetchall()]
# _cur.close()
# _conn.close()
# if _db_lines:
# CANIFA_LINES = _db_lines
# logger.info(f"✅ Loaded {len(CANIFA_LINES)} product lines dynamically from DB")
# except Exception as e:
# logger.warning(f"⚠️ Could not load product lines from DB, using fallback. Error: {e}")
AI_SEARCH_SYSTEM
=
"""Bạn là AI phân tích ý định tìm kiếm sản phẩm thời trang Canifa.
...
...
@@ -945,7 +946,7 @@ async def batch_generate_tags():
conn
=
get_pooled_connection_compat
()
cur
=
conn
.
cursor
()
# Find up to 500 products that either don't have the tags column populated or it's empty
cur
.
execute
(
f
"SELECT internal_ref_code FROM {PG_TABLE} WHERE tags IS NULL
OR tags = '[]'::jsonb
LIMIT 500"
)
cur
.
execute
(
f
"SELECT internal_ref_code FROM {PG_TABLE} WHERE tags IS NULL LIMIT 500"
)
rows
=
cur
.
fetchall
()
if
not
rows
:
...
...
backend/api/product_desc/tags_direct_route.py
0 → 100644
View file @
eea82e60
This diff is collapsed.
Click to expand it.
backend/common/ultra_desc_db.py
View file @
eea82e60
...
...
@@ -28,52 +28,61 @@ class UltraDescriptionDB:
"""Create ultra_descriptions table if it doesn't exist."""
if
cls
.
_initialized
:
return
from
config
import
USE_LOCAL_SQLITE
conn
=
None
try
:
conn
=
get_pooled_connection_compat
()
cur
=
conn
.
cursor
()
cur
.
execute
(
f
"""
CREATE TABLE IF NOT EXISTS {TABLE} (
id SERIAL PRIMARY KEY,
internal_ref_code VARCHAR(50) NOT NULL,
product_name VARCHAR(500),
product_image_url TEXT,
product_line VARCHAR(200),
description_data JSONB NOT NULL,
phase VARCHAR(20) DEFAULT 'enriched',
status SMALLINT DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
CREATE INDEX IF NOT EXISTS idx_ultra_desc_ref_code
ON {TABLE}(internal_ref_code);
-- Migration: add columns if table already existed
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS status SMALLINT DEFAULT 0;
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS clean_description TEXT DEFAULT '';
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS tags JSONB DEFAULT '[]'::jsonb;
-- Migration v2: magento_ref_code support
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS magento_ref_code VARCHAR(100);
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS base_ref_code VARCHAR(50);
"""
)
# Drop old unique constraint on internal_ref_code (v2 migration: allow multiple colors per style)
try
:
cur
.
execute
(
f
"ALTER TABLE {TABLE} DROP CONSTRAINT IF EXISTS ultra_descriptions_internal_ref_code_key"
)
except
Exception
:
pass
# Non-unique index on internal_ref_code (for grouping)
try
:
cur
.
execute
(
f
"CREATE INDEX IF NOT EXISTS idx_ultra_desc_internal_code ON {TABLE}(internal_ref_code)"
)
except
Exception
:
pass
# Unique index on magento_ref_code (partial, ignores NULLs)
try
:
cur
.
execute
(
f
"""CREATE UNIQUE INDEX IF NOT EXISTS idx_ultra_desc_magento_code
ON {TABLE}(magento_ref_code) WHERE magento_ref_code IS NOT NULL;"""
)
except
Exception
:
pass
# already exists
if
USE_LOCAL_SQLITE
:
# SQLite: Create full table at once with all columns
cur
.
execute
(
f
"""
CREATE TABLE IF NOT EXISTS {TABLE} (
id INTEGER PRIMARY KEY AUTOINCREMENT,
internal_ref_code TEXT NOT NULL,
magento_ref_code TEXT,
base_ref_code TEXT,
product_name TEXT,
product_image_url TEXT,
product_line TEXT,
description_data TEXT NOT NULL,
phase TEXT DEFAULT 'enriched',
status INTEGER DEFAULT 0,
clean_description TEXT DEFAULT '',
tags TEXT DEFAULT '[]',
ai_matches TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""
)
else
:
# PostgreSQL: Original logic
cur
.
execute
(
f
"""
CREATE TABLE IF NOT EXISTS {TABLE} (
id SERIAL PRIMARY KEY,
internal_ref_code VARCHAR(50) NOT NULL,
product_name VARCHAR(500),
product_image_url TEXT,
product_line VARCHAR(200),
description_data JSONB NOT NULL,
phase VARCHAR(20) DEFAULT 'enriched',
status SMALLINT DEFAULT 0,
created_at TIMESTAMPTZ DEFAULT NOW(),
updated_at TIMESTAMPTZ DEFAULT NOW()
);
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS status SMALLINT DEFAULT 0;
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS clean_description TEXT DEFAULT '';
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS tags JSONB DEFAULT '[]'::jsonb;
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS magento_ref_code VARCHAR(100);
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS base_ref_code VARCHAR(50);
ALTER TABLE {TABLE} ADD COLUMN IF NOT EXISTS ai_matches JSONB;
"""
)
cur
.
close
()
conn
.
commit
()
cls
.
_initialized
=
True
logger
.
info
(
"✅ Table
%
s ready
"
,
TABL
E
)
logger
.
info
(
"✅ Table
%
s ready
(Mock:
%
s)"
,
TABLE
,
USE_LOCAL_SQLIT
E
)
except
Exception
as
e
:
logger
.
error
(
"Error creating ultra_descriptions table:
%
s"
,
e
)
finally
:
...
...
@@ -134,9 +143,9 @@ class UltraDescriptionDB:
json
.
dumps
(
description_data
,
ensure_ascii
=
False
),
phase
,
clean_description
),
)
row
=
cur
.
fetchone
()
cur
.
close
()
row_id
=
row
[
0
]
if
row
else
None
conn
.
commit
()
# critical: without this INSERT/UPDATE rolls back on conn.close()
cur
.
close
()
# Close cursor FIRST
conn
.
commit
()
logger
.
info
(
"💾 Saved ultra desc:
%
s / magento=
%
s (id=
%
s)"
,
internal_ref_code
,
magento_ref_code
,
row_id
)
return
row_id
except
Exception
as
e
:
...
...
@@ -565,7 +574,7 @@ class UltraDescriptionDB:
try
:
conn
=
get_pooled_connection_compat
()
cur
=
conn
.
cursor
()
cur
.
execute
(
f
"SELECT COUNT(*) FROM {TABLE} WHERE tags IS NULL
OR tags = '[]'::jsonb
"
)
cur
.
execute
(
f
"SELECT COUNT(*) FROM {TABLE} WHERE tags IS NULL"
)
count
=
cur
.
fetchone
()[
0
]
cur
.
close
()
return
count
...
...
@@ -618,11 +627,11 @@ class UltraDescriptionDB:
conn
.
close
()
#
Auto-init table on import
try
:
UltraDescriptionDB
.
ensure_table
()
except
Exception
:
pass
#
Removed auto-init on import to prevent blocking thread when DB is slow
#
try:
#
UltraDescriptionDB.ensure_table()
#
except Exception:
#
pass
# ═══════════════════════════════════════════════════════════════
...
...
@@ -848,8 +857,8 @@ class DescFieldConfig:
conn
.
close
()
#
Auto-init field config table
try
:
DescFieldConfig
.
ensure_table
()
except
Exception
:
pass
#
Removed auto-init on import to prevent blocking thread when DB is slow
#
try:
#
DescFieldConfig.ensure_table()
#
except Exception:
#
pass
backend/database/starrocks/extract_seasons.py
0 → 100644
View file @
eea82e60
"""Report the distribution of the ``season`` column in a SQL dump.

The script reads a dump made of ``INSERT INTO <table> (<columns>) VALUES
(...), (...);`` statements, locates the ``season`` column in the column
list of the first statement, and prints how often each season value occurs
in the first ``SAMPLE_SIZE`` value tuples.

Usage::

    python extract_seasons.py [path/to/dump.sql]

When no path is given, ``DEFAULT_DUMP_PATH`` is read.
"""
import re
import sys
from collections import Counter

DEFAULT_DUMP_PATH = 'test_db.magento_product_dimension_with_text_embedding.sql'
SAMPLE_SIZE = 50

# Header of an INSERT statement that carries an explicit column list.
# Requiring the VALUES keyword right after the list keeps the first data
# tuple of a column-less "INSERT INTO t VALUES (...)" from being mistaken
# for the column list.
_INSERT_HEADER_RE = re.compile(
    r'INSERT\s+INTO[^(]+\(([^)]+)\)\s*VALUES', re.IGNORECASE
)

# A complete single-quoted SQL string literal, accepting both '' and
# backslash escapes.  Matching literals as single tokens is what lets the
# scanners below ignore any (), [] , or ; that appear inside string data.
_SQL_STRING = r"'[^'\\]*(?:(?:\\[\s\S]|'')[^'\\]*)*'"
_ROW_TOKEN_RE = re.compile(_SQL_STRING + r"|[();]")
_FIELD_TOKEN_RE = re.compile(_SQL_STRING + r"|[()\[\],]")


def find_insert_columns(sql):
    """Return the column names of the first INSERT header in *sql*.

    Names are stripped of surrounding whitespace and backticks.  Returns
    ``None`` when no ``INSERT INTO ... (...) VALUES`` header is present.
    """
    header = _INSERT_HEADER_RE.search(sql)
    if header is None:
        return None
    return [name.strip().strip('`') for name in header.group(1).split(',')]


def iter_value_rows(sql):
    """Yield the inner text of every VALUES tuple in *sql*.

    Scanning starts after each INSERT header, so the column list is never
    reported as a row, and stops at the statement-terminating ``;``.
    INSERT statements without an explicit column list are skipped.
    """
    pos = 0
    while True:
        header = _INSERT_HEADER_RE.search(sql, pos)
        if header is None:
            return
        pos = header.end()
        depth = 0
        row_start = pos
        for token in _ROW_TOKEN_RE.finditer(sql, pos):
            pos = token.end()
            text = token.group()
            if text == '(':
                if depth == 0:
                    row_start = token.end()
                depth += 1
            elif text == ')':
                if depth:
                    depth -= 1
                    if depth == 0:
                        yield sql[row_start:token.start()]
            elif text == ';' and depth == 0:
                # End of this statement; look for the next INSERT header.
                break
        else:
            # Input ended without a terminating ';' - nothing left to scan.
            return


def split_fields(row):
    """Split the inner text of a VALUES tuple on its top-level commas.

    Commas inside quoted strings or nested ``()`` / ``[]`` groups are not
    treated as separators.  Fields are returned unstripped.
    """
    fields = []
    depth = 0
    start = 0
    for token in _FIELD_TOKEN_RE.finditer(row):
        text = token.group()
        if text in ('(', '['):
            depth += 1
        elif text in (')', ']'):
            depth = max(depth - 1, 0)
        elif text == ',' and depth == 0:
            fields.append(row[start:token.start()])
            start = token.end()
    fields.append(row[start:])
    return fields


def count_seasons(rows, season_idx):
    """Count the value found at position *season_idx* in each of *rows*.

    Rows with too few fields are ignored.  Values are stripped of
    surrounding whitespace and single quotes before counting.
    """
    seasons = []
    for row in rows:
        parts = split_fields(row.strip())
        if len(parts) > season_idx:
            seasons.append(parts[season_idx].strip().strip("'"))
    return Counter(seasons)


def main(argv=None):
    """Run the report for the dump named in *argv* (default: ``sys.argv[1:]``)."""
    args = sys.argv[1:] if argv is None else list(argv)
    path = args[0] if args else DEFAULT_DUMP_PATH

    # errors='ignore' drops undecodable bytes so a partly corrupt dump can
    # still be inspected.  The whole file is held in memory.
    with open(path, encoding='utf-8', errors='ignore') as f:
        data = f.read()

    cols = find_insert_columns(data)
    if cols is None:
        print("Could not find INSERT column list")
        season_idx = -1
    else:
        season_idx = cols.index('season') if 'season' in cols else -1
        print(f"Column order found. 'season' is at index: {season_idx}")
        if season_idx >= 0:
            # Only meaningful when the column exists; with -1 the slice
            # would show unrelated leading columns.
            print(f"Columns around season: {cols[max(0, season_idx - 2):season_idx + 3]}")

    # Count every row but keep only the sampled prefix, so the full row
    # list is never materialized alongside the dump text.
    total = 0
    sample = []
    for row in iter_value_rows(data):
        if total < SAMPLE_SIZE:
            sample.append(row)
        total += 1
    print(f"\nTotal value rows found: {total}")

    if season_idx >= 0 and sample:
        counts = count_seasons(sample, season_idx)
        print(f"\nSeason values (sample from first {SAMPLE_SIZE} rows):")
        print("-" * 40)
        # most_common() sorts by descending count and keeps first-seen
        # order among ties.
        for season, count in counts.most_common():
            print(f"{count:5d} | '{season}'")


if __name__ == "__main__":
    main()
backend/static/product-desc/product-desc.html
View file @
eea82e60
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment