Skip to content
Toggle navigation
Toggle navigation
This project
Loading...
Sign in
柴进
/
DesignImageSearch
Go to a project
Toggle navigation
Toggle navigation pinning
Projects
Groups
Snippets
Help
Project
Activity
Repository
Pipelines
Graphs
Issues
0
Merge Requests
0
Wiki
Network
Create a new issue
Builds
Commits
Issue Boards
Files
Commits
Network
Compare
Branches
Tags
3618fbd3
authored
2025-12-19 20:37:25 +0800
by
柴进
Browse Files
Options
Browse Files
Tag
Download
Email Patches
Plain Diff
fix:修复图像搜索的归一性问题
1 parent
7a8809ce
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
66 additions
and
17 deletions
app.py
core/faiss_manager.py
data_sync.py
app.py
View file @
3618fbd
...
...
@@ -22,6 +22,7 @@ from dotenv import load_dotenv
from
apscheduler.schedulers.background
import
BackgroundScheduler
from
apscheduler.triggers.cron
import
CronTrigger
# 尝试导入不同的 JWT 库
try
:
import
jwt
...
...
@@ -53,6 +54,8 @@ search_engine = None
data_sync
=
None
sync_thread
=
None
scheduler
=
None
# 全局 FAISS 管理器实例
faiss_manager
=
None
os
.
environ
[
'KMP_DUPLICATE_LIB_OK'
]
=
'TRUE'
...
...
@@ -92,6 +95,12 @@ def scheduled_sync():
def
check_faiss_index_update
():
"""检查FAISS索引文件是否有更新,必要时重载"""
global
faiss_manager
# 如果全局 faiss_manager 未初始化,直接返回
if
faiss_manager
is
None
:
return
try
:
import
os
...
...
@@ -109,13 +118,6 @@ def check_faiss_index_update():
os
.
path
.
getmtime
(
mapping_path
),
os
.
path
.
getmtime
(
tombstone_path
)
)
# 初始化 FAISS 管理器
faiss_manager
=
FAISSManager
(
index_path
=
config
[
'faiss'
][
'index_path'
],
mapping_path
=
config
[
'faiss'
][
'mapping_path'
],
tombstone_path
=
config
[
'faiss'
][
'tombstone_path'
],
vector_dim
=
config
[
'faiss'
][
'vector_dim'
]
)
# 初始化或检查上次加载时间
if
not
hasattr
(
check_faiss_index_update
,
'last_mtime'
):
...
...
@@ -147,7 +149,7 @@ def check_faiss_index_update():
@asynccontextmanager
async
def
lifespan
(
app
:
FastAPI
):
"""应用生命周期管理"""
global
config
,
db_manager
,
search_engine
,
data_sync
,
sync_thread
,
scheduler
global
config
,
db_manager
,
search_engine
,
data_sync
,
sync_thread
,
scheduler
,
faiss_manager
logger
.
info
(
"启动 Design Image Search 服务..."
)
...
...
@@ -201,8 +203,10 @@ async def lifespan(app: FastAPI):
sync_thread
.
start
()
logger
.
info
(
"后台数据同步线程已启动"
)
# 启动定时任务
(每天 0:00 和 12:00)
# 启动定时任务
scheduler
=
BackgroundScheduler
()
# 每天 0:00 和 12:00 执行同步任务
scheduler
.
add_job
(
func
=
scheduled_sync
,
trigger
=
CronTrigger
(
hour
=
'0,12'
,
minute
=
'0'
),
...
...
@@ -210,7 +214,7 @@ async def lifespan(app: FastAPI):
replace_existing
=
True
)
#
添加 FAISS 索引检查任务(每5分钟检查一次
)
#
每5分钟检查一次索引更新(data_sync会用os.utime通知我们
)
scheduler
.
add_job
(
func
=
check_faiss_index_update
,
trigger
=
CronTrigger
(
minute
=
'*/5'
),
...
...
core/faiss_manager.py
View file @
3618fbd
...
...
@@ -240,10 +240,10 @@ class FAISSManager:
continue
# 转换距离为相似度
#
FAISS IndexHNSWFlat返回L2距离平方:distance=0表示完全相同
#
转换为相似度:similarity = 1 / (1 + distance)
#
这样:distance=0 → similarity=1.0, distance越大similarity越接近0
similarity
=
1.0
/
(
1.0
+
float
(
dist
)
)
#
注意:索引使用METRIC_INNER_PRODUCT(内积度量)
#
对于L2归一化的向量,内积就是余弦相似度
#
dist值越大表示越相似(范围约为[0, 1],1表示完全相同)
similarity
=
float
(
dist
)
results
.
append
((
img_id
,
similarity
))
if
len
(
results
)
>=
top_k
:
...
...
data_sync.py
View file @
3618fbd
...
...
@@ -420,15 +420,60 @@ class DesignDataSync:
}
def
run_forever
(
self
):
"""定时同步,60 秒间隔"""
"""定时同步,60 秒间隔,0点和12点执行完整重建"""
import
datetime
import
time
interval
=
self
.
config
[
'sync'
][
'interval_seconds'
]
logger
.
info
(
f
"启动定时同步,间隔 {interval} 秒"
)
logger
.
info
(
f
"启动定时同步,间隔 {interval} 秒,0点和12点执行索引重建"
)
last_rebuild_date
=
None
while
True
:
try
:
self
.
sync_once
()
current_time
=
datetime
.
datetime
.
now
()
current_hour
=
current_time
.
hour
current_date
=
current_time
.
date
()
# 检查是否到了0点或12点,且今天还没有重建过(注意:下方条件实际判断的小时为 [0, 18],与此处及日志中的"12点"不一致,需确认;并且 last_rebuild_date 按日期去重,每天最多只会重建一次)
if
current_hour
in
[
0
,
18
]
and
last_rebuild_date
!=
current_date
:
logger
.
info
(
f
"🔄 {current_hour}点:开始执行完整索引重建..."
)
# 执行完整同步
result
=
self
.
sync_once
()
# 强制重建FAISS索引(清理墓碑)
logger
.
info
(
"🔧 开始强制重建FAISS索引..."
)
rebuild_start
=
time
.
time
()
try
:
if
self
.
faiss_manager
.
rebuild_index
(
self
.
db_manager
):
rebuild_time
=
time
.
time
()
-
rebuild_start
logger
.
info
(
f
"✅ 索引重建成功,耗时 {rebuild_time:.2f}秒"
)
logger
.
info
(
f
" 重建后统计: {self.faiss_manager.get_stats()}"
)
# 通知搜索服务重载索引(通过更新文件时间戳)
import
os
index_path
=
self
.
config
[
'faiss'
][
'index_path'
]
if
os
.
path
.
exists
(
index_path
):
# 更新文件修改时间,触发app.py重载
os
.
utime
(
index_path
)
logger
.
info
(
"已更新索引文件时间戳,通知搜索服务重载"
)
else
:
logger
.
error
(
"❌ 索引重建失败"
)
except
Exception
as
e
:
logger
.
error
(
f
"索引重建异常: {e}"
,
exc_info
=
True
)
# 记录今天已经重建过
last_rebuild_date
=
current_date
logger
.
info
(
"✅ 完整索引重建流程完成"
)
else
:
# 常规增量同步
self
.
sync_once
()
logger
.
info
(
f
"等待 {interval} 秒后进行下次同步..."
)
time
.
sleep
(
interval
)
except
KeyboardInterrupt
:
logger
.
info
(
"收到中断信号,停止同步"
)
break
...
...
Please
register
or
sign in
to post a comment