fix:修复图像搜索的归一化问题
Showing
3 changed files
with
66 additions
and
17 deletions
| ... | @@ -22,6 +22,7 @@ from dotenv import load_dotenv | ... | @@ -22,6 +22,7 @@ from dotenv import load_dotenv |
| 22 | from apscheduler.schedulers.background import BackgroundScheduler | 22 | from apscheduler.schedulers.background import BackgroundScheduler |
| 23 | from apscheduler.triggers.cron import CronTrigger | 23 | from apscheduler.triggers.cron import CronTrigger |
| 24 | 24 | ||
| 25 | |||
| 25 | # 尝试导入不同的 JWT 库 | 26 | # 尝试导入不同的 JWT 库 |
| 26 | try: | 27 | try: |
| 27 | import jwt | 28 | import jwt |
| ... | @@ -53,6 +54,8 @@ search_engine = None | ... | @@ -53,6 +54,8 @@ search_engine = None |
| 53 | data_sync = None | 54 | data_sync = None |
| 54 | sync_thread = None | 55 | sync_thread = None |
| 55 | scheduler = None | 56 | scheduler = None |
| 57 | # 全局 FAISS 管理器实例 | ||
| 58 | faiss_manager = None | ||
| 56 | 59 | ||
| 57 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' | 60 | os.environ['KMP_DUPLICATE_LIB_OK'] = 'TRUE' |
| 58 | 61 | ||
| ... | @@ -92,6 +95,12 @@ def scheduled_sync(): | ... | @@ -92,6 +95,12 @@ def scheduled_sync(): |
| 92 | 95 | ||
| 93 | def check_faiss_index_update(): | 96 | def check_faiss_index_update(): |
| 94 | """检查FAISS索引文件是否有更新,必要时重载""" | 97 | """检查FAISS索引文件是否有更新,必要时重载""" |
| 98 | global faiss_manager | ||
| 99 | |||
| 100 | # 如果全局 faiss_manager 未初始化,直接返回 | ||
| 101 | if faiss_manager is None: | ||
| 102 | return | ||
| 103 | |||
| 95 | try: | 104 | try: |
| 96 | import os | 105 | import os |
| 97 | 106 | ||
| ... | @@ -109,13 +118,6 @@ def check_faiss_index_update(): | ... | @@ -109,13 +118,6 @@ def check_faiss_index_update(): |
| 109 | os.path.getmtime(mapping_path), | 118 | os.path.getmtime(mapping_path), |
| 110 | os.path.getmtime(tombstone_path) | 119 | os.path.getmtime(tombstone_path) |
| 111 | ) | 120 | ) |
| 112 | # 初始化 FAISS 管理器 | ||
| 113 | faiss_manager = FAISSManager( | ||
| 114 | index_path=config['faiss']['index_path'], | ||
| 115 | mapping_path=config['faiss']['mapping_path'], | ||
| 116 | tombstone_path=config['faiss']['tombstone_path'], | ||
| 117 | vector_dim=config['faiss']['vector_dim'] | ||
| 118 | ) | ||
| 119 | 121 | ||
| 120 | # 初始化或检查上次加载时间 | 122 | # 初始化或检查上次加载时间 |
| 121 | if not hasattr(check_faiss_index_update, 'last_mtime'): | 123 | if not hasattr(check_faiss_index_update, 'last_mtime'): |
| ... | @@ -147,7 +149,7 @@ def check_faiss_index_update(): | ... | @@ -147,7 +149,7 @@ def check_faiss_index_update(): |
| 147 | @asynccontextmanager | 149 | @asynccontextmanager |
| 148 | async def lifespan(app: FastAPI): | 150 | async def lifespan(app: FastAPI): |
| 149 | """应用生命周期管理""" | 151 | """应用生命周期管理""" |
| 150 | global config, db_manager, search_engine, data_sync, sync_thread, scheduler | 152 | global config, db_manager, search_engine, data_sync, sync_thread, scheduler, faiss_manager |
| 151 | 153 | ||
| 152 | logger.info("启动 Design Image Search 服务...") | 154 | logger.info("启动 Design Image Search 服务...") |
| 153 | 155 | ||
| ... | @@ -201,8 +203,10 @@ async def lifespan(app: FastAPI): | ... | @@ -201,8 +203,10 @@ async def lifespan(app: FastAPI): |
| 201 | sync_thread.start() | 203 | sync_thread.start() |
| 202 | logger.info("后台数据同步线程已启动") | 204 | logger.info("后台数据同步线程已启动") |
| 203 | 205 | ||
| 204 | # 启动定时任务(每天 0:00 和 12:00) | 206 | # 启动定时任务 |
| 205 | scheduler = BackgroundScheduler() | 207 | scheduler = BackgroundScheduler() |
| 208 | |||
| 209 | # 每天 0:00 和 12:00 执行同步任务 | ||
| 206 | scheduler.add_job( | 210 | scheduler.add_job( |
| 207 | func=scheduled_sync, | 211 | func=scheduled_sync, |
| 208 | trigger=CronTrigger(hour='0,12', minute='0'), | 212 | trigger=CronTrigger(hour='0,12', minute='0'), |
| ... | @@ -210,7 +214,7 @@ async def lifespan(app: FastAPI): | ... | @@ -210,7 +214,7 @@ async def lifespan(app: FastAPI): |
| 210 | replace_existing=True | 214 | replace_existing=True |
| 211 | ) | 215 | ) |
| 212 | 216 | ||
| 213 | # 添加 FAISS 索引检查任务(每5分钟检查一次) | 217 | # 每5分钟检查一次索引更新(data_sync会用os.utime通知我们) |
| 214 | scheduler.add_job( | 218 | scheduler.add_job( |
| 215 | func=check_faiss_index_update, | 219 | func=check_faiss_index_update, |
| 216 | trigger=CronTrigger(minute='*/5'), | 220 | trigger=CronTrigger(minute='*/5'), | ... | ... |
| ... | @@ -240,10 +240,10 @@ class FAISSManager: | ... | @@ -240,10 +240,10 @@ class FAISSManager: |
| 240 | continue | 240 | continue |
| 241 | 241 | ||
| 242 | # 转换距离为相似度 | 242 | # 转换距离为相似度 |
| 243 | # FAISS IndexHNSWFlat返回L2距离平方:distance=0表示完全相同 | 243 | # 注意:索引使用METRIC_INNER_PRODUCT(内积度量) |
| 244 | # 转换为相似度:similarity = 1 / (1 + distance) | 244 | # 对于L2归一化的向量,内积就是余弦相似度 |
| 245 | # 这样:distance=0 → similarity=1.0, distance越大similarity越接近0 | 245 | # dist值越大表示越相似(L2归一化向量的内积理论范围为[-1, 1],实际多落在[0, 1],1表示完全相同) |
| 246 | similarity = 1.0 / (1.0 + float(dist)) | 246 | similarity = float(dist) |
| 247 | results.append((img_id, similarity)) | 247 | results.append((img_id, similarity)) |
| 248 | 248 | ||
| 249 | if len(results) >= top_k: | 249 | if len(results) >= top_k: | ... | ... |
| ... | @@ -420,15 +420,60 @@ class DesignDataSync: | ... | @@ -420,15 +420,60 @@ class DesignDataSync: |
| 420 | } | 420 | } |
| 421 | 421 | ||
| 422 | def run_forever(self): | 422 | def run_forever(self): |
| 423 | """定时同步,60 秒间隔""" | 423 | """定时同步,60 秒间隔,0点和12点执行完整重建""" |
| 424 | import datetime | ||
| 425 | import time | ||
| 426 | |||
| 424 | interval = self.config['sync']['interval_seconds'] | 427 | interval = self.config['sync']['interval_seconds'] |
| 425 | logger.info(f"启动定时同步,间隔 {interval} 秒") | 428 | logger.info(f"启动定时同步,间隔 {interval} 秒,0点和12点执行索引重建") |
| 429 | |||
| 430 | last_rebuild_date = None | ||
| 426 | 431 | ||
| 427 | while True: | 432 | while True: |
| 428 | try: | 433 | try: |
| 429 | self.sync_once() | 434 | current_time = datetime.datetime.now() |
| 435 | current_hour = current_time.hour | ||
| 436 | current_date = current_time.date() | ||
| 437 | |||
| 438 | # 检查是否到了0点或12点,且今天还没有重建过 | ||
| 439 | if current_hour in [0, 18] and last_rebuild_date != current_date: | ||
| 440 | logger.info(f"🔄 {current_hour}点:开始执行完整索引重建...") | ||
| 441 | |||
| 442 | # 执行完整同步 | ||
| 443 | result = self.sync_once() | ||
| 444 | |||
| 445 | # 强制重建FAISS索引(清理墓碑) | ||
| 446 | logger.info("🔧 开始强制重建FAISS索引...") | ||
| 447 | rebuild_start = time.time() | ||
| 448 | |||
| 449 | try: | ||
| 450 | if self.faiss_manager.rebuild_index(self.db_manager): | ||
| 451 | rebuild_time = time.time() - rebuild_start | ||
| 452 | logger.info(f"✅ 索引重建成功,耗时 {rebuild_time:.2f}秒") | ||
| 453 | logger.info(f" 重建后统计: {self.faiss_manager.get_stats()}") | ||
| 454 | |||
| 455 | # 通知搜索服务重载索引(通过更新文件时间戳) | ||
| 456 | import os | ||
| 457 | index_path = self.config['faiss']['index_path'] | ||
| 458 | if os.path.exists(index_path): | ||
| 459 | # 更新文件修改时间,触发app.py重载 | ||
| 460 | os.utime(index_path) | ||
| 461 | logger.info("已更新索引文件时间戳,通知搜索服务重载") | ||
| 462 | else: | ||
| 463 | logger.error("❌ 索引重建失败") | ||
| 464 | except Exception as e: | ||
| 465 | logger.error(f"索引重建异常: {e}", exc_info=True) | ||
| 466 | |||
| 467 | # 记录今天已经重建过 | ||
| 468 | last_rebuild_date = current_date | ||
| 469 | logger.info("✅ 完整索引重建流程完成") | ||
| 470 | else: | ||
| 471 | # 常规增量同步 | ||
| 472 | self.sync_once() | ||
| 473 | |||
| 430 | logger.info(f"等待 {interval} 秒后进行下次同步...") | 474 | logger.info(f"等待 {interval} 秒后进行下次同步...") |
| 431 | time.sleep(interval) | 475 | time.sleep(interval) |
| 476 | |||
| 432 | except KeyboardInterrupt: | 477 | except KeyboardInterrupt: |
| 433 | logger.info("收到中断信号,停止同步") | 478 | logger.info("收到中断信号,停止同步") |
| 434 | break | 479 | break | ... | ... |
-
Please register or sign in to post a comment