Compare commits

..

4 Commits

Author SHA1 Message Date
ouyangyouzhang 9200df7842 新增客服重定向接口 2025-11-28 10:12:41 +08:00
ouyangyouzhang eb361fe77f feat: 添加启动意图识别API服务的脚本
添加专用脚本用于启动rag2_0.api.intent_recognition_api服务
脚本功能包括检测并结束现有screen会话,清理占用端口,最后启动新服务
2025-11-26 11:12:39 +08:00
ouyangyouzhang 4627a2268f Merge branch 'master' of https://git.97id.com/ouyangyouzhang/QueryRewrite 2025-11-26 10:52:28 +08:00
ouyangyouzhang 46f756428e 上线前相关环境变量的修改 2025-11-26 10:49:54 +08:00
18 changed files with 233 additions and 35 deletions
+18 -18
View File
@@ -1,27 +1,27 @@
OPENAI_API_BASE=https://api.siliconflow.cn/v1/
MODEL_NAME=deepseek-ai/DeepSeek-V3
RERANKER_BASE_URL=http://10.1.16.39:9995
RERANKER_MODEL_NAME=bge-reranker-v2-m3
RERANKER_API_KEY=test
# RERANKER_BASE_URL=http://10.1.16.39:9995
# RERANKER_MODEL_NAME=bge-reranker-v2-m3
# RERANKER_API_KEY=test
EMBEDDING_BASE_URL=http://10.1.16.39:9995
EMBEDDING_MODEL_NAME=bge-m3
EMBEDDING_API_KEY=test
# EMBEDDING_BASE_URL=http://10.1.16.39:9995
# EMBEDDING_MODEL_NAME=bge-m3
# EMBEDDING_API_KEY=test
DIFY_BSAE_URL=http://10.1.16.39/v1
DIFY_APP_KEY=app-CPoOMaGDsLRPAe9TW7Xjhszy
DIFY_DATASET_KEY=dataset-skLjmPVonjHo119OWNf3kAmY
# DIFY_BSAE_URL=http://10.1.16.39/v1
# DIFY_APP_KEY=app-CPoOMaGDsLRPAe9TW7Xjhszy
# DIFY_DATASET_KEY=dataset-skLjmPVonjHo119OWNf3kAmY
DIFY_PG_HOST = 10.1.16.39
DIFY_PG_PORT = 5432
DIFY_PG_USER = postgres
DIFY_PG_PASSWORD = difyai123456
DIFY_PG_DATABASE = dify
# DIFY_PG_HOST = 10.1.16.39
# DIFY_PG_PORT = 5432
# DIFY_PG_USER = postgres
# DIFY_PG_PASSWORD = difyai123456
# DIFY_PG_DATABASE = dify
ENABLE_LANGFUSE=true
LANGFUSE_PUBLIC_KEY=pk-lf-4e9b7cbe-528c-4697-b73c-33257a60072c
LANGFUSE_SECRET_KEY=sk-lf-cd8a78c5-2538-455e-a85a-87b6e1aa69d0
LANGFUSE_HOST=http://10.1.6.34:3000
# ENABLE_LANGFUSE=true
# LANGFUSE_PUBLIC_KEY=pk-lf-4e9b7cbe-528c-4697-b73c-33257a60072c
# LANGFUSE_SECRET_KEY=sk-lf-cd8a78c5-2538-455e-a85a-87b6e1aa69d0
# LANGFUSE_HOST=http://10.1.6.34:3000
+8 -2
View File
@@ -10,7 +10,10 @@
"request": "launch",
"program": "${file}",
"console": "integratedTerminal",
"justMyCode": true
"justMyCode": true,
"env": {
"PYTHONPATH": "${workspaceFolder}"
}
},
{
"name": "IntentRecognition",
@@ -18,7 +21,10 @@
"request": "launch",
"program": "${workspaceFolder}/rag2_0/demo/intent_recognition_example.py",
"console": "integratedTerminal",
"justMyCode": true
"justMyCode": true,
"env": {
"PYTHONPATH": "${workspaceFolder}"
}
}
]
}
-1
View File
@@ -29,7 +29,6 @@ def main(query: str) -> dict:
import sys
sys.path.append(os.getcwd())
from rag2_0.dify.DifyQueryRetrieval import DifyQueryRetrieval
# 定义数据库路径
-1
View File
@@ -18,7 +18,6 @@ import logging
load_dotenv()
import sys
sys.path.append(os.getcwd())
from rag2_0.dify.DifyQueryRetrieval import DifyQueryRetrieval
# 确保日志目录存在
@@ -5,7 +5,6 @@ import pandas as pd
from openpyxl import load_workbook
import logging
import numpy as np
sys.path.append(os.getcwd())
from rag2_0.tool.ModelTool import XinferenceEmbeddings
from langchain_community.vectorstores import SQLiteVSS
+4 -1
View File
@@ -15,8 +15,8 @@ import logging
load_dotenv()
import sys
sys.path.append(os.getcwd())
from rag2_0.intent_recognition import AsyncIntentRecognizer
from rag2_0.api.kefu_redirect_url import router as kefu_router
# 确保日志目录存在
os.makedirs('data/logs', exist_ok=True)
@@ -85,6 +85,9 @@ app.add_middleware(
allow_headers=["*"],
)
# 注册外部路由
app.include_router(kefu_router)
# 全局变量存储AsyncIntentRecognizer实例
_instance = None
+92
View File
@@ -0,0 +1,92 @@
import logging
import os
import sqlite3
import threading
import time
from queue import Queue, Full

from fastapi import APIRouter
from fastapi.responses import RedirectResponse
# Router that the main FastAPI application includes to expose the redirect endpoint.
router = APIRouter()
# Database location, resolved against the process's current working directory
# (os.getcwd()), NOT against this file's location: <cwd>/data/db/redirects.sqlite3.
PROJECT_ROOT = os.getcwd()
DB_DIR = os.path.join(PROJECT_ROOT, "data", "db")
DB_FILE = os.path.join(DB_DIR, "redirects.sqlite3")
# DDL for the redirects table: msg_id is the primary key, url is mandatory.
TABLE_SQL = (
"CREATE TABLE IF NOT EXISTS redirects ("
" msg_id TEXT PRIMARY KEY,"
" url TEXT NOT NULL"
")"
)
def _ensure_db():
    """Make sure the database directory, file and ``redirects`` table exist.

    Creates ``DB_DIR`` if needed and runs ``TABLE_SQL`` (a
    ``CREATE TABLE IF NOT EXISTS`` statement), so calling it repeatedly is safe.

    Raises:
        OSError: if the directory cannot be created.
        sqlite3.Error: if the database cannot be opened or the DDL fails.
    """
    os.makedirs(DB_DIR, exist_ok=True)
    conn = sqlite3.connect(DB_FILE)
    try:
        # ``with conn`` only commits/rolls back the transaction; it does not
        # close the connection, hence the explicit close() below.
        with conn:
            conn.execute(TABLE_SQL)
    finally:
        conn.close()
def save_redirect(msg_id: str, url: str) -> None:
    """Persist a ``(msg_id, url)`` pair in SQLite, ignoring duplicates.

    Relies on ``INSERT OR IGNORE`` together with the ``PRIMARY KEY(msg_id)``
    constraint, so a msg_id that is already stored is left untouched.

    Args:
        msg_id: Message identifier used as the primary key.
        url: Redirect target recorded for that message.

    Raises:
        OSError: if the database directory cannot be created.
        sqlite3.Error: if the database cannot be opened or the insert fails.
    """
    _ensure_db()
    conn = sqlite3.connect(DB_FILE)
    try:
        # ``with conn`` commits on success and rolls back on error, but leaves
        # the connection open; close it explicitly to avoid leaking handles.
        with conn:
            conn.execute(
                "INSERT OR IGNORE INTO redirects (msg_id, url) VALUES (?, ?)",
                (msg_id, url),
            )
    finally:
        conn.close()
# ========= Asynchronous DB-write queue and background thread =========
# Bounded queue of (msg_id, url) pairs waiting to be written; holds at most 10000 items.
_write_queue: "Queue[tuple[str, str]]" = Queue(maxsize=10000)
def _write_worker():
    """Drain ``_write_queue`` forever, inserting each pair into SQLite.

    Intended to run in a background thread. Every ``(msg_id, url)`` item is
    written with ``INSERT OR IGNORE``; a failing write is logged and dropped so
    that one bad item never stops the worker. ``task_done()`` is always called
    for each item that was taken from the queue.
    """
    log = logging.getLogger(__name__)

    # Try to prepare the schema up front; if that fails, keep the worker alive
    # and retry before the next write instead of letting the thread die.
    db_ready = False
    try:
        _ensure_db()
        db_ready = True
    except Exception:
        log.exception("Could not initialise redirects database; will retry on next write")

    while True:
        try:
            msg_id, url = _write_queue.get()
            try:
                if not db_ready:
                    _ensure_db()
                    db_ready = True
                conn = sqlite3.connect(DB_FILE)
                try:
                    # ``with conn`` handles commit/rollback only; the
                    # connection must still be closed explicitly.
                    with conn:
                        conn.execute(
                            "INSERT OR IGNORE INTO redirects (msg_id, url) VALUES (?, ?)",
                            (msg_id, url),
                        )
                finally:
                    conn.close()
            except Exception:
                # Best effort: record the failure but keep the worker running.
                log.exception("Failed to persist redirect for msg_id=%r", msg_id)
            finally:
                _write_queue.task_done()
        except Exception:
            # Defensive back-off so an unexpected error cannot spin the CPU.
            log.exception("Unexpected error in redirect write worker")
            time.sleep(0.1)
# Import-time side effect: start the background writer as a daemon thread,
# so it does not keep the interpreter alive at shutdown.
_worker_thread = threading.Thread(target=_write_worker, daemon=True)
_worker_thread.start()
@router.get("/kefu_login", summary="客服登录页重定向")
async def kefu_redirect(msg_id: str):
    """Redirect the caller to the customer-service login page.

    A non-empty ``msg_id`` is queued for asynchronous persistence together
    with the target URL (duplicates are ignored by the writer). Recording is
    best effort: if the queue is full the record is dropped and a warning is
    logged, and the redirect is returned regardless.

    Args:
        msg_id: Identifier of the message that triggered the redirect.

    Returns:
        A 302 ``RedirectResponse`` pointing at the login page.
    """
    target_url = "https://www.booway.com.cn/kefu/toLoginPage"
    if msg_id:
        try:
            # Non-blocking enqueue so the request never waits on the database.
            _write_queue.put_nowait((msg_id, target_url))
        except Full:
            # put_nowait only raises Full; never let it affect the redirect.
            logging.getLogger(__name__).warning(
                "Redirect write queue is full; dropping msg_id=%r", msg_id
            )
    return RedirectResponse(target_url, status_code=302)
-1
View File
@@ -10,7 +10,6 @@ import sys
import os
# 导入ExcelToSQLiteProcessor类
sys.path.append(os.getcwd())
from rag2_0.api.create_qingdan_dinge_database import ExcelToSQLiteProcessor, create_db
# 导入向量检索相关类
from rag2_0.tool.ModelTool import XinferenceEmbeddings
-2
View File
@@ -18,8 +18,6 @@ from tqdm import tqdm
import glob
import shutil
# 将项目根目录添加到Python路径
sys.path.append(os.getcwd())
from rag2_0.tool.ModelTool import OpenAiLLM
load_dotenv()
@@ -20,7 +20,6 @@ import argparse
from typing import List, Dict, Any
from langchain.output_parsers import PydanticOutputParser
from pydantic import BaseModel, Field
sys.path.append(os.getcwd())
from rag2_0.intent_recognition import AsyncIntentRecognizer
from rag2_0.dify.DifyQueryRetrieval import DifyQueryRetrieval
from rag2_0.intent_recognition.DataModels import Classification
-1
View File
@@ -10,7 +10,6 @@ import os
import json
from dotenv import load_dotenv
import sys
sys.path.append(os.getcwd())
from rag2_0.intent_recognition import ProfessionalNounVectorizer
import logging
-1
View File
@@ -15,7 +15,6 @@ from datetime import datetime
import os
from langchain_core.output_parsers import JsonOutputParser
sys.path.append(os.getcwd())
from rag2_0.dify.dify_client import ChatClient
from rag2_0.tool.ModelTool import OpenAiLLM
from rag2_0.dify.dify_tool import DifyTool
-1
View File
@@ -6,7 +6,6 @@ import logging
import time
import asyncio
import httpx
sys.path.append(os.getcwd())
from rag2_0.dify.dify_client.client import DifyClient, KnowledgeBaseClient
from rag2_0.tool.ModelTool import XinferenceReRankerModel
@@ -5,7 +5,6 @@ import sys
from dotenv import load_dotenv
load_dotenv()
sys.path.append(os.getcwd())
from rag2_0.dify.dify_client import DifyApi
-1
View File
@@ -17,7 +17,6 @@ logging.basicConfig(
]
)
sys.path.append(os.getcwd())
import rag2_0.dify.dify_client.dify_api as DifyApi
import pandas as pd
-1
View File
@@ -6,7 +6,6 @@ import json
from concurrent.futures import ThreadPoolExecutor, as_completed
import sys
sys.path.append(os.getcwd())
from rag2_0.dify.dify_client import ChatClient
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
-1
View File
@@ -6,7 +6,6 @@ import pandas as pd
import sys
sys.path.append(os.getcwd())
from rag2_0.dify.dify_tool import DifyTool
import requests
+111
View File
@@ -0,0 +1,111 @@
#!/usr/bin/env bash
# Dedicated launcher for the rag2_0.api.intent_recognition_api service.
# It ends an existing screen session of the same name, frees the service
# port if it is still occupied, and then starts the service in a new
# detached screen session.
set -euo pipefail
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
SESSION_NAME="intent_recognition_api"
SERVICE_PORT="8001"
# The port is taken from SERVICE_PORT so that the port being cleaned up is
# always the same one the service is started on.
START_COMMAND="cd \"$SCRIPT_DIR\" && uv run uvicorn rag2_0.api.intent_recognition_api:app --host 0.0.0.0 --port $SERVICE_PORT --workers 4"
echo "[脚本] 启动 intent_recognition_api 服务..."
# Check whether a screen session exists.
# Arguments: $1 - session name (optional, defaults to $SESSION_NAME).
# Returns:   0 if a session with that exact name is listed, non-zero otherwise.
exists_session() {
    local name="${1:-$SESSION_NAME}"
    local sessions
    # Capture the listing first and decide on its text only: under
    # `set -o pipefail` a non-zero exit from `screen -ls` (or a SIGPIPE caused
    # by `grep -q` closing the pipe early) would otherwise hide a real match.
    sessions=$(screen -ls 2>/dev/null || true)
    # Strict match on ".<name><whitespace>" to avoid matching name prefixes.
    grep -q "\\.${name}\\s" <<<"$sessions"
}
# Print the PIDs of all processes listening on $SERVICE_PORT, one per line,
# sorted and de-duplicated. Uses `ss` first and falls back to `lsof`.
# Returns: 0 if at least one PID was printed, 1 otherwise.
pids_on_port() {
    # Collect every pid=<n> on the matching lines. Several processes can share
    # one listening socket (e.g. a server with multiple workers), in which case
    # ss reports them all on a single line.
    local ss_pids
    ss_pids=$(ss -lptn 2>/dev/null \
        | grep -E ":${SERVICE_PORT}\\b" \
        | grep -oE 'pid=[0-9]+' \
        | cut -d= -f2 \
        | sort -u || true)
    if [[ -n "$ss_pids" ]]; then
        echo "$ss_pids"
        return 0
    fi
    # Fall back to lsof when ss is unavailable or reported nothing.
    if command -v lsof >/dev/null 2>&1; then
        local lsof_pids
        lsof_pids=$(lsof -nP -i :"$SERVICE_PORT" -sTCP:LISTEN -t 2>/dev/null | sort -u || true)
        if [[ -n "$lsof_pids" ]]; then
            echo "$lsof_pids"
            return 0
        fi
    fi
    return 1
}
# Free $SERVICE_PORT: ask the listening processes to stop with SIGTERM and,
# if any are still listening two seconds later, finish them with SIGKILL.
kill_by_port() {
    local pids left

    pids=$(pids_on_port || true)
    # Nothing is listening: nothing to do.
    [[ -n "$pids" ]] || return 0

    echo "[清理] 端口 $SERVICE_PORT 仍被占用,发送 SIGTERM 到: $pids"
    # Unquoted on purpose: the list is split into one argument per PID.
    kill -TERM $pids 2>/dev/null || true
    sleep 2

    # Look again and escalate only for whatever survived the TERM.
    left=$(pids_on_port || true)
    if [[ -z "$left" ]]; then
        return 0
    fi
    echo "[强制] 端口 $SERVICE_PORT 仍占用,发送 SIGKILL 到: $left"
    kill -KILL $left 2>/dev/null || true
}
# Stop a previously started service: end its screen session, give the port a
# moment to be released, then clean up by port if something still listens.
stop_existing_service() {
    # 1) Try to end the screen session first.
    if exists_session "$SESSION_NAME"; then
        echo "[停止] 发现已存在的 screen 会话 '$SESSION_NAME',正在结束..."
        screen -S "$SESSION_NAME" -X quit || true
        echo "[停止] screen 会话 '$SESSION_NAME' 已结束"
    else
        echo "[提示] 未发现 screen 会话: $SESSION_NAME"
    fi
    # 2) Wait for the port to be released.
    sleep 2
    # 3) If the port is still in use, clean up by port.
    # The decision is made on captured output rather than on the pipeline's
    # exit status: with `set -o pipefail`, `grep -q` closing the pipe early (or
    # a missing `ss`) could otherwise report "not listening" despite a match.
    local listeners=""
    listeners=$(ss -lptn 2>/dev/null | grep -E ":${SERVICE_PORT}\\b" || true)
    if [[ -z "$listeners" ]] && command -v lsof >/dev/null 2>&1; then
        listeners=$(lsof -nP -i :"$SERVICE_PORT" -sTCP:LISTEN -t 2>/dev/null || true)
    fi
    if [[ -n "$listeners" ]]; then
        echo "[清理] 端口 $SERVICE_PORT 仍被占用,正在清理..."
        kill_by_port
        echo "[清理] 端口 $SERVICE_PORT 清理完成"
    fi
}
# Launch the service inside a new detached screen session named
# $SESSION_NAME and print how to reach it.
start_new_service() {
    printf '%s\n' \
        "[启动] 准备启动 intent_recognition_api 服务..." \
        "[启动] 启动命令: $START_COMMAND"

    # -dmS: create the session detached, under the given name.
    screen -dmS "$SESSION_NAME" bash -c "$START_COMMAND"

    printf '%s\n' \
        "[启动] intent_recognition_api 服务已启动,screen 会话名: '$SESSION_NAME'" \
        "[启动] 服务运行在端口: $SERVICE_PORT" \
        "[提示] 可使用 'screen -r $SESSION_NAME' 查看服务输出"
}
# Entry point: stop whatever is already running, then start a fresh instance.
main() {
    stop_existing_service    # step 1: stop the existing service
    start_new_service        # step 2: start the new service
    printf '%s\n' "[完成] intent_recognition_api 服务启动脚本执行完成"
}

main