新增多个启动脚本以支持不同服务的后台运行,优化对话到工单的处理逻辑,增加人力信息映射,调整日志记录机制以支持异步处理。
This commit is contained in:
+114
-40
@@ -8,6 +8,8 @@ from fastapi.middleware.cors import CORSMiddleware
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import Dict, List, Any, Optional
|
||||
import asyncio
|
||||
import threading
|
||||
import queue
|
||||
|
||||
from dotenv import load_dotenv
|
||||
import json
|
||||
@@ -32,20 +34,92 @@ from rag2_0.dify.DifyQueryRetrieval import DifyQueryRetrieval
|
||||
# 定义文件锁和JSON文件路径
|
||||
file_lock = asyncio.Lock()
|
||||
QUERY_LOG_DIR = os.path.join(os.getcwd(), "data", "query_logs")
|
||||
QUERY_LOG_FILE = os.path.join(QUERY_LOG_DIR, "answer_type_logs.json")
|
||||
QUERY_DATA_FILE = os.path.join(QUERY_LOG_DIR, "answer_type_logs.json")
|
||||
|
||||
logging.basicConfig(
|
||||
level=logging.INFO,
|
||||
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
|
||||
handlers=[
|
||||
logging.StreamHandler()
|
||||
]
|
||||
# 创建异步日志队列和工作线程
|
||||
log_queue = queue.Queue()
|
||||
worker_thread = None
|
||||
|
||||
# 后台工作线程函数
|
||||
def log_worker():
|
||||
while True:
|
||||
try:
|
||||
# 从队列获取数据,设置超时以允许线程退出
|
||||
data = log_queue.get(timeout=1.0)
|
||||
if data is None: # 接收到退出信号
|
||||
# 处理剩余数据后再退出
|
||||
while not log_queue.empty():
|
||||
data = log_queue.get_nowait()
|
||||
if data is None: # 跳过额外的停止信号
|
||||
continue
|
||||
process_log_data(data)
|
||||
break
|
||||
|
||||
process_log_data(data)
|
||||
log_queue.task_done()
|
||||
except queue.Empty:
|
||||
continue
|
||||
except Exception as e:
|
||||
logger.error(f"保存查询数据时出错: {str(e)}", exc_info=True)
|
||||
|
||||
# 提取数据处理逻辑到单独函数
|
||||
def process_log_data(data):
|
||||
try:
|
||||
# 确保目录存在
|
||||
os.makedirs(os.path.dirname(QUERY_DATA_FILE), exist_ok=True)
|
||||
|
||||
# 读取现有数据
|
||||
existing_data = []
|
||||
if os.path.exists(QUERY_DATA_FILE) and os.path.getsize(QUERY_DATA_FILE) > 0:
|
||||
with open(QUERY_DATA_FILE, 'r', encoding='utf-8') as f:
|
||||
try:
|
||||
existing_data = json.load(f)
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"JSON文件解析错误,将创建新文件: {QUERY_DATA_FILE}")
|
||||
existing_data = []
|
||||
|
||||
# 添加新数据
|
||||
existing_data.append(data)
|
||||
|
||||
# 写入文件
|
||||
with open(QUERY_DATA_FILE, 'w', encoding='utf-8') as f:
|
||||
json.dump(existing_data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
logger.info(f"成功保存查询数据到: {QUERY_DATA_FILE}")
|
||||
except Exception as e:
|
||||
logger.error(f"处理日志数据时出错: {str(e)}", exc_info=True)
|
||||
|
||||
# 创建日志目录
|
||||
os.makedirs(QUERY_LOG_DIR, exist_ok=True)
|
||||
|
||||
# 配置日志 - 同时输出到控制台和文件
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.setLevel(logging.INFO)
|
||||
|
||||
# 创建控制台处理器
|
||||
console_handler = logging.StreamHandler()
|
||||
console_handler.setLevel(logging.INFO)
|
||||
|
||||
# 创建文件处理器
|
||||
file_handler = logging.FileHandler(
|
||||
os.path.join(QUERY_LOG_DIR, "answer_type_service.log"),
|
||||
encoding='utf-8'
|
||||
)
|
||||
file_handler.setLevel(logging.INFO)
|
||||
|
||||
# 创建日志格式
|
||||
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
console_handler.setFormatter(formatter)
|
||||
file_handler.setFormatter(formatter)
|
||||
|
||||
# 添加处理器到日志器
|
||||
logger.addHandler(console_handler)
|
||||
logger.addHandler(file_handler)
|
||||
|
||||
# 设置其他库的日志级别
|
||||
logging.getLogger('httpx').setLevel(logging.WARNING)
|
||||
logging.getLogger('openai').setLevel(logging.WARNING)
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
# 定义请求模型
|
||||
class AnswerTypeRequest(BaseModel):
|
||||
query: str
|
||||
@@ -70,13 +144,32 @@ app.add_middleware(
|
||||
# 应用启动事件
|
||||
@app.on_event("startup")
|
||||
async def startup_event():
|
||||
global worker_thread
|
||||
# 确保日志目录存在
|
||||
os.makedirs(QUERY_LOG_DIR, exist_ok=True)
|
||||
# 确保日志文件存在
|
||||
if not os.path.exists(QUERY_LOG_FILE):
|
||||
async with file_lock:
|
||||
with open(QUERY_LOG_FILE, 'w', encoding='utf-8') as f:
|
||||
json.dump([], f, ensure_ascii=False)
|
||||
if not os.path.exists(QUERY_DATA_FILE):
|
||||
with open(QUERY_DATA_FILE, 'w', encoding='utf-8') as f:
|
||||
json.dump([], f, ensure_ascii=False)
|
||||
|
||||
# 启动后台工作线程
|
||||
worker_thread = threading.Thread(target=log_worker, daemon=True)
|
||||
worker_thread.start()
|
||||
logger.info("后台日志工作线程已启动")
|
||||
|
||||
# 应用关闭事件
|
||||
@app.on_event("shutdown")
|
||||
def shutdown_event():
|
||||
global worker_thread
|
||||
if worker_thread:
|
||||
# 发送退出信号
|
||||
log_queue.put(None)
|
||||
# 等待工作线程处理剩余数据
|
||||
worker_thread.join(timeout=10.0)
|
||||
if worker_thread.is_alive():
|
||||
logger.warning("工作线程未在超时时间内退出")
|
||||
else:
|
||||
logger.info("后台日志工作线程已停止")
|
||||
|
||||
# 添加健康检查端点
|
||||
@app.get("/health", summary="健康检查")
|
||||
@@ -89,41 +182,22 @@ async def query_type(query_type: str, workflow_run_id:str):
|
||||
# 记录请求
|
||||
logger.info(f"接收到请求: 类型: {query_type}, workflow_run_id: {workflow_run_id}")
|
||||
|
||||
# 保存 提问、问题类型、当前时间戳到json
|
||||
# 准备数据
|
||||
timestamp = datetime.datetime.now().isoformat()
|
||||
query_data = {
|
||||
"query_type": query_type,
|
||||
"timestamp": timestamp,
|
||||
"workflow_run_id": workflow_run_id
|
||||
}
|
||||
success = True
|
||||
|
||||
# 将数据放入队列
|
||||
try:
|
||||
# 使用锁保护文件读写操作
|
||||
async with file_lock:
|
||||
# 确保目录存在
|
||||
os.makedirs(os.path.dirname(QUERY_LOG_FILE), exist_ok=True)
|
||||
|
||||
# 读取现有数据
|
||||
existing_data = []
|
||||
if os.path.exists(QUERY_LOG_FILE) and os.path.getsize(QUERY_LOG_FILE) > 0:
|
||||
with open(QUERY_LOG_FILE, 'r', encoding='utf-8') as f:
|
||||
try:
|
||||
existing_data = json.load(f)
|
||||
except json.JSONDecodeError:
|
||||
logger.error(f"JSON文件解析错误,将创建新文件: {QUERY_LOG_FILE}")
|
||||
existing_data = []
|
||||
|
||||
# 添加新数据
|
||||
existing_data.append(query_data)
|
||||
|
||||
# 写入文件
|
||||
with open(QUERY_LOG_FILE, 'w', encoding='utf-8') as f:
|
||||
json.dump(existing_data, f, ensure_ascii=False, indent=2)
|
||||
|
||||
logger.info(f"成功保存查询数据到: {QUERY_LOG_FILE}")
|
||||
log_queue.put(query_data)
|
||||
success = True
|
||||
logger.info(f"查询数据已加入队列,当前队列大小: {log_queue.qsize()}")
|
||||
except Exception as e:
|
||||
success = False
|
||||
logger.error(f"保存查询数据时出错: {str(e)}", exc_info=True)
|
||||
logger.error(f"加入队列时出错: {str(e)}", exc_info=True)
|
||||
|
||||
# 返回响应
|
||||
content = f"<strong>问题类型</strong>: {query_type}<br><strong>操作是否成功</strong>: {'成功' if success else '失败'}"
|
||||
@@ -146,4 +220,4 @@ if __name__ == "__main__":
|
||||
# workers=1 # 生产环境可以增加worker数量
|
||||
# )
|
||||
# 生产环境可以使用以下命令启动:
|
||||
# uvicorn rag2_0.dify.AnswerType:app --host 0.0.0.0 --port 8003 --workers 20
|
||||
# uvicorn rag2_0.dify.AnswerType:app --host 0.0.0.0 --port 8003 --workers 1
|
||||
@@ -5,7 +5,7 @@ sys.path.append(os.getcwd())
|
||||
import rag2_0.dify.dify_client.dify_api as DifyApi
|
||||
|
||||
import pandas as pd
|
||||
pd_data = pd.read_excel("data/excel/工单汇总(给AI)_2.xlsx")
|
||||
pd_data = pd.read_excel("data/excel/工单汇总(给AI)_工单拆分.xlsx")
|
||||
|
||||
|
||||
dify_api = DifyApi.DifyApi()
|
||||
@@ -13,6 +13,7 @@ peiwang_dataset_id = dify_api.get_or_create_dataset_by_name("配网工单数据"
|
||||
zhuwang_dataset_id = dify_api.get_or_create_dataset_by_name("主网工单数据")
|
||||
jianga_dataset_id = dify_api.get_or_create_dataset_by_name("技改工单数据")
|
||||
chuneng_dataset_id = dify_api.get_or_create_dataset_by_name("储能工单数据")
|
||||
xizang_dataset_id = dify_api.get_or_create_dataset_by_name("西藏工单数据")
|
||||
|
||||
|
||||
soft_segments_list={}
|
||||
@@ -39,6 +40,10 @@ for skill_group, segments_list in soft_segments_list.items():
|
||||
dataset_id = jianga_dataset_id
|
||||
elif skill_group == "储能":
|
||||
dataset_id = chuneng_dataset_id
|
||||
elif skill_group == "西藏":
|
||||
dataset_id = xizang_dataset_id
|
||||
else:
|
||||
continue
|
||||
document_id = dify_api.get_document_id(dataset_id=dataset_id, document_name=f"{skill_group}工单数据")
|
||||
if not document_id:
|
||||
document_id = dify_api.upload_text_to_document(text_name=f"{skill_group}工单数据", text="", dataset_id=dataset_id)
|
||||
|
||||
Reference in New Issue
Block a user