新增AnswerType.py文件,创建FastAPI应用以收集用户提问数据类型,添加健康检查和异步检索API,优化日志记录和错误处理。同时,新增DifyExporter类用于导出Dify系统中的对话和消息数据,支持从查询日志加载数据并保存为Excel文件。
This commit is contained in:
@@ -0,0 +1,149 @@
|
|||||||
|
# from gevent import monkey
|
||||||
|
# monkey.patch_all()
|
||||||
|
|
||||||
|
import os
|
||||||
|
from fastapi import FastAPI, HTTPException, Request
|
||||||
|
from fastapi.responses import JSONResponse, HTMLResponse
|
||||||
|
from fastapi.middleware.cors import CORSMiddleware
|
||||||
|
from pydantic import BaseModel, Field
|
||||||
|
from typing import Dict, List, Any, Optional
|
||||||
|
import asyncio
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
import json
|
||||||
|
import time
|
||||||
|
import datetime
|
||||||
|
import logging
|
||||||
|
# 加载环境变量
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
def main(query: str) -> dict:
    """Trim the query and return it JSON-encoded under ``format_query``.

    The value is the JSON string literal of the stripped query: the
    surrounding double quotes are part of the value, special characters are
    backslash-escaped, and non-ASCII characters are kept as-is.
    """
    trimmed = query.strip()
    return dict(format_query=json.dumps(trimmed, ensure_ascii=False))
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.append(os.getcwd())
|
||||||
|
from rag2_0.dify.DifyQueryRetrieval import DifyQueryRetrieval
|
||||||
|
|
||||||
|
# Location of the JSON query log (relative to the current working directory)
# and the asyncio lock used to serialise access to it.
QUERY_LOG_DIR = os.path.join(os.getcwd(), "data", "query_logs")
QUERY_LOG_FILE = os.path.join(QUERY_LOG_DIR, "answer_type_logs.json")
file_lock = asyncio.Lock()

# Root logging configuration: INFO and above, emitted through a StreamHandler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Raise the threshold of the HTTP/OpenAI client loggers to WARNING.
logging.getLogger('openai').setLevel(logging.WARNING)
logging.getLogger('httpx').setLevel(logging.WARNING)

# Module-level logger used by the handlers below.
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# Request model
class AnswerTypeRequest(BaseModel):
    """Payload pairing a user question with its question-type label."""

    # Text of the user's question (required).
    query: str = Field(...)
    # Label describing the kind of question (required).
    query_type: str = Field(...)
|
||||||
|
|
||||||
|
# Create the FastAPI application.
app = FastAPI(
    title="提问数据类型",
    description="收集用户提问数据类型",
    version="1.0"
)

# CORS middleware: any origin, method and header is accepted.
# Credentials are deliberately NOT allowed: a wildcard origin must not be
# combined with allow_credentials=True (the CORS middleware would otherwise
# echo back whatever origin the browser sends, letting any site issue
# credentialed requests). List explicit origins before turning it back on.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
|
||||||
|
|
||||||
|
# Application startup hook
@app.on_event("startup")
async def startup_event():
    """Prepare the on-disk query log when the application starts.

    Creates the log directory if needed and, when the log file does not exist
    yet, writes an empty JSON list to it while holding ``file_lock``.
    """
    os.makedirs(QUERY_LOG_DIR, exist_ok=True)

    if os.path.exists(QUERY_LOG_FILE):
        # Already initialised: keep whatever has been logged so far.
        return

    async with file_lock:
        with open(QUERY_LOG_FILE, 'w', encoding='utf-8') as log_file:
            json.dump([], log_file, ensure_ascii=False)
|
||||||
|
|
||||||
|
# Health-check endpoint
@app.get("/health", summary="健康检查")
async def health_check():
    """Return the constant payload ``{"status": "ok"}``."""
    payload = {"status": "ok"}
    return payload
|
||||||
|
|
||||||
|
def _append_query_record(record: dict) -> None:
    """Append *record* to the JSON query log and rewrite the file atomically.

    Call this while holding ``file_lock``.

    NOTE: ``file_lock`` is an ``asyncio.Lock`` and therefore only serialises
    writers inside one process. The temp-file + ``os.replace`` write keeps the
    file from being observed half-written, but concurrent worker processes
    can still lose each other's appends.
    """
    os.makedirs(os.path.dirname(QUERY_LOG_FILE), exist_ok=True)

    existing_data = []
    if os.path.exists(QUERY_LOG_FILE) and os.path.getsize(QUERY_LOG_FILE) > 0:
        try:
            with open(QUERY_LOG_FILE, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
            if not isinstance(loaded, list):
                raise ValueError("top-level JSON value is not a list")
            existing_data = loaded
        except ValueError:  # includes json.JSONDecodeError
            # Preserve the unreadable file instead of silently discarding every
            # record collected so far, then start a fresh log.
            backup_path = f"{QUERY_LOG_FILE}.corrupt-{int(time.time())}"
            os.replace(QUERY_LOG_FILE, backup_path)
            logger.error(f"JSON文件解析错误,已备份到 {backup_path},将创建新文件: {QUERY_LOG_FILE}")
            existing_data = []

    existing_data.append(record)

    # Write to a per-process temp file first so a crash mid-write can never
    # leave a truncated log behind; os.replace swaps it in atomically.
    tmp_path = f"{QUERY_LOG_FILE}.{os.getpid()}.tmp"
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump(existing_data, f, ensure_ascii=False, indent=2)
    os.replace(tmp_path, QUERY_LOG_FILE)


@app.get("/query_type", summary="异步检索API")
async def query_type(query: str, query_type: str):
    """Record a question together with its type label and confirm as HTML.

    The record (query, query_type, ISO-8601 local timestamp) is appended to
    the JSON log file. A failure to save is logged and reported in the
    response body rather than raised.

    Args:
        query: Text of the user's question.
        query_type: Label describing the kind of question.

    Returns:
        An ``HTMLResponse`` echoing both values (HTML-escaped) and whether the
        record was saved.

    Raises:
        HTTPException: status 500 if anything outside the save step fails.
    """
    # Local import keeps this block self-contained; html is standard library.
    import html

    try:
        logger.info(f"接收到请求: {query}, 类型: {query_type}")

        query_data = {
            "query": query,
            "query_type": query_type,
            "timestamp": datetime.datetime.now().isoformat(),
        }

        success = True
        try:
            async with file_lock:
                _append_query_record(query_data)
            logger.info(f"成功保存查询数据到: {QUERY_LOG_FILE}")
        except Exception as e:
            # Best effort: the caller still gets a response saying it failed.
            success = False
            logger.error(f"保存查询数据时出错: {str(e)}", exc_info=True)

        # Both values come straight from the query string, so they must be
        # escaped before being embedded in HTML (reflected XSS otherwise).
        content = (
            f"<strong>当前提问</strong>: {html.escape(query)}<br>"
            f"<strong>问题类型</strong>: {html.escape(query_type)}<br>"
            f"<strong>操作是否成功</strong>: {'成功' if success else '失败'}"
        )
        return HTMLResponse(content=content)
    except Exception as e:
        logger.error(f"处理请求时出错: {str(e)}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"处理请求时出错: {str(e)}") from e
|
||||||
|
|
||||||
|
if __name__ == "__main__":
    # Run the FastAPI application with Uvicorn (single worker, no auto-reload).
    import uvicorn

    uvicorn.run("rag2_0.dify.AnswerType:app", host="0.0.0.0", port=8003, reload=False, workers=1, log_level="info")

# The service can also be started from the command line:
#   uvicorn rag2_0.dify.AnswerType:app --host 0.0.0.0 --port 8003 --workers 20
# NOTE: file_lock is an asyncio.Lock, which only serialises writers inside a
# single process. With more than one worker, concurrent requests handled by
# different processes can overwrite each other's updates to the JSON log.
|
||||||
@@ -4,6 +4,9 @@ import psycopg2
|
|||||||
import os
|
import os
|
||||||
import json
|
import json
|
||||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.append(os.getcwd())
|
||||||
from rag2_0.dify.dify_client import ChatClient
|
from rag2_0.dify.dify_client import ChatClient
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
from langchain.output_parsers import PydanticOutputParser
|
from langchain.output_parsers import PydanticOutputParser
|
||||||
@@ -168,6 +171,76 @@ class PgSql:
|
|||||||
except (Exception, psycopg2.Error) as error:
|
except (Exception, psycopg2.Error) as error:
|
||||||
raise Exception(f"Error while getting workflow_node_executions_info: {error}")
|
raise Exception(f"Error while getting workflow_node_executions_info: {error}")
|
||||||
|
|
||||||
|
def get_app_conversations(self, appid:str)->list[str] | None:
|
||||||
|
"""
|
||||||
|
根据应用 ID 从 'conversations' 表中获取应用会话信息。
|
||||||
|
"""
|
||||||
|
with self.pg_sql_lock:
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT DISTINCT conversation_id
|
||||||
|
FROM messages
|
||||||
|
WHERE app_id = %s AND invoke_from != 'debugger';
|
||||||
|
""",
|
||||||
|
(appid,)
|
||||||
|
)
|
||||||
|
result = cursor.fetchall()
|
||||||
|
if result:
|
||||||
|
colnames = [desc[0] for desc in cursor.description]
|
||||||
|
return [dict(zip(colnames, row)) for row in result]
|
||||||
|
return None
|
||||||
|
except (Exception, psycopg2.Error) as error:
|
||||||
|
raise Exception(f"Error while getting app_conversations: {error}")
|
||||||
|
|
||||||
|
def get_conversation_messages(self, conversation_id:str)->list[dict] | None:
|
||||||
|
"""
|
||||||
|
根据会话 ID 从 'messages' 表中获取会话消息信息。
|
||||||
|
"""
|
||||||
|
with self.pg_sql_lock:
|
||||||
|
try:
|
||||||
|
with self.connection.cursor() as cursor:
|
||||||
|
cursor.execute(
|
||||||
|
"""
|
||||||
|
SELECT * FROM messages WHERE conversation_id = %s AND status = 'normal'
|
||||||
|
""",
|
||||||
|
(conversation_id,)
|
||||||
|
)
|
||||||
|
result = cursor.fetchall()
|
||||||
|
if result:
|
||||||
|
colnames = [desc[0] for desc in cursor.description]
|
||||||
|
return [dict(zip(colnames, row)) for row in result]
|
||||||
|
return None
|
||||||
|
except (Exception, psycopg2.Error) as error:
|
||||||
|
raise Exception(f"Error while getting conversation_messages: {error}")
|
||||||
|
|
||||||
|
def get_message_rating(self, msg_id):
    """
    Look up the rating stored in ``message_feedbacks`` for a message.

    :param msg_id: message ID (UUID format)
    :return: the ``rating`` value of the first row returned for the message,
             or ``None`` when the message has no feedback row
    :raises Exception: if the query fails; the original error is chained as
                       the cause
    """
    with self.pg_sql_lock:
        try:
            with self.connection.cursor() as cursor:
                cursor.execute(
                    """
                    SELECT rating
                    FROM message_feedbacks
                    WHERE message_id = %s
                    """,
                    (msg_id,)
                )
                row = cursor.fetchone()
        except Exception as error:
            # The message names this method (it previously said
            # "conversation_messages", which pointed at the wrong query).
            raise Exception(f"Error while getting message_rating: {error}") from error
        return row[0] if row else None
|
||||||
|
|
||||||
class DifyTool:
|
class DifyTool:
|
||||||
"""
|
"""
|
||||||
提供用于获取 Dify 应用调试信息的工具类。
|
提供用于获取 Dify 应用调试信息的工具类。
|
||||||
@@ -388,7 +461,6 @@ content: "{content}"
|
|||||||
avg_score = total_score / valid_scores if valid_scores > 0 else 0
|
avg_score = total_score / valid_scores if valid_scores > 0 else 0
|
||||||
return retrieve_title, max_score, min_score, avg_score
|
return retrieve_title, max_score, min_score, avg_score
|
||||||
|
|
||||||
|
|
||||||
class NewWorkflowChat(BaseWorkflowChat):
|
class NewWorkflowChat(BaseWorkflowChat):
|
||||||
"""
|
"""
|
||||||
新工作流对话类,用于调用新工作流发送对话并解析获取相关数据
|
新工作流对话类,用于调用新工作流发送对话并解析获取相关数据
|
||||||
|
|||||||
@@ -0,0 +1,303 @@
|
|||||||
|
from dotenv import load_dotenv
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import datetime
|
||||||
|
import pandas as pd
|
||||||
|
|
||||||
|
|
||||||
|
import sys
|
||||||
|
sys.path.append(os.getcwd())
|
||||||
|
from rag2_0.dify.dify_tool import PgSql, DifyTool
|
||||||
|
|
||||||
|
|
||||||
|
class DifyExporter:
    """
    Export conversation and message data from a Dify system to an Excel file.

    Messages are read through ``PgSql``/``DifyTool`` and annotated with the
    question type recorded in the query-log JSON file.
    """

    # Title of the workflow node whose outputs list the retrieved knowledge.
    _KNOWLEDGE_NODE_TITLE = "提取处理后的知识"
    # Column order of the exported sheet.
    _COLUMNS = [
        "msg_id", "提问", "回答", "提问人", "提问时间",
        "评价", "问题分类", "检索到的词条"
    ]
    # Column widths applied to the exported sheet, keyed by column title.
    _COLUMN_WIDTHS = {
        "msg_id": 15,
        "提问": 40,
        "回答": 60,
        "提问人": 15,
        "提问时间": 15,
        "评价": 10,
        "问题分类": 20,
        "检索到的词条": 40
    }

    def __init__(self, app_id=None, query_log_file=None):
        """
        Initialise the exporter.

        Args:
            app_id: Dify application ID; a built-in default is used when falsy.
            query_log_file: path of the query-log JSON file; defaults to
                ``data/query_logs/answer_type_logs.json`` under the current
                working directory.
        """
        self.app_id = app_id or "72d03c7d-8bea-42f9-9e8d-cdfb9480f372"

        self.query_log_dir = os.path.join(os.getcwd(), "data", "query_logs")
        self.query_log_file = query_log_file or os.path.join(self.query_log_dir, "answer_type_logs.json")

        # Helpers from rag2_0.dify.dify_tool.
        self.dify_pgsql = PgSql()
        self.dify_tool = DifyTool()

        # Collected rows and the query -> latest log record mapping.
        self.message_info_list = []
        self.query_logs = {}

    @staticmethod
    def _is_newer(candidate, current):
        """Return True when both ISO timestamps are set and *candidate* is later."""
        if not candidate or not current:
            return False
        return datetime.datetime.fromisoformat(candidate) > datetime.datetime.fromisoformat(current)

    @staticmethod
    def _column_letter(index):
        """Return the spreadsheet column letters for a 0-based index (0 -> A, 26 -> AA)."""
        letters = ""
        index += 1
        while index:
            index, remainder = divmod(index - 1, 26)
            letters = chr(65 + remainder) + letters
        return letters

    def load_query_logs(self):
        """
        Load the query log and keep, per query text, its most recent record.

        Records that are malformed (no ``query`` key, unparsable timestamp,
        wrong type) are skipped instead of aborting the whole load.

        Returns:
            True when the file was read, False when it is missing, unreadable
            or does not contain a JSON list.
        """
        try:
            with open(self.query_log_file, 'r', encoding='utf-8') as f:
                query_logs_list = json.load(f)
        except (OSError, ValueError) as e:  # ValueError covers JSONDecodeError
            print(f"加载查询日志失败: {e}")
            return False

        if not isinstance(query_logs_list, list):
            print(f"加载查询日志失败: {self.query_log_file} 不是JSON列表")
            return False

        for record in query_logs_list:
            try:
                query = record['query']
                # First record for a query always wins; later ones replace it
                # only when both timestamps exist and the new one is later.
                if query not in self.query_logs or self._is_newer(
                    record.get('timestamp'), self.query_logs[query].get('timestamp')
                ):
                    self.query_logs[query] = record
            except (KeyError, TypeError, AttributeError, ValueError):
                continue
        return True

    def process_message_chain(self, messages):
        """
        Linearise a conversation by following parent links from the root.

        Starting from the messages whose ``parent_message_id`` is ``None``,
        the most recently created message (by ``created_at``) is picked at
        each level and its ``id`` becomes the next parent to look up. Sibling
        messages that were not picked, and their descendants, are dropped.

        Args:
            messages: list of message dicts, or None.

        Returns:
            The selected messages in chain order (empty list for no input).
        """
        children_by_parent = {}
        for message in messages or []:
            children_by_parent.setdefault(message["parent_message_id"], []).append(message)

        ordered = []
        current_id = None
        visited = set()  # guards against cycles in the parent links
        while current_id not in visited:
            candidates = children_by_parent.get(current_id)
            if not candidates:
                break
            visited.add(current_id)
            latest = max(candidates, key=lambda m: m["created_at"])
            ordered.append(latest)
            current_id = latest["id"]
        return ordered

    def extract_message_info(self, message):
        """
        Build one export row from a message.

        Args:
            message: message dict with ``id``, ``inputs``, ``query``,
                ``answer`` and ``created_at`` (an object with ``strftime``).

        Returns:
            Dict with the export columns, or None when no debug information
            is available for the message.
        """
        msg_id = message["id"]
        msg_inputs = message["inputs"] or {}
        user_name = msg_inputs.get("user_name", "")
        msg_query = message["query"]
        msg_answer = message["answer"]
        created_at = message['created_at'].strftime("%Y-%m-%d")

        msg_debug_info = self.dify_tool.get_message_debug_info_by_id(msg_id)
        if not msg_debug_info:
            return None

        wiki_list = []
        for node_execution in msg_debug_info['workflow_node_executions_info']:
            if node_execution["title"] != self._KNOWLEDGE_NODE_TITLE:
                continue
            outputs = node_execution["outputs"]
            if isinstance(outputs, str):
                outputs = json.loads(outputs)
            for knowledge in outputs["knowledge_list_metadata"]:
                document_name = knowledge['metadata']['document_name']
                # Keep only the last path component of the document name.
                wiki_list.append(document_name.split("/")[-1])

        # De-duplicate while keeping first-seen order so exports are reproducible.
        wiki_list_str = "\n".join(dict.fromkeys(wiki_list)) or "无"

        rating = self.dify_pgsql.get_message_rating(msg_id)
        # The question text is the lookup key into the loaded query log.
        query_type = self.query_logs.get(msg_query, {}).get('query_type', "")

        return {
            "msg_id": msg_id,
            "提问": msg_query,
            "回答": msg_answer,
            "提问人": user_name,
            "提问时间": created_at,
            "评价": rating,
            "问题分类": query_type,
            "检索到的词条": wiki_list_str
        }

    def process_conversations(self):
        """
        Collect export rows for every conversation of the application.

        Rows are appended to ``self.message_info_list`` (it is not cleared
        first). The PgSql helpers return None when nothing matches; that is
        treated as "no rows".

        Returns:
            ``self.message_info_list``.
        """
        conversations = self.dify_pgsql.get_app_conversations(appid=self.app_id) or []
        for conversation in conversations:
            messages = self.dify_pgsql.get_conversation_messages(
                conversation_id=conversation['conversation_id']
            ) or []
            for message in self.process_message_chain(messages):
                message_info = self.extract_message_info(message)
                if message_info:
                    self.message_info_list.append(message_info)

        return self.message_info_list

    def save_to_excel(self, message_info_list, output_file):
        """
        Write the rows to an Excel file with fixed column order and sizing.

        Args:
            message_info_list: list of row dicts; missing columns are left empty.
            output_file: destination path; its directory is created if needed.

        Returns:
            ``output_file``.
        """
        columns_order = self._COLUMNS
        # Selecting columns here both orders them and adds any that are missing.
        df = pd.DataFrame(message_info_list, columns=columns_order)

        # A bare file name has no directory part; os.makedirs("") would raise.
        output_dir = os.path.dirname(output_file)
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

        sheet_name = 'Dify对话记录'
        with pd.ExcelWriter(output_file, engine='openpyxl') as writer:
            df.to_excel(writer, index=False, sheet_name=sheet_name)
            worksheet = writer.sheets[sheet_name]

            # Row height of 20 for every row.
            for row in worksheet.iter_rows():
                worksheet.row_dimensions[row[0].row].height = 20

            for i, column in enumerate(columns_order):
                worksheet.column_dimensions[self._column_letter(i)].width = self._COLUMN_WIDTHS[column]

        print(f"结果已保存到 {output_file}")

        return output_file

    def export(self, output_file=None):
        """
        Run the full export: load the query log, collect rows, write Excel.

        The file is written when ``output_file`` is given or at least one row
        was collected. Without ``output_file`` a timestamped name under
        ``data/excel`` in the current working directory is used.

        Args:
            output_file: destination path, or None to generate one.

        Returns:
            The list of collected row dicts.
        """
        self.load_query_logs()
        self.process_conversations()

        if output_file or self.message_info_list:
            if not output_file:
                timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
                output_file = os.path.join(os.getcwd(), "data", "excel", f"dify_export_{timestamp}.xlsx")

            self.save_to_excel(self.message_info_list, output_file)

        return self.message_info_list
|
||||||
|
|
||||||
|
|
||||||
|
# Example usage
if __name__ == "__main__":
    import argparse

    # Parse command-line arguments.
    parser = argparse.ArgumentParser(description='Dify数据导出工具')
    parser.add_argument('--output', '-o', type=str, default="data/excel/dify_export.xlsx",
                        help='输出Excel文件路径')
    parser.add_argument('--app_id', '-a', type=str, default=None,
                        help='Dify应用ID')
    parser.add_argument('--query_log_file', '-q', type=str, default=None,
                        help='查询日志文件路径')

    args = parser.parse_args()

    load_dotenv()

    # Fallback connection settings, applied only for variables that neither
    # the environment nor the .env file loaded above has set. They used to
    # be assigned unconditionally, which overrode the loaded configuration.
    # SECURITY: a host address and database password are committed here in
    # plain text; move them to .env / a secret store and rotate the password.
    fallback_env = {
        "DIFY_PG_HOST": "10.1.16.39",
        "DIFY_PG_PORT": "5432",
        "DIFY_PG_USER": "postgres",
        "DIFY_PG_PASSWORD": "difyai123456",
        "DIFY_PG_DATABASE": "dify",
    }
    for env_name, env_value in fallback_env.items():
        os.environ.setdefault(env_name, env_value)

    # Create the exporter.
    exporter = DifyExporter(
        app_id=args.app_id,
        query_log_file=args.query_log_file
    )

    # Run the export.
    results = exporter.export(output_file=args.output)

    # Report how many rows were exported.
    print(f"导出了 {len(results)} 条消息信息")
|
||||||
|
|
||||||
Reference in New Issue
Block a user