优化DifyCompareTest类,添加DifyExporter实例以支持词条检索,更新DifyQueryRetrieval_api.py中的topk参数,增强DifyExporter类以从HTTP服务获取查询类型和点踩原因,简化构造函数,移除不必要的查询日志加载逻辑。

This commit is contained in:
2025-07-30 17:30:24 +08:00
parent 57369059eb
commit 728262cc65
3 changed files with 95 additions and 54 deletions
+23 -6
View File
@@ -18,7 +18,8 @@ from langchain_core.output_parsers import JsonOutputParser
sys.path.append(os.getcwd())
from rag2_0.dify.dify_client import ChatClient
from rag2_0.tool.ModelTool import OpenAiLLM
from rag2_0.dify.dify_tool import PgSql, DifyTool
from rag2_0.dify.export_new_dify import DifyExporter
load_dotenv()
# 创建日志目录
log_dir = 'data/logs'
@@ -45,6 +46,7 @@ class DifyCompareTest:
# 词条与工单同时检索
self.both_wiki_worker_client = ChatClient(api_key=os.getenv("DIFY_APP_KEY"), base_url=os.getenv("DIFY_BSAE_URL"))
self.llm = OpenAiLLM(base_url=os.getenv("OPENAI_API_BASE"), model=os.getenv("MODEL_NAME"))
self.exporter = DifyExporter()
def llm_judge_answer(self, old_answer: str, now_answer: str):
user_prompt = f"""
@@ -100,10 +102,11 @@ class DifyCompareTest:
answer = result.get('answer', "")
if len(answer) == 0:
raise Exception(f"回答为空: {result}")
if old_answer:
judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
else:
judge_result=""
# if old_answer:
# judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
# else:
# judge_result=""
judge_result=""
# 只取回答的前半部分
answer = answer.split("----------------------------------------")[0].strip()
message_id = result.get('message_id', "")
@@ -117,6 +120,18 @@ class DifyCompareTest:
import time
time.sleep(10) # 等待10秒后重试
def get_wiki_list_by_msgid(self, msg_id):
    """Return the wiki entries retrieved for a message, one per line.

    Looks up the message's debug info through
    ``self.exporter.dify_tool.get_message_debug_info_by_id`` and extracts
    the wiki list from it with ``self.exporter.get_wiki_list``.

    Args:
        msg_id: Message identifier. ``None``, NaN/NA and the empty string
            are treated as "no message".

    Returns:
        The distinct wiki entries joined with ``"\\n"`` in first-seen
        order, or ``""`` when there is no message id, no debug info, or
        no wiki entries.
    """
    # An empty id cannot match any message, so skip the lookup entirely.
    if msg_id is None or msg_id == "" or pd.isna(msg_id):
        return ""

    msg_debug_info = self.exporter.dify_tool.get_message_debug_info_by_id(msg_id)
    if not msg_debug_info:
        return ""

    wiki_list = self.exporter.get_wiki_list(msg_debug_info)
    if not wiki_list:
        return ""

    # dict.fromkeys de-duplicates while keeping first-seen order; a set
    # would make the output order vary between runs (hash randomization).
    return "\n".join(dict.fromkeys(wiki_list))
def process_single_row(self, index, row):
"""处理单行数据的方法"""
try:
@@ -145,6 +160,7 @@ class DifyCompareTest:
result_row["message_id"] = message_id
result_row["回答"] = answer
# result_row["词条与工单同时回答对比"] = judge_result
result_row["检索到的词条"] = self.get_wiki_list_by_msgid(message_id)
logging.info(f"成功处理第 {index + 1} 行数据")
return index, result_row
@@ -152,6 +168,7 @@ class DifyCompareTest:
logging.error(f"处理第 {index + 1} 行数据时出错: {e}")
result_row = row.copy()
result_row["回答"] = ''
result_row["检索到的词条"] = ''
result_row["message_id"] = ''
return index, result_row
@@ -230,7 +247,7 @@ if __name__ == "__main__":
# 处理第一个文件
excel_files = [
# ("data/excel/5月.xlsx", "data/excel/5月问答对比.xlsx"),
("data/excel/第四轮问题-Part2.xlsx", "data/excel/第四轮问题-Part2-问答测试.xlsx")
("data/excel/7.30数据导出.xlsx", "data/excel/7.30数据导出_问答测试.xlsx")
]
for excel_path, save_path in excel_files: