移除 DifyComparisonTester 类中的析构函数,优化问题处理逻辑,增强错误处理和日志记录,调整返回结果结构以包含更多信息。

This commit is contained in:
2025-07-02 11:02:01 +08:00
parent f76f44640a
commit d811ae411f
+79 -67
View File
@@ -81,13 +81,6 @@ class DifyComparisonTester:
self.dify_tool = DifyTool()
def __del__(self):
    """
    Finalizer: close the connection held by ``self.dify_tool``.

    Per the original note on this method, the intent is to release the
    database resources automatically when the object's lifetime ends.

    Returns:
        None
    """
    # __del__ also runs for instances whose __init__ raised before
    # ``dify_tool`` was assigned; look the attribute up defensively so
    # finalization does not raise AttributeError on a half-built object.
    dify_tool = getattr(self, "dify_tool", None)
    if dify_tool is not None:
        dify_tool.close_connection()
def get_llm(self, **kwargs):
api_key = os.getenv("OPENAI_API_KEY")
base_url = os.getenv("OPENAI_API_BASE")
@@ -538,16 +531,71 @@ content: "{content}"
Returns:
dict: 包含问题、回答和评判结果的字典
"""
# 获取基本的问题和回答
future_old, future_new = self.process_question(q)
if future_new is None:
return None
try:
# 获取基本的问题和回答
future_old, future_new = self.process_question(q)
if future_new is None:
return None
# 如果是仅测试新流程模式
if self.mode == "new_only" or future_old is None:
query = future_new["问题"]
new_answer = future_new["新流程答案"]
# 获取词条链接和标准答案
wiki_url = self.find_wiki_link(row)
standard_answer = ""
answer_title = ""
try:
if wiki_url and not pd.isna(wiki_url):
standard_answer = self.get_wiki_content(wiki_url)
answer_title = self.get_wiki_title(wiki_url)
except Exception as e:
logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True)
# 判断答案正确性
judge_result = ""
if standard_answer:
# 调用LLM判断新答案是否正确
new_result = self.judge_answer(standard_answer, new_answer)
if new_result is not None:
judge_result = "正确" if new_result else "错误"
# 判断检索词条是否正确
retrieve_right = answer_title in future_new["新检索词条"]
retrieve_right_str = ("正确" if retrieve_right else "错误") if answer_title else ""
# 判断槽点是否缺失
slot_info = future_new["槽点信息"]
slot_info_data=None
if isinstance(slot_info, str):
slot_info_data = json.loads(slot_info)
else:
slot_info_data = slot_info
slot_missing = slot_info_data.get("slot_missing", None)
slot_missing_str = "完整" if not slot_missing else "缺失"
# 返回结果
return {
"问题": query,
"问题改写": future_new["新问题改写"],
"问题分类": future_new["新问题分类"],
"槽点信息": future_new["槽点信息"],
"槽点是否缺失": slot_missing_str,
"新流程答案": new_answer,
"回答是否正确": judge_result,
"检索是否正确": retrieve_right_str,
"答案词条": answer_title if answer_title else "",
"检索词条": future_new["新检索词条"],
}
# 如果是测试新流程模式
if self.mode == "new_only" or future_old is None:
query = future_new["问题"]
# 如果是测试新老流程模式
if future_old is None:
return None
query = future_old["问题"]
old_answer = future_old["旧流程答案"]
new_answer = future_new["新流程答案"]
# 获取词条链接和标准答案
wiki_url = self.find_wiki_link(row)
standard_answer = ""
@@ -561,68 +609,32 @@ content: "{content}"
logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True)
# 判断答案正确性
judge_result = ""
if standard_answer:
# 调用LLM判断新答案是否正确
new_result = self.judge_answer(standard_answer, new_answer)
if new_result is not None:
judge_result = "正确" if new_result else "错误"
judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer)
else:
judge_result = self.judge_answer_diff(old_answer, new_answer)
if judge_result is None:
judge_result = ""
# 返回结果
return {
"问题": query,
"问题改写": future_new["新问题改写"],
"问题分类": future_new["问题分类"],
"问题改写": future_new["新问题改写"],
"问题改写": future_old["问题改写"],
"新问题分类": future_new["新问题分类"],
"槽点信息": future_new["槽点信息"],
"新流程答案": new_answer,
"回答是否正确": judge_result,
"旧流程答案": old_answer,
"回答判断": judge_result,
# "词条检索相似度": retrieve_title_score,
"答案词条": answer_title if answer_title else "",
"检索词条": future_new["新检索词条"],
"检索词条": future_new["新检索词条"],
"旧检索词条": future_old["旧检索词条"],
}
# 如果是测试新老流程模式
if future_old is None:
return None
query = future_old["问题"]
old_answer = future_old["旧流程答案"]
new_answer = future_new["新流程答案"]
# 获取词条链接和标准答案
wiki_url = self.find_wiki_link(row)
standard_answer = ""
answer_title = ""
try:
if wiki_url and not pd.isna(wiki_url):
standard_answer = self.get_wiki_content(wiki_url)
answer_title = self.get_wiki_title(wiki_url)
except Exception as e:
logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True)
# 判断答案正确性
if standard_answer:
judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer)
else:
judge_result = self.judge_answer_diff(old_answer, new_answer)
if judge_result is None:
judge_result = ""
# 返回结果
return {
"问题": query,
"新问题改写": future_new["新问题改写"],
"旧问题改写": future_old["旧问题改写"],
"新问题分类": future_new["新问题分类"],
"槽点信息": future_new["槽点信息"],
"新流程答案": new_answer,
"旧流程答案": old_answer,
"回答判断": judge_result,
# "词条检索相似度": retrieve_title_score,
"答案词条": answer_title if answer_title else "",
"新检索词条": future_new["新检索词条"],
"旧检索词条": future_old["旧检索词条"],
}
logging.error(f"处理问题 '{q}' 时发生错误: {str(e)}", exc_info=True)
return None
def run_comparison(self, with_judge=False):
"""