移除DifyComparisonTester类中的析构函数,优化问题处理逻辑,增强错误处理和日志记录,调整返回结果结构以包含更多信息。
This commit is contained in:
@@ -81,13 +81,6 @@ class DifyComparisonTester:
|
||||
|
||||
self.dify_tool = DifyTool()
|
||||
|
||||
def __del__(self):
    """Close the tool's database connection when the object is destroyed.

    Calls ``close_connection()`` on ``self.dify_tool`` so the resource is
    released at the end of the object's lifetime.

    The finaliser is also invoked for instances whose ``__init__`` raised
    before ``dify_tool`` was assigned; in that case there is nothing to
    close and the method returns without doing anything.
    """
    # Look the attribute up defensively: on a partially initialised
    # instance a plain ``self.dify_tool`` would raise AttributeError
    # inside the finaliser.
    dify_tool = getattr(self, "dify_tool", None)
    if dify_tool is not None:
        dify_tool.close_connection()
|
||||
|
||||
def get_llm(self, **kwargs):
|
||||
api_key = os.getenv("OPENAI_API_KEY")
|
||||
base_url = os.getenv("OPENAI_API_BASE")
|
||||
@@ -538,16 +531,71 @@ content: "{content}"
|
||||
Returns:
|
||||
dict: 包含问题、回答和评判结果的字典
|
||||
"""
|
||||
# 获取基本的问题和回答
|
||||
future_old, future_new = self.process_question(q)
|
||||
if future_new is None:
|
||||
return None
|
||||
try:
|
||||
# 获取基本的问题和回答
|
||||
future_old, future_new = self.process_question(q)
|
||||
if future_new is None:
|
||||
return None
|
||||
|
||||
# 如果是仅测试新流程模式
|
||||
if self.mode == "new_only" or future_old is None:
|
||||
query = future_new["问题"]
|
||||
new_answer = future_new["新流程答案"]
|
||||
|
||||
# 获取词条链接和标准答案
|
||||
wiki_url = self.find_wiki_link(row)
|
||||
standard_answer = ""
|
||||
answer_title = ""
|
||||
|
||||
try:
|
||||
if wiki_url and not pd.isna(wiki_url):
|
||||
standard_answer = self.get_wiki_content(wiki_url)
|
||||
answer_title = self.get_wiki_title(wiki_url)
|
||||
except Exception as e:
|
||||
logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True)
|
||||
|
||||
# 判断答案正确性
|
||||
judge_result = ""
|
||||
if standard_answer:
|
||||
# 调用LLM判断新答案是否正确
|
||||
new_result = self.judge_answer(standard_answer, new_answer)
|
||||
if new_result is not None:
|
||||
judge_result = "正确" if new_result else "错误"
|
||||
|
||||
# 判断检索词条是否正确
|
||||
retrieve_right = answer_title in future_new["新检索词条"]
|
||||
retrieve_right_str = ("正确" if retrieve_right else "错误") if answer_title else ""
|
||||
# 判断槽点是否缺失
|
||||
slot_info = future_new["槽点信息"]
|
||||
slot_info_data=None
|
||||
if isinstance(slot_info, str):
|
||||
slot_info_data = json.loads(slot_info)
|
||||
else:
|
||||
slot_info_data = slot_info
|
||||
slot_missing = slot_info_data.get("slot_missing", None)
|
||||
slot_missing_str = "完整" if not slot_missing else "缺失"
|
||||
|
||||
# 返回结果
|
||||
return {
|
||||
"问题": query,
|
||||
"问题改写": future_new["新问题改写"],
|
||||
"问题分类": future_new["新问题分类"],
|
||||
"槽点信息": future_new["槽点信息"],
|
||||
"槽点是否缺失": slot_missing_str,
|
||||
"新流程答案": new_answer,
|
||||
"回答是否正确": judge_result,
|
||||
"检索是否正确": retrieve_right_str,
|
||||
"答案词条": answer_title if answer_title else "",
|
||||
"检索词条": future_new["新检索词条"],
|
||||
}
|
||||
|
||||
# 如果是仅测试新流程模式
|
||||
if self.mode == "new_only" or future_old is None:
|
||||
query = future_new["问题"]
|
||||
# 如果是测试新老流程模式
|
||||
if future_old is None:
|
||||
return None
|
||||
query = future_old["问题"]
|
||||
old_answer = future_old["旧流程答案"]
|
||||
new_answer = future_new["新流程答案"]
|
||||
|
||||
|
||||
# 获取词条链接和标准答案
|
||||
wiki_url = self.find_wiki_link(row)
|
||||
standard_answer = ""
|
||||
@@ -561,68 +609,32 @@ content: "{content}"
|
||||
logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True)
|
||||
|
||||
# 判断答案正确性
|
||||
judge_result = ""
|
||||
if standard_answer:
|
||||
# 调用LLM判断新答案是否正确
|
||||
new_result = self.judge_answer(standard_answer, new_answer)
|
||||
if new_result is not None:
|
||||
judge_result = "正确" if new_result else "错误"
|
||||
judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer)
|
||||
else:
|
||||
judge_result = self.judge_answer_diff(old_answer, new_answer)
|
||||
|
||||
if judge_result is None:
|
||||
judge_result = ""
|
||||
|
||||
# 返回结果
|
||||
return {
|
||||
"问题": query,
|
||||
"问题改写": future_new["新问题改写"],
|
||||
"问题分类": future_new["新问题分类"],
|
||||
"新问题改写": future_new["新问题改写"],
|
||||
"旧问题改写": future_old["旧问题改写"],
|
||||
"新问题分类": future_new["新问题分类"],
|
||||
"槽点信息": future_new["槽点信息"],
|
||||
"新流程答案": new_answer,
|
||||
"回答是否正确": judge_result,
|
||||
"旧流程答案": old_answer,
|
||||
"回答判断": judge_result,
|
||||
# "词条检索相似度": retrieve_title_score,
|
||||
"答案词条": answer_title if answer_title else "",
|
||||
"检索词条": future_new["新检索词条"],
|
||||
"新检索词条": future_new["新检索词条"],
|
||||
"旧检索词条": future_old["旧检索词条"],
|
||||
}
|
||||
|
||||
# 如果是测试新老流程模式
|
||||
if future_old is None:
|
||||
return None
|
||||
query = future_old["问题"]
|
||||
old_answer = future_old["旧流程答案"]
|
||||
new_answer = future_new["新流程答案"]
|
||||
|
||||
# 获取词条链接和标准答案
|
||||
wiki_url = self.find_wiki_link(row)
|
||||
standard_answer = ""
|
||||
answer_title = ""
|
||||
|
||||
try:
|
||||
if wiki_url and not pd.isna(wiki_url):
|
||||
standard_answer = self.get_wiki_content(wiki_url)
|
||||
answer_title = self.get_wiki_title(wiki_url)
|
||||
except Exception as e:
|
||||
logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True)
|
||||
|
||||
# 判断答案正确性
|
||||
if standard_answer:
|
||||
judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer)
|
||||
else:
|
||||
judge_result = self.judge_answer_diff(old_answer, new_answer)
|
||||
|
||||
if judge_result is None:
|
||||
judge_result = ""
|
||||
|
||||
# 返回结果
|
||||
return {
|
||||
"问题": query,
|
||||
"新问题改写": future_new["新问题改写"],
|
||||
"旧问题改写": future_old["旧问题改写"],
|
||||
"新问题分类": future_new["新问题分类"],
|
||||
"槽点信息": future_new["槽点信息"],
|
||||
"新流程答案": new_answer,
|
||||
"旧流程答案": old_answer,
|
||||
"回答判断": judge_result,
|
||||
# "词条检索相似度": retrieve_title_score,
|
||||
"答案词条": answer_title if answer_title else "",
|
||||
"新检索词条": future_new["新检索词条"],
|
||||
"旧检索词条": future_old["旧检索词条"],
|
||||
}
|
||||
logging.error(f"处理问题 '{q}' 时发生错误: {str(e)}", exc_info=True)
|
||||
return None
|
||||
|
||||
def run_comparison(self, with_judge=False):
|
||||
"""
|
||||
|
||||
Reference in New Issue
Block a user