diff --git a/rag2_0/dify/DifyCompareTest.py b/rag2_0/dify/DifyCompareTest.py index b0c1070..9823ed1 100755 --- a/rag2_0/dify/DifyCompareTest.py +++ b/rag2_0/dify/DifyCompareTest.py @@ -81,13 +81,6 @@ class DifyComparisonTester: self.dify_tool = DifyTool() - def __del__(self): - """ - 析构函数,在对象被销毁时自动关闭数据库连接。 - 确保在对象生命周期结束时释放数据库资源。 - """ - self.dify_tool.close_connection() - def get_llm(self, **kwargs): api_key = os.getenv("OPENAI_API_KEY") base_url = os.getenv("OPENAI_API_BASE") @@ -538,16 +531,71 @@ content: "{content}" Returns: dict: 包含问题、回答和评判结果的字典 """ - # 获取基本的问题和回答 - future_old, future_new = self.process_question(q) - if future_new is None: - return None + try: + # 获取基本的问题和回答 + future_old, future_new = self.process_question(q) + if future_new is None: + return None + + # 如果是仅测试新流程模式 + if self.mode == "new_only" or future_old is None: + query = future_new["问题"] + new_answer = future_new["新流程答案"] + + # 获取词条链接和标准答案 + wiki_url = self.find_wiki_link(row) + standard_answer = "" + answer_title = "" + + try: + if wiki_url and not pd.isna(wiki_url): + standard_answer = self.get_wiki_content(wiki_url) + answer_title = self.get_wiki_title(wiki_url) + except Exception as e: + logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True) + + # 判断答案正确性 + judge_result = "" + if standard_answer: + # 调用LLM判断新答案是否正确 + new_result = self.judge_answer(standard_answer, new_answer) + if new_result is not None: + judge_result = "正确" if new_result else "错误" + + # 判断检索词条是否正确 + retrieve_right = answer_title in future_new["新检索词条"] + retrieve_right_str = ("正确" if retrieve_right else "错误") if answer_title else "" + # 判断槽点是否缺失 + slot_info = future_new["槽点信息"] + slot_info_data=None + if isinstance(slot_info, str): + slot_info_data = json.loads(slot_info) + else: + slot_info_data = slot_info + slot_missing = slot_info_data.get("slot_missing", None) + slot_missing_str = "完整" if not slot_missing else "缺失" + + # 返回结果 + return { + "问题": query, + "问题改写": future_new["新问题改写"], + "问题分类": future_new["新问题分类"], + "槽点信息": future_new["槽点信息"], + "槽点是否缺失": slot_missing_str, + "新流程答案": new_answer, + "回答是否正确": judge_result, + "检索是否正确": retrieve_right_str, + "答案词条": answer_title if answer_title else "", + "检索词条": future_new["新检索词条"], + } - # 如果是仅测试新流程模式 - if self.mode == "new_only" or future_old is None: - query = future_new["问题"] + # 如果是测试新老流程模式 + if future_old is None: + return None + query = future_old["问题"] + old_answer = future_old["旧流程答案"] new_answer = future_new["新流程答案"] - + # 获取词条链接和标准答案 wiki_url = self.find_wiki_link(row) standard_answer = "" @@ -561,68 +609,32 @@ content: "{content}" logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True) # 判断答案正确性 - judge_result = "" if standard_answer: - # 调用LLM判断新答案是否正确 - new_result = self.judge_answer(standard_answer, new_answer) - if new_result is not None: - judge_result = "正确" if new_result else "错误" + judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer) + else: + judge_result = self.judge_answer_diff(old_answer, new_answer) + + if judge_result is None: + judge_result = "" # 返回结果 return { "问题": query, - "问题改写": future_new["新问题改写"], - "问题分类": future_new["新问题分类"], + "新问题改写": future_new["新问题改写"], + "旧问题改写": future_old["旧问题改写"], + "新问题分类": future_new["新问题分类"], "槽点信息": future_new["槽点信息"], "新流程答案": new_answer, - "回答是否正确": judge_result, + "旧流程答案": old_answer, + "回答判断": judge_result, + # "词条检索相似度": retrieve_title_score, "答案词条": answer_title if answer_title else "", - "检索词条": future_new["新检索词条"], + "新检索词条": future_new["新检索词条"], + "旧检索词条": future_old["旧检索词条"], } - - # 如果是测试新老流程模式 - if future_old is None: - return None - query = future_old["问题"] - old_answer = future_old["旧流程答案"] - new_answer = future_new["新流程答案"] - - # 获取词条链接和标准答案 - wiki_url = self.find_wiki_link(row) - standard_answer = "" - answer_title = "" - - try: - if wiki_url and not pd.isna(wiki_url): - standard_answer = self.get_wiki_content(wiki_url) - answer_title = self.get_wiki_title(wiki_url) except Exception as e: - logging.error(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}", exc_info=True) - - # 判断答案正确性 - if standard_answer: - judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer) - else: - judge_result = self.judge_answer_diff(old_answer, new_answer) - - if judge_result is None: - judge_result = "" - - # 返回结果 - return { - "问题": query, - "新问题改写": future_new["新问题改写"], - "旧问题改写": future_old["旧问题改写"], - "新问题分类": future_new["新问题分类"], - "槽点信息": future_new["槽点信息"], - "新流程答案": new_answer, - "旧流程答案": old_answer, - "回答判断": judge_result, - # "词条检索相似度": retrieve_title_score, - "答案词条": answer_title if answer_title else "", - "新检索词条": future_new["新检索词条"], - "旧检索词条": future_old["旧检索词条"], - } + logging.error(f"处理问题 '{q}' 时发生错误: {str(e)}", exc_info=True) + return None def run_comparison(self, with_judge=False): """