diff --git a/rag2_0/api/intent_recognition_api.py b/rag2_0/api/intent_recognition_api.py index c0687f6..f88aea7 100755 --- a/rag2_0/api/intent_recognition_api.py +++ b/rag2_0/api/intent_recognition_api.py @@ -182,4 +182,4 @@ if __name__ == "__main__": workers=1 # 生产环境可以增加worker数量 ) # 生产环境可以使用以下命令启动: - # uvicorn rag2_0.dify.intent_recognition_api:app --host 0.0.0.0 --port 8001 --workers 10 \ No newline at end of file + # uvicorn rag2_0.api.intent_recognition_api:app --host 0.0.0.0 --port 8001 --workers 10 \ No newline at end of file diff --git a/rag2_0/dify/AnalysisDifyAnswer.py b/rag2_0/dify/AnalysisDifyAnswer.py deleted file mode 100644 index 1194eac..0000000 --- a/rag2_0/dify/AnalysisDifyAnswer.py +++ /dev/null @@ -1,112 +0,0 @@ -import pandas as pd -import json -import sys -import os -sys.path.append(os.getcwd()) -from rag2_0.dify.dify_tool import DifyTool - -dify_tool = DifyTool() - -df = pd.read_excel("data/excel/0714提问数据汇总(已分析)_软件.xlsx") - - -def get_rewrite_query(intent_node_execution_info)->str: - outputs_result =json.loads(intent_node_execution_info['outputs']) - return outputs_result['optimize_query'] - -def judge_error_node_and_reason(intent_node_execution_info, knowledge_filter_node_execution_info_list, answer_wiki_name)->dict: - result = {"问题改写结果":None, "错误环节":None, "错误原因":None, "具体描述":None} - if answer_wiki_name is None or pd.isna(answer_wiki_name): - return result - - outputs_result =json.loads(intent_node_execution_info['outputs']) - result["问题改写结果"] = outputs_result['optimize_query'] - if outputs_result['is_complete'] == False and outputs_result["has_slot_filling"] == True: - result["错误环节"] = "槽点填充" - result["错误原因"] = f"槽点缺失" - result["具体描述"] = f"缺失内容:{outputs_result['missing_slots']}" - return result - - if len(knowledge_filter_node_execution_info_list) == 0: - return result - - knowledge_filter_node_execution_info=knowledge_filter_node_execution_info_list[0] - # 获取检索到的所有词条 - knowledge_filter_outputs = json.loads(knowledge_filter_node_execution_info['outputs']) - source_knowledge = knowledge_filter_outputs['source_kno'] - source_knowledge_title ="\n".join([item['title'] for item in source_knowledge]) - if answer_wiki_name not in source_knowledge_title: - result["错误环节"] = "知识检索" - result["错误原因"] = f"未检索到对应词条" - - # 获取词条名称及对应评分 - result["具体描述"] = "检索到的词条如下:\n" - for index, item in enumerate(source_knowledge): - result["具体描述"] += f"词条名称:{item['title'].split('/')[-1]},重排评分:{item['metadata']['score']:.2f}\n" - return result - - # 获取检索到的词条的metadata - knowledge_filter = knowledge_filter_outputs['knowledge_list_metadata'] - knowledge_filter_title ="\n".join([item['title'] for item in knowledge_filter]) - if answer_wiki_name not in knowledge_filter_title: - result["错误环节"] = "知识过滤" - result["错误原因"] = f"词条被过滤" - result["具体描述"] = "检索到的词条如下:\n" - for index, item in enumerate(source_knowledge): - result["具体描述"] += f"词条名称:{item['title'].split('/')[-1]},重排评分:{item['metadata']['score']:.2f}\n" - return result - - # 检索正确,回答错误 - result["错误环节"] = "生成错误" - result["错误原因"] = f"" - result["具体描述"] = f"" - return result - -df["问题改写结果"] = None -df["错误环节"] = None -df["错误原因"] = None -df["具体描述"] = None - -for index, row in df.iterrows(): - try: - msg_id = row["msg_id"] - answer = row["回答"] - query = row["提问"] - rating = row["评价"] - if rating != "dislike": - continue - class_type = row["问题分类"] - dislike_reason = row["点踩原因"] - if dislike_reason is None or pd.isna(dislike_reason): - continue - - answer_wiki_name = row["关联词条"] - search_wiki = row["检索到的词条"] - msg_debug_info = dify_tool.get_message_debug_info_by_id(msg_id) - node_executions_info = msg_debug_info["workflow_node_executions_info"] - intent_node_execution_info = [node_execution_info for node_execution_info in node_executions_info - if node_execution_info["title"] == "意图识别结果解析"] - - knowledge_filter_node_execution_info_list = [node_execution_info for node_execution_info in node_executions_info - if node_execution_info["title"] == "提取处理后的知识"] - if len(intent_node_execution_info) == 0: - print(f"msg_id: {msg_id} 缺少节点信息") - continue - - rewrite_query = get_rewrite_query(intent_node_execution_info[0]) - df.loc[index, "问题改写结果"] = rewrite_query - if "有词条" not in dislike_reason: - continue - result = judge_error_node_and_reason(intent_node_execution_info[0], knowledge_filter_node_execution_info_list, answer_wiki_name) - for key, value in result.items(): - df.loc[index, key] = value - - except Exception as e: - print(f"msg_id: {msg_id} 处理失败: {e}") - continue - -df.to_excel("data/excel/0714提问数据汇总(已分析)_软件_分析.xlsx", index=False) - - - -