上传遗漏文件
This commit is contained in:
@@ -182,4 +182,4 @@ if __name__ == "__main__":
|
|||||||
workers=1 # 生产环境可以增加worker数量
|
workers=1 # 生产环境可以增加worker数量
|
||||||
)
|
)
|
||||||
# 生产环境可以使用以下命令启动:
|
# 生产环境可以使用以下命令启动:
|
||||||
# uvicorn rag2_0.dify.intent_recognition_api:app --host 0.0.0.0 --port 8001 --workers 10
|
# uvicorn rag2_0.api.intent_recognition_api:app --host 0.0.0.0 --port 8001 --workers 10
|
||||||
@@ -1,112 +0,0 @@
|
|||||||
import pandas as pd
|
|
||||||
import json
|
|
||||||
import sys
|
|
||||||
import os
|
|
||||||
sys.path.append(os.getcwd())
|
|
||||||
from rag2_0.dify.dify_tool import DifyTool
|
|
||||||
|
|
||||||
# Client used below to fetch the workflow debug information of a message.
dify_tool = DifyTool()

# Spreadsheet of collected questions; the loop further down reads the columns
# "msg_id", "评价", "点踩原因" and "关联词条" from it.
df = pd.read_excel("data/excel/0714提问数据汇总(已分析)_软件.xlsx")
|
|
||||||
|
|
||||||
|
|
||||||
def get_rewrite_query(intent_node_execution_info) -> str:
    """Return the rewritten query stored in an intent node's outputs.

    Args:
        intent_node_execution_info: Mapping describing one node execution.
            Its ``'outputs'`` entry holds a JSON document with an
            ``'optimize_query'`` field. JSON text is decoded; a value that
            is already a decoded mapping is used as-is.

    Returns:
        The value stored under ``'optimize_query'``.

    Raises:
        KeyError: If ``'outputs'`` or ``'optimize_query'`` is missing.
        json.JSONDecodeError: If the outputs text is not valid JSON.
    """
    outputs = intent_node_execution_info['outputs']
    if isinstance(outputs, (str, bytes, bytearray)):
        outputs = json.loads(outputs)
    return outputs['optimize_query']
|
|
||||||
|
|
||||||
def judge_error_node_and_reason(intent_node_execution_info, knowledge_filter_node_execution_info_list, answer_wiki_name) -> dict:
    """Decide which pipeline stage lost the expected knowledge entry.

    The stages are checked in order and the first one that fails is reported:
    slot filling, knowledge retrieval, knowledge filtering, and finally
    answer generation.

    Args:
        intent_node_execution_info: Node-execution mapping whose ``'outputs'``
            JSON holds ``optimize_query``, ``is_complete``,
            ``has_slot_filling`` and ``missing_slots``.
        knowledge_filter_node_execution_info_list: Node-execution mappings of
            the knowledge step; only the first one is inspected. Its
            ``'outputs'`` JSON holds ``source_kno`` (retrieved entries) and
            ``knowledge_list_metadata`` (entries kept after filtering).
        answer_wiki_name: Name of the entry that should have been used. It is
            matched as a substring of the newline-joined entry titles.

    Returns:
        A dict with the keys "问题改写结果", "错误环节", "错误原因" and
        "具体描述". All values are ``None`` when ``answer_wiki_name`` is
        missing; only the rewritten query is filled in when there is no
        knowledge node to inspect.

    Raises:
        KeyError: If an expected field is absent from the node outputs.
        json.JSONDecodeError: If a node's outputs text is not valid JSON.
    """
    result = {"问题改写结果": None, "错误环节": None, "错误原因": None, "具体描述": None}
    if answer_wiki_name is None or pd.isna(answer_wiki_name):
        return result

    outputs_result = _decode_node_outputs(intent_node_execution_info)
    result["问题改写结果"] = outputs_result['optimize_query']

    # Explicit comparisons are kept on purpose: a null ``is_complete`` must
    # not be treated as "incomplete", which plain truthiness would do.
    if outputs_result['is_complete'] == False and outputs_result["has_slot_filling"] == True:  # noqa: E712
        result["错误环节"] = "槽点填充"
        result["错误原因"] = "槽点缺失"
        result["具体描述"] = f"缺失内容:{outputs_result['missing_slots']}"
        return result

    if not knowledge_filter_node_execution_info_list:
        return result

    knowledge_filter_outputs = _decode_node_outputs(knowledge_filter_node_execution_info_list[0])

    # All entries returned by retrieval.
    source_knowledge = knowledge_filter_outputs['source_kno']
    source_knowledge_title = "\n".join(item['title'] for item in source_knowledge)
    if answer_wiki_name not in source_knowledge_title:
        result["错误环节"] = "知识检索"
        result["错误原因"] = "未检索到对应词条"
        result["具体描述"] = _describe_retrieved_entries(source_knowledge)
        return result

    # Entries that survived the filtering step.
    knowledge_filter = knowledge_filter_outputs['knowledge_list_metadata']
    knowledge_filter_title = "\n".join(item['title'] for item in knowledge_filter)
    if answer_wiki_name not in knowledge_filter_title:
        result["错误环节"] = "知识过滤"
        result["错误原因"] = "词条被过滤"
        # The description lists the retrieved entries, not the filtered ones.
        result["具体描述"] = _describe_retrieved_entries(source_knowledge)
        return result

    # Retrieval and filtering both kept the entry, so the answer itself is wrong.
    result["错误环节"] = "生成错误"
    result["错误原因"] = ""
    result["具体描述"] = ""
    return result


def _decode_node_outputs(node_execution_info):
    """Return a node's ``'outputs'`` as a dict, decoding JSON text if needed."""
    outputs = node_execution_info['outputs']
    if isinstance(outputs, (str, bytes, bytearray)):
        return json.loads(outputs)
    return outputs


def _describe_retrieved_entries(source_knowledge):
    """List each retrieved entry's short name (last '/' segment) and rerank score."""
    entry_lines = [
        f"词条名称:{item['title'].split('/')[-1]},重排评分:{item['metadata']['score']:.2f}\n"
        for item in source_knowledge
    ]
    return "检索到的词条如下:\n" + "".join(entry_lines)
|
|
||||||
|
|
||||||
# Result columns filled in by the loop below; rows that are skipped keep None.
df["问题改写结果"] = None
df["错误环节"] = None
df["错误原因"] = None
df["具体描述"] = None

for index, row in df.iterrows():
    # Bound before the ``try`` so the handler below always has a value to
    # report and never hits a NameError or the previous row's id.
    msg_id = None
    try:
        msg_id = row["msg_id"]

        # Only disliked answers that carry a dislike reason are analysed.
        if row["评价"] != "dislike":
            continue
        dislike_reason = row["点踩原因"]
        if dislike_reason is None or pd.isna(dislike_reason):
            continue

        answer_wiki_name = row["关联词条"]
        msg_debug_info = dify_tool.get_message_debug_info_by_id(msg_id)
        node_executions_info = msg_debug_info["workflow_node_executions_info"]

        # Select the node executions of interest by their title.
        intent_node_execution_info = [
            node_execution_info
            for node_execution_info in node_executions_info
            if node_execution_info["title"] == "意图识别结果解析"
        ]
        knowledge_filter_node_execution_info_list = [
            node_execution_info
            for node_execution_info in node_executions_info
            if node_execution_info["title"] == "提取处理后的知识"
        ]
        if not intent_node_execution_info:
            print(f"msg_id: {msg_id} 缺少节点信息")
            continue

        df.loc[index, "问题改写结果"] = get_rewrite_query(intent_node_execution_info[0])

        # The stage diagnosis only runs when the dislike reason mentions "有词条".
        if "有词条" not in dislike_reason:
            continue

        result = judge_error_node_and_reason(
            intent_node_execution_info[0],
            knowledge_filter_node_execution_info_list,
            answer_wiki_name,
        )
        for key, value in result.items():
            # Unset fields are skipped: the diagnosis returns an all-None
            # dict when the linked entry is missing, and writing it back
            # would erase the rewritten query stored just above.
            if value is not None:
                df.loc[index, key] = value
    except Exception as e:
        # Best-effort batch run: report the failing row and move on.
        print(f"msg_id: {msg_id} 处理失败: {e}")

df.to_excel("data/excel/0714提问数据汇总(已分析)_软件_分析.xlsx", index=False)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
Reference in New Issue
Block a user