更新对话转工单处理逻辑,优化用户问题和解决方案提取功能,调整LLM参数,增强错误处理机制,改进多线程处理效率,更新测试用例以支持新功能,提升代码可读性和结构。
This commit is contained in:
@@ -17,6 +17,7 @@ from dotenv import load_dotenv
|
||||
from pydantic import BaseModel, Field
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from threading import Lock
|
||||
import sys
|
||||
|
||||
load_dotenv()
|
||||
|
||||
@@ -292,7 +293,7 @@ class DifyComparisonTester:
|
||||
llm = self.get_llm()
|
||||
try:
|
||||
response = llm.invoke(user_prompt=prompt, need_retry=True)
|
||||
return "缺乏标准答案无法判断准确性,但答案差异较大" if "存在较大差异" in response.content else "缺乏标准答案无法判断准确性,但答案基本相同"
|
||||
return "缺乏标准答案无法判断准确性,但答案基本相同" if "差异较小" in response.content else "缺乏标准答案无法判断准确性,但答案差异较大"
|
||||
except Exception as e:
|
||||
return None
|
||||
|
||||
@@ -368,17 +369,29 @@ content: "{content}"
|
||||
valid_scores = 0
|
||||
retrieve_content = []
|
||||
|
||||
for result in outputs["result"]:
|
||||
content = result["content"].strip()
|
||||
score = self.calculate_score(query=query, content=content)
|
||||
if score != -1:
|
||||
max_score = max(max_score, score)
|
||||
min_score = min(min_score, score)
|
||||
total_score += score
|
||||
valid_scores += 1
|
||||
content_title = content.split("\n")[0]
|
||||
if content_title:
|
||||
retrieve_content.append(content_title + f"--得分({score}分)")
|
||||
# 使用线程池并发计算分数
|
||||
with ThreadPoolExecutor() as executor:
|
||||
# 创建任务列表
|
||||
future_to_content = {}
|
||||
for result in outputs["result"]:
|
||||
content = result["content"].strip()
|
||||
future = executor.submit(self.calculate_score, query=query, content=content)
|
||||
future_to_content[future] = content
|
||||
|
||||
# 收集结果
|
||||
for future in as_completed(future_to_content):
|
||||
content = future_to_content[future]
|
||||
score = future.result()
|
||||
content_title = content.split("\n")[0]
|
||||
|
||||
if score != -1:
|
||||
max_score = max(max_score, score)
|
||||
min_score = min(min_score, score)
|
||||
total_score += score
|
||||
valid_scores += 1
|
||||
|
||||
if content_title:
|
||||
retrieve_content.append(content_title + f"--得分({score}分)")
|
||||
|
||||
avg_score = total_score / valid_scores if valid_scores > 0 else 0
|
||||
return retrieve_content, max_score, min_score, avg_score
|
||||
@@ -394,6 +407,7 @@ content: "{content}"
|
||||
Returns:
|
||||
dict: 包含问题分类结果的字典
|
||||
"""
|
||||
retrieve_title=[]
|
||||
retrieve_content=[]
|
||||
max_score=0
|
||||
min_score=0
|
||||
@@ -401,12 +415,14 @@ content: "{content}"
|
||||
rewrite_query=""
|
||||
vertical_classification=""
|
||||
sub_classification=""
|
||||
slot_info=""
|
||||
try:
|
||||
new_message_info = DifyTool.get_message_debug_info_by_id(message_id=new_message_id)
|
||||
for workflow_node in new_message_info["workflow_node_executions_info"]:
|
||||
if workflow_node["title"] == "知识检索结果后处理":
|
||||
outputs = json.loads(workflow_node["outputs"])
|
||||
retrieve_content, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs)
|
||||
retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs)
|
||||
retrieve_content=outputs["result"]
|
||||
elif workflow_node["title"] == "问题优化结果解析":
|
||||
outputs = json.loads(workflow_node["outputs"])
|
||||
rewrite_query = outputs["optimize_query"]
|
||||
@@ -414,15 +430,15 @@ content: "{content}"
|
||||
json_result = json.loads(llm_result_json)
|
||||
vertical_classification = json_result['vertical_classification']
|
||||
sub_classification = json_result['sub_classification']
|
||||
slot_info=json.dumps(json_result["slot_filling"],ensure_ascii=False,indent=2)
|
||||
except Exception as e:
|
||||
return None
|
||||
return {
|
||||
"问题改写": rewrite_query,
|
||||
"检索词条": "\n".join(retrieve_content) if retrieve_content else "未检索知识库",
|
||||
"检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库",
|
||||
"检索内容": retrieve_content,
|
||||
"问题分类": f"{vertical_classification} - {sub_classification}",
|
||||
"检索最高分": max_score,
|
||||
"检索最低分": min_score,
|
||||
"检索平均分": avg_score
|
||||
"槽点信息":slot_info
|
||||
}
|
||||
|
||||
def get_old_workflow_info(self, query:str, old_message_id:str) -> dict:
|
||||
@@ -436,6 +452,7 @@ content: "{content}"
|
||||
Returns:
|
||||
dict: 包含问题分类结果的字典
|
||||
"""
|
||||
retrieve_title=[]
|
||||
retrieve_content=[]
|
||||
max_score=0
|
||||
min_score=0
|
||||
@@ -446,7 +463,8 @@ content: "{content}"
|
||||
for workflow_node in old_message_info["workflow_node_executions_info"]:
|
||||
if workflow_node["title"] == "知识检索结果后处理":
|
||||
outputs = json.loads(workflow_node["outputs"])
|
||||
retrieve_content, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs)
|
||||
retrieve_title, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs)
|
||||
retrieve_content=outputs["result"]
|
||||
elif workflow_node["title"] == "问题优化结果解析":
|
||||
outputs = json.loads(workflow_node["outputs"])
|
||||
rewrite_query = outputs["optimize_query"]
|
||||
@@ -454,12 +472,35 @@ content: "{content}"
|
||||
return None
|
||||
return {
|
||||
"问题改写": rewrite_query,
|
||||
"检索词条": "\n".join(retrieve_content) if retrieve_content else "未检索知识库",
|
||||
"检索最高分": max_score,
|
||||
"检索最低分": min_score,
|
||||
"检索平均分": avg_score
|
||||
"检索词条": "\n".join(retrieve_title) if retrieve_title else "未检索知识库",
|
||||
"检索内容": retrieve_content,
|
||||
}
|
||||
|
||||
def get_retrieve_title_similarity(self, old_retrieve_content:list[dict], new_retrieve_content:list[dict]) -> str:
    """Return the Jaccard similarity of two retrieval result lists as a percentage string.

    Each entry is expected to be a mapping with a hashable ``"content"`` value;
    two entries count as the same item when their ``"content"`` values are equal.
    Duplicate entries within one list are counted once.

    Args:
        old_retrieve_content: Retrieval results of the old workflow. ``None``
            is treated the same as an empty list.
        new_retrieve_content: Retrieval results of the new workflow. ``None``
            is treated the same as an empty list.

    Returns:
        The similarity rounded to two decimals and suffixed with ``%``,
        e.g. ``"33.33%"``. Two empty inputs yield ``"100.0%"``; exactly one
        empty input yields ``"0.0%"``.

    Raises:
        KeyError: If an entry has no ``"content"`` key.
    """
    # Build each set exactly once; `or []` lets a missing (None) result list
    # behave like "nothing retrieved" instead of raising TypeError.
    old_items = {entry["content"] for entry in (old_retrieve_content or [])}
    new_items = {entry["content"] for entry in (new_retrieve_content or [])}

    if not old_items and not new_items:
        # Nothing retrieved on either side: treat as fully similar.
        similarity = 1.0
    elif not old_items or not new_items:
        # Only one side retrieved something: treat as fully dissimilar.
        similarity = 0.0
    else:
        # Jaccard coefficient: |intersection| / |union|.
        similarity = len(old_items & new_items) / len(old_items | new_items)

    return f"{round(similarity * 100, 2)}%"
|
||||
|
||||
def process_question_with_judge(self, q:str):
|
||||
"""
|
||||
处理单个问题,获取新旧流程的回答并进行评判
|
||||
@@ -511,16 +552,18 @@ content: "{content}"
|
||||
except Exception as e:
|
||||
print(f"处理问题 '{query}' 获取工作流信息时发生错误: {str(e)}")
|
||||
return None
|
||||
|
||||
retrieve_title_score=self.get_retrieve_title_similarity(old_retrieve_content=old_workflow_info["检索内容"], new_retrieve_content=new_workflow_info["检索内容"])
|
||||
# 返回结果
|
||||
return {
|
||||
"问题": query,
|
||||
"新问题改写": new_workflow_info["问题改写"],
|
||||
"旧问题改写": old_workflow_info["问题改写"],
|
||||
"新问题分类": new_workflow_info["问题分类"],
|
||||
"槽点信息":new_workflow_info["槽点信息"],
|
||||
"新流程答案": new_answer,
|
||||
"旧流程答案": old_answer,
|
||||
"回答判断": judge_result,
|
||||
"词条检索相似度": retrieve_title_score,
|
||||
"答案词条": answer_title if answer_title else "",
|
||||
"新检索词条": new_workflow_info["检索词条"],
|
||||
"旧检索词条": old_workflow_info["检索词条"],
|
||||
@@ -538,29 +581,44 @@ content: "{content}"
|
||||
"""
|
||||
# 读取Excel文件中的问题
|
||||
df = pd.read_excel(self.excel_path)
|
||||
questions = df['问题'].tolist()
|
||||
questions=[]
|
||||
for idx, row in df.iterrows():
|
||||
if row['回答中的软件名称'] == "未知":
|
||||
continue
|
||||
if row['提问中的软件名称'] != "未知":
|
||||
questions.append(row['提问'])
|
||||
questions.append(f"{row['回答中的软件名称']}, {row['提问']}")
|
||||
|
||||
results = []
|
||||
# 选择处理函数
|
||||
process_func = self.process_question_with_judge if with_judge else self.process_question
|
||||
|
||||
# 使用多线程并发处理问题
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
# 创建进度条
|
||||
with tqdm(total=len(questions), desc="处理问题进度") as pbar:
|
||||
# 提交所有任务
|
||||
futures = []
|
||||
for q in questions:
|
||||
future = executor.submit(process_func, q)
|
||||
futures.append(future)
|
||||
|
||||
# 处理结果
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result is not None:
|
||||
with self.results_lock:
|
||||
results.append(result)
|
||||
pbar.update(1)
|
||||
|
||||
is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None
|
||||
if not is_debug:
|
||||
# 使用多线程并发处理问题
|
||||
print("并发数量: ", self.max_workers)
|
||||
print("问题数量: ", len(questions))
|
||||
with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
|
||||
# 创建进度条
|
||||
with tqdm(total=len(questions), desc="处理问题进度") as pbar:
|
||||
# 提交所有任务
|
||||
futures = []
|
||||
for q in questions:
|
||||
future = executor.submit(process_func, q)
|
||||
futures.append(future)
|
||||
|
||||
# 处理结果
|
||||
for future in as_completed(futures):
|
||||
result = future.result()
|
||||
if result is not None:
|
||||
with self.results_lock:
|
||||
results.append(result)
|
||||
pbar.update(1)
|
||||
else:
|
||||
for q in questions:
|
||||
result = process_func(q)
|
||||
print(json.dumps(result,ensure_ascii=False,indent=2))
|
||||
if result is not None:
|
||||
results.append(result)
|
||||
# 生成输出Excel文件
|
||||
out_path = self.output_path if with_judge else os.path.join(os.path.dirname(self.excel_path), "dify问答_对比结果.xlsx")
|
||||
df_results = pd.DataFrame(results)
|
||||
@@ -583,7 +641,7 @@ content: "{content}"
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 定义Excel路径
|
||||
excel_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/400条答案差异的.xlsx")
|
||||
excel_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/历史提问数据(like)_提问明确.xlsx")
|
||||
|
||||
if not os.path.exists(excel_path):
|
||||
print(f"错误:Excel文件不存在: {excel_path}")
|
||||
@@ -592,7 +650,7 @@ if __name__ == "__main__":
|
||||
# Dify API配置
|
||||
baseurl = "http://172.20.0.145/v1"
|
||||
old_workflow_api_key = "app-wUdkWJx5zeOvmvBUZizMoSw3"
|
||||
new_workflow_api_key = "app-Lf1pQ1NVwdMfCRVNTBCOTPHT"
|
||||
new_workflow_api_key = "app-qxsSybCs7ABiKlC1JabTYVn6"
|
||||
|
||||
# Wiki Excel路径和Dify应用ID(用于评判)
|
||||
wiki_excel_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/部分提问_软件名称明确.xlsx")
|
||||
@@ -604,6 +662,7 @@ if __name__ == "__main__":
|
||||
old_workflow_api_key=old_workflow_api_key,
|
||||
new_workflow_api_key=new_workflow_api_key,
|
||||
wiki_excel_path=wiki_excel_path,
|
||||
max_workers=5
|
||||
)
|
||||
|
||||
# 运行对比测试(带评判)
|
||||
|
||||
Reference in New Issue
Block a user