删除合并脚本,添加会话转工单脚本逻辑

This commit is contained in:
2025-05-28 10:00:33 +08:00
parent ce27ddd823
commit 6270513688
7 changed files with 955 additions and 785 deletions
+462
View File
@@ -0,0 +1,462 @@
"""
综合评判工具
此模块结合了答案正确性评判和检索内容相关性评分功能,可以同时:
1. 评判问题的新旧回答是否正确
2. 比较新旧回答的差异
3. 评估检索内容与问题的相关性
用法示例:
judge = CombinedJudge()
judge.process()
"""
import pandas as pd
from urllib.parse import unquote
from rag2_0.tool.WikijsTool import WikijsTool
from rag2_0.tool.html_to_md import convert_html_to_md
from rag2_0.tool.ModelTool import OpenAiLLM
from dotenv import load_dotenv
import os
from tqdm import tqdm
from rag2_0.dify.dify_tool import DifyTool
import json
from pydantic import BaseModel, Field
from langchain.output_parsers import PydanticOutputParser
import concurrent.futures
from threading import Lock
load_dotenv()
class ContentSource(BaseModel):
score:int = Field(description="相关性分数")
reason:str = Field(description="评分理由")
class CombinedJudge:
"""
综合评判工具类
结合了答案正确性评判和检索内容相关性评分功能
"""
def __init__(self, wiki_excel_path="/data/QueryRewrite/data/excel/部分提问_软件名称明确.xlsx",
answer_excel_path="/data/QueryRewrite/data/excel/dify问答_对比结果.xlsx",
output_path="/data/QueryRewrite/data/excel/dify问答__综合评判结果.xlsx",
dify_appid="ccf92b97-2789-4a3f-90e0-135a869a37c5",
max_workers=10):
"""
初始化综合评判工具
参数:
wiki_excel_path (str): Wiki Excel文件路径
answer_excel_path (str): 答案对比Excel文件路径
output_path (str): 输出Excel文件路径
dify_appid (str): Dify应用ID
max_workers (int): 最大工作线程数
"""
self.wiki_excel_path = wiki_excel_path
self.answer_excel_path = answer_excel_path
self.output_path = output_path
self.dify_appid = dify_appid
self.max_workers = max_workers
self.content_source_parser = PydanticOutputParser(pydantic_object=ContentSource)
self.results_lock = Lock()
# 读取Excel文件
if os.path.exists(wiki_excel_path):
self.wiki_excel = pd.read_excel(self.wiki_excel_path)
else:
self.wiki_excel = None
self.answer_excel = pd.read_excel(self.answer_excel_path)
# 初始化LLM
self.api_key = os.getenv("OPENAI_API_KEY")
self.base_url = os.getenv("OPENAI_API_BASE")
self.model = os.getenv("LLM_MODEL_NAME")
if not all([self.api_key, self.base_url, self.model]):
raise ValueError("请设置 OPENAI_API_KEY, OPENAI_API_BASE, 和 LLM_MODEL_NAME 环境变量")
self.llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model=self.model)
def find_wiki_link(self, query) -> str | None:
"""
根据查询(对应wiki_excel中的新提问列)找出对应的词条链接
参数:
query (str): 查询内容,对应wiki_excel中的新提问列
返回:
str: 对应的词条链接,如果没有找到则返回None
"""
# 确保query不为空
if not query or pd.isna(query):
return None
if self.wiki_excel is None:
return None
# 在"新提问"列中查找匹配的行
matched_rows = self.wiki_excel[self.wiki_excel['新提问'] == query]
# 如果找到了匹配的行,返回对应的词条链接
if not matched_rows.empty:
return matched_rows.iloc[0]['对应词条链接']
# 如果没有完全匹配,尝试部分匹配
# 去除软件名称部分(如果有)
query_parts = query.split(',', 1)
if len(query_parts) > 1:
clean_query = query_parts[1].strip()
# 在"提问"列中查找包含清理后查询的行
for idx, row in self.wiki_excel.iterrows():
if pd.notna(row['提问']) and clean_query in row['提问']:
return row['对应词条链接']
return None
def get_wiki_content(self, link) -> str:
"""
获取词条链接的内容
参数:
link (str): 词条链接
返回:
str: 链接内容,如果获取失败则返回错误信息
"""
try:
if not link or pd.isna(link):
return "链接为空或无效"
# 移除域名部分,只保留路径
path = link.split('/', 3)[-1]
decoded_path = unquote(path)
path_parts = decoded_path.split('/')
doc_path = "/".join(path_parts[1:])
wiki_doc = WikijsTool.get_all_doc_by_path(path=doc_path, path_is_dir=False)
html_content = WikijsTool.query_doc_info(wiki_doc[0]["id"]).get('content')
if not html_content:
return "获取内容失败"
options = {"heading_style": '', "keep_inline_images_in": ["figure", "img"], "escape_asterisks": True}
new_content = (html_content.replace("h6>", "h7>")
.replace("h5>", "h6>")
.replace("h4>", "h5>")
.replace("h3>", "h4>")
.replace("h2>", "h3>")
.replace("h1>", "h2>"))
# 将HTML内容转换为Markdown
markdown_content = convert_html_to_md(new_content, "", **options)
markdown_content = f"# {path_parts[-1]}\n\n{markdown_content}"
return markdown_content
except Exception as e:
raise RuntimeError(f"获取词条内容失败: {str(e)}") from e
def get_wiki_title(self, link) -> str | None:
"""
获取词条标题
参数:
link (str): 词条链接
返回:
str: 词条标题,如果获取失败则返回None
"""
try:
if not link or pd.isna(link):
return None
# 移除域名部分,只保留路径
path = link.split('/', 3)[-1]
decoded_path = unquote(path)
path_parts = decoded_path.split('/')
return path_parts[-1]
except Exception as e:
raise RuntimeError(f"获取词条内容失败: {str(e)}") from e
def create_correctness_prompt(self, standard_answer: str, answer_to_check: str) -> str:
"""
创建用于评判答案正确性的prompt
参数:
standard_answer (str): 标准答案
answer_to_check (str): 需要检查的答案
返回:
str: 格式化的prompt
"""
return f"""请作为一个专业的答案评判专家,评估以下回答与标准答案的匹配程度。
标准答案:
{standard_answer}
待评估的回答:
{answer_to_check}
请仔细分析两个答案的内容,并给出你的判断。只需要回答"正确""错误",不需要其他解释。
如果待评估的回答与标准答案在核心内容和关键信息(步骤)上一致,即使表达方式不同,也应判定为"正确"
如果待评估的回答存在明显的错误信息或重要信息缺失,应判定为"错误"
请严格按以下格式输出:【正确】或【错误】:"""
def judge_answer(self, standard_answer: str, answer: str) -> bool | None:
"""
调用LLM判断回答是否正确
参数:
standard_answer (str): 标准答案(来自Wiki
answer (str): 需评判的回答
返回:
bool | None: 判断结果,True表示正确,False表示错误,None表示判断失败
"""
prompt = self.create_correctness_prompt(standard_answer, answer)
try:
response = self.llm.invoke(user_prompt=prompt, need_retry=True)
return "正确" in response.content
except Exception as e:
return None
def judge_by_standard_answer(self, standard_answer: str, old_answer: str, new_answer: str) -> str | None:
"""
综合判断新旧回答的正确性
参数:
standard_answer (str): 标准答案(来自Wiki
old_answer (str): 旧流程的回答
new_answer (str): 新流程的回答
返回:
str | None: 包含新旧回答判断结果的字符串,None表示判断失败
"""
old_result = self.judge_answer(standard_answer, old_answer)
new_result = self.judge_answer(standard_answer, new_answer)
if old_result is None or new_result is None:
return None
if new_result and old_result:
return "新旧答案均正确"
elif new_result and not old_result:
return "新答案正确"
elif not new_result and old_result:
return "旧答案正确"
else:
return "新旧答案均错误"
def judge_answer_diff(self, old_answer: str, new_answer: str) -> str | None:
"""
判断新旧回答是否存在较大差异
参数:
old_answer (str): 旧流程的回答
new_answer (str): 新流程的回答
返回:
str | None: 差异判断结果,None表示判断失败
"""
prompt = f"""请判断以下两个回答是否存在较大差异:
旧回答: {old_answer}
新回答: {new_answer}
主要是主要步骤、主要信息、或者主要主体的差异
请仅回答"存在较大差异""差异较小""""
try:
response = self.llm.invoke(user_prompt=prompt, need_retry=True)
return "缺乏标准答案无法判断准确性,但答案差异较大" if "存在较大差异" in response.content else "缺乏标准答案无法判断准确性,但答案基本相同"
except Exception as e:
return None
def calculate_score(self, query:str, content:str) -> int:
"""
使用LLM判断query与content之间的相关性分数
参数:
query (str): 用户问题
content (str): 检索内容
返回:
int: 相关性分数,1-10分,10代表完全相关,1代表完全不相关;-1表示评分失败
"""
try:
prompt = f"""你是一个专业的信息相关性评估助手。请根据以下标准对用户query和检索内容的相关性进行1-10评分(10=完全相关,1=完全不相关),并按指定格式输出JSON结果。
【评分标准】
10分:完全契合,主题/意图完全一致且涵盖所有关键信息
8-9分:高度相关,核心要素匹配但存在少量信息缺失
6-7分:部分相关,涉及相同主题但存在重要信息缺失
4-5分:弱相关,仅次要信息点匹配
1-3分:完全不相关或信息冲突
【评估维度】
1. 主题一致性:核心主题/意图的匹配程度
2. 内容覆盖度:是否涵盖query的关键要素
3. 信息准确性:是否存在矛盾/错误信息
4. 细节丰富度:是否提供query要求的详细信息
【输出格式】
{{
"score": 评分,
"reason": "简明扼要的评分理由(中文)"
}}
【示例】
query: "新冠疫苗的常见副作用"
内容: "辉瑞疫苗常见反应包括注射部位疼痛(84.1%)、疲劳(62.9%)"
输出: {{"score":8,"reason":"主题完全匹配,涵盖主要副作用但未提及发热等常见反应"}}
现在评估:
query: "{query}"
content: "{content}"
"""
response = self.llm.invoke(user_prompt=prompt, need_retry=True)
# 解析JSON响应
try:
parsed_output = self.content_source_parser.parse(response.content)
return parsed_output.score
except Exception as e:
return -1
except Exception as e:
return -1
def get_retrieve_info(self, query:str, outputs:dict) -> tuple:
"""
获取检索信息并计算分数
参数:
query (str): 用户问题
outputs (dict): 检索输出结果
返回:
tuple: (检索内容列表, 最高分, 最低分, 平均分)
"""
max_score = 0
min_score = 10
total_score = 0
valid_scores = 0
retrieve_content = []
for result in outputs["result"]:
content = result["content"].strip()
score = self.calculate_score(query=query, content=content)
if score != -1:
max_score = max(max_score, score)
min_score = min(min_score, score)
total_score += score
valid_scores += 1
content_title = content.split("\n")[0]
if content_title:
retrieve_content.append(content_title + f"--得分({score}分)")
avg_score = total_score / valid_scores if valid_scores > 0 else 0
return retrieve_content, max_score, min_score, avg_score
def process_single_question(self, row):
"""
处理单个问题的评判
参数:
row: DataFrame中的一行数据
返回:
dict: 包含处理结果的字典
"""
query = row["问题"]
old_answer = row["旧流程答案"]
new_answer = row["新流程答案"]
# 获取词条链接和标准答案
wiki_url = self.find_wiki_link(query)
standard_answer = ""
answer_title = ""
try:
if wiki_url and not pd.isna(wiki_url):
standard_answer = self.get_wiki_content(wiki_url)
answer_title = self.get_wiki_title(wiki_url)
except Exception as e:
print(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}")
# 判断答案正确性
if standard_answer:
judge_result = self.judge_by_standard_answer(standard_answer, old_answer, new_answer)
else:
judge_result = self.judge_answer_diff(old_answer, new_answer)
if judge_result is None:
judge_result = ""
# 获取检索内容评分
retrieve_content = []
max_score = 0
min_score = 0
avg_score = 0
rewrite_query = ""
try:
message_info = DifyTool.get_message_debug_info(appid=self.dify_appid, query=query)
for workflow_node in message_info["workflow_node_executions_info"]:
if workflow_node["title"] == "知识检索结果后处理":
outputs = json.loads(workflow_node["outputs"])
retrieve_content, max_score, min_score, avg_score = self.get_retrieve_info(query=query, outputs=outputs)
elif workflow_node["title"] == "问题优化结果解析":
outputs = json.loads(workflow_node["outputs"])
rewrite_query = outputs["optimize_query"]
except Exception as e:
print(f"处理问题 '{query}' 获取检索内容时发生错误: {str(e)}")
# 返回结果
return {
"问题": query,
"问题改写": rewrite_query,
"旧流程答案": old_answer,
"新流程答案": new_answer,
"回答判断": judge_result,
"答案词条": answer_title if answer_title else "",
"检索得到词条": "\n".join(retrieve_content) if retrieve_content else "未检索知识库",
}
def process(self):
"""
多线程处理所有问题并进行综合评判
读取Excel文件中的问题和答案,使用多线程进行评判,并将结果保存到输出Excel文件
"""
# 创建结果列表
results = []
# 创建进度条
with tqdm(total=len(self.answer_excel), desc="处理问题中") as pbar:
# 使用线程池执行任务
with concurrent.futures.ThreadPoolExecutor(max_workers=self.max_workers) as executor:
# 提交所有任务
future_to_row = {executor.submit(self.process_single_question, row): idx
for idx, row in self.answer_excel.iterrows()}
# 处理完成的任务
for future in concurrent.futures.as_completed(future_to_row):
idx = future_to_row[future]
try:
result = future.result()
with self.results_lock:
results.append(result)
except Exception as e:
print(f"处理第 {idx} 行时发生错误: {str(e)}")
finally:
pbar.update(1)
# 将结果转换为DataFrame并保存
results_df = pd.DataFrame(results)
results_df.to_excel(self.output_path, index=False)
print(f"处理完成,共处理 {len(results)} 条记录,结果已保存至 {self.output_path}")
# 测试函数
if __name__ == "__main__":
# 创建综合评判工具实例
judge = CombinedJudge(max_workers=30)
# 执行处理
judge.process()