优化对话转工单处理逻辑,调整LLM参数,增强用户问题和解决方案的提取功能,添加槽位填充支持,提升代码结构和可读性。

This commit is contained in:
2025-05-30 11:10:24 +08:00
parent 05caedc4fa
commit d4ff7b6fad
6 changed files with 469 additions and 123 deletions
+92 -62
View File
@@ -15,6 +15,7 @@ import json
import concurrent.futures
from tqdm import tqdm
import time
import sys
# 加载环境变量
load_dotenv()
@@ -43,7 +44,7 @@ def load_questions_from_excel(file_path=None):
def process_query(recognizer, query):
"""
处理单个查询,支持重试机制
处理单个查询,支持重试机制,并包含槽位填充
Args:
recognizer: 意图识别器实例
@@ -57,32 +58,48 @@ def process_query(recognizer, query):
while retry_count <= max_retries:
try:
# 如果是重试,添加重试信息到日志
classification, keywords, rewrite, query_keys = recognizer.process_query(query)
# 使用新的process_query_with_slots方法处理查询
result = recognizer.process_query_with_slots(query)
# 将keywords对象转换为字符串
# 提取分类信息
classification = result["classification"]
# 提取关键词信息
keywords = result["keywords"]
keywords_str = ""
if keywords and keywords.terms:
if keywords and keywords.get("terms"):
term_details = []
for term in keywords.terms:
for term in keywords["terms"]:
term_info = {
"名称": term.name,
"同义词": ";".join(term.synonymous) if term.synonymous else "",
"描述": term.description
"名称": term["name"],
"同义词": ";".join(term["synonymous"]) if term["synonymous"] else "",
"描述": term["description"]
}
term_details.append(term_info)
# 将term_details转换为JSON字符串,确保中文正确显示
keywords_str = json.dumps(term_details, ensure_ascii=False, indent=2)
# 提取槽位填充信息
slot_filling = result.get("slot_filling", {})
slot_filling_str = ""
if slot_filling and "filled_data" in slot_filling:
# 格式化槽位填充结果
slot_filling_str = json.dumps({
"是否完整": slot_filling.get("is_complete", False),
"缺失槽位": slot_filling.get("missing_slots", {}),
"填充数据": slot_filling.get("filled_data", {})
}, ensure_ascii=False, indent=2)
# 处理成功,返回结果
return {
"提问": query,
"问题拆解": query_keys,
"一级分类": classification.vertical_classification,
"二级分类": classification.sub_classification,
"问题改写": rewrite.rewrite,
"检索的关键词": keywords_str
"问题拆解": result["query_keys"],
"一级分类": classification["vertical_classification"],
"二级分类": classification["sub_classification"],
"问题改写": result["rewrite"]["rewrite"],
"检索的关键词": keywords_str,
"槽位填充": slot_filling_str
}
except Exception as e:
@@ -96,13 +113,15 @@ def process_query(recognizer, query):
"一级分类": "处理出错",
"二级分类": "处理出错",
"问题改写": "处理出错",
"检索的关键词": f"重试 {max_retries} 次后失败: {str(e)}"
"检索的关键词": f"重试 {max_retries} 次后失败: {str(e)}",
"槽位填充": "处理出错"
}
else:
# 可以在这里添加延迟,避免过快重试
time.sleep(10 * retry_count)
examples_query = """下载软件在哪下载?"""
# 示例查询
examples_query = """这个安全文明费费率在哪里调"""
def main():
"""
@@ -119,56 +138,67 @@ def main():
# 读取提问数据
current_dir = os.path.dirname(os.path.abspath(__file__))
data_file = os.path.join(current_dir, "..", "..", "data", "excel", "200条提问数据.xlsx")
examples = load_questions_from_excel(data_file)
# examples = examples_query.split("\n")
max_workers = 20
logging.info(f"共有 {len(examples)} 个问题需要处理,使用 {max_workers} 个并发线程")
data_file = os.path.join(current_dir, "..", "..", "data", "excel", "测试提问数据.xlsx")
# 创建一个与输入顺序相同的结果列表
results = [None] * len(examples)
# 检测是否为调试模式,调试模式下使用examples_query,否则从Excel读取
is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None
if is_debug:
examples = examples_query.strip().split("\n")
else:
examples = load_questions_from_excel(data_file)
# 使用线程池进行并发处理
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
# 提交所有任务并记录它们的索引
future_to_index = {}
if not is_debug:
max_workers = 5 # 减少并发数以避免API限制
logging.info(f"共有 {len(examples)} 个问题需要处理,使用 {max_workers} 个并发线程")
# 创建一个与输入顺序相同的结果列表
results = [None] * len(examples)
# 使用线程池进行并发处理
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
# 提交所有任务并记录它们的索引
future_to_index = {}
for idx, query in enumerate(examples):
future = executor.submit(process_query, recognizer, query)
future_to_index[future] = idx
# 使用tqdm显示进度条
for future in tqdm(concurrent.futures.as_completed(future_to_index), total=len(examples), desc="处理进度"):
idx = future_to_index[future]
result = future.result()
# 将结果放在与输入相同的位置
results[idx] = result
# 将结果保存到Excel文件
results_df = pd.DataFrame(results)
output_file = os.path.join(current_dir, "..", "..", "data", "excel", "测试提问数据_槽位填充结果.xlsx")
# 使用ExcelWriter设置格式
with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
results_df.to_excel(writer, index=False, sheet_name='Sheet1')
# 获取工作簿和工作表对象
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# 设置列宽(单位:Excel字符宽度单位,不是像素)
# 定义列宽(厘米转为Excel单位,1cm约等于4.7个Excel单位)
worksheet.set_column('A:A', 60) # 提问列 60个Excel单位
worksheet.set_column('B:B', 20) # 问题拆解 20个Excel单位
worksheet.set_column('C:C', 20) # 一级分类 20个Excel单位
worksheet.set_column('D:D', 20) # 二级分类 20个Excel单位
worksheet.set_column('E:E', 60) # 问题改写 60个Excel单位
worksheet.set_column('F:F', 60) # 检索到的关键词 60个Excel单位
worksheet.set_column('G:G', 80) # 槽位填充 80个Excel单位
# 设置所有行高为20磅
for i in range(len(results_df) + 1): # +1 是为了包括表头
worksheet.set_row(i, 20)
else:
for idx, query in enumerate(examples):
future = executor.submit(process_query, recognizer, query)
future_to_index[future] = idx
process_query(recognizer, query)
# 使用tqdm显示进度条
for future in tqdm(concurrent.futures.as_completed(future_to_index), total=len(examples), desc="处理进度"):
idx = future_to_index[future]
result = future.result()
# 将结果放在与输入相同的位置
results[idx] = result
# 将结果保存到Excel文件
results_df = pd.DataFrame(results)
output_file = os.path.join(current_dir, "..", "..", "data", "excel", "200条提问数据_重写结果.xlsx")
# 使用ExcelWriter设置格式
with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
results_df.to_excel(writer, index=False, sheet_name='Sheet1')
# 获取工作簿和工作表对象
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# 设置列宽(单位:Excel字符宽度单位,不是像素)
# 定义列宽(厘米转为Excel单位,1cm约等于4.7个Excel单位)
worksheet.set_column('A:A', 60) # 提问列 60个Excel单位
worksheet.set_column('B:B', 20) # 问题拆解 20个Excel单位
worksheet.set_column('C:C', 20) # 一级分类 20个Excel单位
worksheet.set_column('D:D', 20) # 二级分类 20个Excel单位
worksheet.set_column('E:E', 60) # 问题改写 60个Excel单位
worksheet.set_column('F:F', 60) # 检索到的关键词 60个Excel单位
# 设置所有行高为20磅
for i in range(len(results_df) + 1): # +1 是为了包括表头
worksheet.set_row(i, 20)
logging.info(f"处理完成,结果已保存至: {output_file}")
def setup_logging():