工单提问相同时合并工单内容

This commit is contained in:
2025-08-05 11:14:25 +08:00
parent 142ae00844
commit 1cde82cc86
+31 -10
View File
@@ -128,27 +128,48 @@ class WorkorderToDify:
logging.info(f"技能组 {skill_group}: {len(data)} 条工单") logging.info(f"技能组 {skill_group}: {len(data)} 条工单")
def deduplicate_workorders(self):
    """Merge work orders that share the same question within each skill group.

    For every skill group in ``self.skill_group_data``, work orders are
    grouped by their ``document_name``. Each group collapses into a single
    work order whose ``content`` is every member's content, in input order,
    joined with a ``---`` separator line. The merged work order starts as a
    copy of the first one seen for that question; its ``create_time`` and
    ``conversation_id`` are taken from the member with the greatest
    ``create_time``.

    The per-group lists in ``self.skill_group_data`` are replaced in place.
    The work order dicts passed in are not modified.

    Raises:
        KeyError: if a work order lacks ``document_name``, ``create_time``
            or ``content``, or if a newer duplicate lacks
            ``conversation_id``.
    """
    separator = "\n\n---\n\n"
    logging.info("开始对工单进行去重处理")

    for skill_group in self.skill_group_data:
        workorders = self.skill_group_data[skill_group]
        logging.info(f"处理技能组: {skill_group}, 去重前工单数量: {len(workorders)}")

        # question -> {"workorder": merged dict, "datetime": newest time seen,
        #              "contents": every content for that question, in order}
        merged_workorders = {}

        for workorder in workorders:
            query = workorder["document_name"]
            create_time = workorder["create_time"]
            content = workorder["content"]

            entry = merged_workorders.get(query)
            if entry is None:
                # Copy immediately so later updates never touch the
                # caller's original dict.
                merged_workorders[query] = {
                    "workorder": workorder.copy(),
                    "datetime": create_time,
                    "contents": [content],
                }
                continue

            entry["contents"].append(content)
            if create_time > entry["datetime"]:
                # A newer duplicate: keep its timestamp and conversation id,
                # while the accumulated contents stay untouched.
                entry["datetime"] = create_time
                entry["workorder"].update({
                    "create_time": create_time,
                    "conversation_id": workorder["conversation_id"],
                })

        # Write the joined content into each merged work order and replace
        # the group's list with the deduplicated result.
        result_workorders = []
        for entry in merged_workorders.values():
            merged = entry["workorder"]
            merged["content"] = separator.join(entry["contents"])
            result_workorders.append(merged)

        self.skill_group_data[skill_group] = result_workorders
        logging.info(f"技能组 {skill_group} 去重完成, 去重后工单数量: {len(self.skill_group_data[skill_group])}")

    logging.info("所有技能组工单去重处理完成")