更新对话转工单处理逻辑，增强用户问题和解决方案提取功能，添加槽位填充支持，调整最大工作线程数为10，优化意图识别API，重排序匹配术语，改进数据模型以支持软件名称枚举，提升代码结构和可读性。

2025-06-03 10:35:25 +08:00
parent d4ff7b6fad
commit 38b6f66925
8 changed files with 160 additions and 92 deletions
@@ -207,10 +207,10 @@ class DialogueToWorkorder:
        """分析用户问题和解决方案"""
        dialogue_str = self.get_dialogue_str(conversation_rows)
        
-        prompt = """请从以下电力造价相关的客服对话记录中，识别并精准提取用户提出的问题及对应坐席提供的解决方案。
-1、理解对话记录，识别用户在此次对话中提出的诉求
-2、根据用户提出的诉求，分析坐席提供的解决方法
-3、使用json格式输出：
+        prompt = """请从以下电力造价相关的客服对话记录中，识别并总结用户提出的问题及对应坐席提供的解决方案。(注意指代消除)
+1、理解对话记录，总结用户在此次对话中提出的核心诉求(希望解决的问题)。以用户的角度总结。
+2、根据用户提出的诉求，分析坐席提供的解决方法(比如：1、引导用户xxxx。2、告诉用户xxxxx)。以坐席的角度直接总结解决方案(不要出现"坐席"、"我"等字样)
+3、使用json格式输出(多个用户诉求采用数组格式输出)：
 {output_format}

 输出示例：
@@ -235,6 +235,7 @@ class DialogueToWorkorder:
                user_question_and_solution = self.user_question_and_solution_parser.parse(response.content)
                return [user_question_and_solution]
            else:
+                
                raise Exception("解析失败")
        except Exception as e:
            output_format = self.user_question_and_solution_list_parser.get_format_instructions()
@@ -349,14 +350,13 @@ class DialogueToWorkorder:
        prompt = f"""
 请根据以下对话记录分析访客情绪是否对博微软件或者坐席服务存在明显抱怨，并按照以下结构输出JSON格式分析结果：

-1. 抱怨识别：判断访客是否对博微软件功能或者坐席服务存在**明显抱怨语气或词语**
+1. 抱怨识别：判断访客是否对博微软件功能或者坐席服务存在抱怨或不满
 2. 抱怨分级（如存在抱怨）：
-   - 一般抱怨：明确提出对博微软件功能或者坐席服务存在不满
-   - 中等抱怨：明确提出对博微软件功能或者坐席服务存在不满，语气较为强烈
-   - 严重抱怨：对博微软件功能或者坐席服务使用激烈言辞或威胁性语言
+   - 一般抱怨：明确表达出对博微软件功能或者坐席服务存在不满
+   - 中等抱怨：明确表达出对博微软件功能或者坐席服务存在不满，语气较为强烈
+   - 严重抱怨：对博微软件功能或者坐席服务使用激烈言辞
 3. 投诉倾向：是否明确/暗示将进行投诉
 4. 抱怨对象：坐席服务态度/业务能力 或 博微功能问题(注意忽略对非博微软件或坐席的抱怨)
-5. 内容摘录：标注具体抱怨语句

 示例输出：
 {{
@@ -372,11 +372,6 @@ class DialogueToWorkorder:
 当前对话记录：
 {dialogue_str}

-附加分析要求：
-1. 区分客观问题描述与主观情绪表达
-2. 注意抱怨升级趋势（如从一般抱怨发展为严重抗议）
-3. 关注非文本线索（如有记录可分析语气词、停顿等副语言特征）
-4. 标注涉及多个抱怨对象的情况
        """
        
        response = self.llm.invoke(user_prompt=prompt)
@@ -390,30 +385,46 @@ class DialogueToWorkorder:
    def process_conversation(self, conversation_id, conversation_rows, product_detail_dict):
        """处理单个会话的函数，用于多线程并发"""
        # 获取工单基本信息
-        workorder_dict = self.get_workorder_dict(conversation_rows)
+        base_workorder_dict = self.get_workorder_dict(conversation_rows)
        # 分析用户问题和解决方案
        user_question_list = self.get_user_question_and_solution(conversation_rows)

+        # 获取第一个问题和解决方案，用于后续分析
+        if user_question_list and len(user_question_list) > 0:
+            first_question = user_question_list[0]
+            user_question_str = first_question.user_question
+            solution_str = first_question.solution
+        else:
+            user_question_str = ""
+            solution_str = ""
+
        # 分析是否抱怨、是否投诉、抱怨级别
        is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
            self.get_is_complaint_and_is_complaint_level(conversation_rows))

+        # 分析问题类型
+        problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
+        
+        # 分析产品线
+        product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
+        # 分析产品名称和模块名称
+        if product_line != '':
+            product_name, module_name = self.get_product_name_and_module_name(
+                product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
+        else:
+            product_name = ''
+            module_name = ''
+        
+        # 创建工单列表
+        workorder_list = []
+        
        for user_question in user_question_list:
            user_question_str = user_question.user_question
            solution_str = user_question.solution
-            # 分析问题类型
-            problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
            
-            # 分析产品线
-            product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
-            # 分析产品名称和模块名称
-            if product_line != '':
-                product_name, module_name = self.get_product_name_and_module_name(
-                    product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
-            else:
-                product_name = ''
-                module_name = ''
-        
+            # 创建新的工单字典，复制基本信息
+            workorder_dict = base_workorder_dict.copy()
+
            # 更新工单字典
            workorder_dict.update({
                "产品线": product_line,
@@ -426,10 +437,13 @@ class DialogueToWorkorder:
                "是否投诉": "是" if is_complaint else '否',
                "解决方案": (solution_str + '\n存在抱怨:' + dissatisfaction_reasoning) if is_dissatisfaction else solution_str
            })
+            
+            # 将工单添加到列表中
+            workorder_list.append(workorder_dict)
        
-        return workorder_dict
+        return workorder_list
    
-    def analyze_conversation_data(self, conversation_excel_path, product_detail_excel_path, max_workers=4):
+    def analyze_conversation_data(self, conversation_excel_path, product_detail_excel_path, max_workers=10):
        """分析会话数据主流程，使用多线程并发处理"""
        # 读取Excel文件
        df = pd.read_excel(conversation_excel_path)
@@ -457,9 +471,10 @@ class DialogueToWorkorder:
            for future in concurrent.futures.as_completed(future_to_conversation):
                conversation_id = future_to_conversation[future]
                try:
-                    workorder_dict = future.result()
-                    workorder_dict_list.append(workorder_dict)
-                    print(f"完成处理会话ID: {conversation_id}")
+                    result_workorders = future.result()
+                    # 将每个会话的所有工单添加到总列表中
+                    workorder_dict_list.extend(result_workorders)
+                    print(f"完成处理会话ID: {conversation_id}，生成工单数量: {len(result_workorders)}")
                except Exception as exc:
                    print(f"处理会话ID: {conversation_id} 时发生错误: {exc}")
        
@@ -121,7 +121,7 @@ def process_query(recognizer, query):
                time.sleep(10 * retry_count)

 # 示例查询
-examples_query = """这个安全文明费费率在哪里调"""
+examples_query = """储能软件组合件界面，点击隐藏空项目划分后界面没有任何变化"""

 def main():
    """
@@ -138,7 +138,7 @@ def main():
    
    # 读取提问数据
    current_dir = os.path.dirname(os.path.abspath(__file__))
-    data_file = os.path.join(current_dir, "..", "..", "data", "excel", "测试提问数据.xlsx")
+    data_file = os.path.join(current_dir, "..", "..", "data", "excel", "400条提问意图分类数据-原始.xlsx")
    
    # 检测是否为调试模式，调试模式下使用examples_query，否则从Excel读取

@@ -150,7 +150,7 @@ def main():
    
    if not is_debug:

-        max_workers = 5  # 减少并发数以避免API限制
+        max_workers = 10  # number of concurrent worker threads; lower this if the API starts rate-limiting
        logging.info(f"共有 {len(examples)} 个问题需要处理，使用 {max_workers} 个并发线程")
        # 创建一个与输入顺序相同的结果列表
        results = [None] * len(examples)