更新对话到工单的处理逻辑，添加API密钥参数，优化响应内容清理，修复多个解析函数中的API调用，调整并发处理的最大线程数。

优化DifyCompareTest和ModelTool中的API调用逻辑，增加重试机制以提高稳定性，更新模型名称获取方式为使用环境变量。
2025-07-21 10:11:34 +08:00 · 2025-07-21 09:44:42 +08:00
3 changed files with 138 additions and 108 deletions
@@ -135,15 +135,23 @@ class DialogueToWorkorder:
        self.product_line_parser = PydanticOutputParser(pydantic_object=ProductLine)
        
        # 初始化LLM模型
+        # self.llm_params = llm_params or {
+        #     "temperature": 0.2,
+        #     "top_p":0.95,
+        #     "model": "deepseek-ai/DeepSeek-R1",
+        #     "api_key": os.getenv("OPENAI_API_KEY"),
+        #     "base_url": os.getenv("OPENAI_API_BASE"),
+        #     "timeout": httpx.Timeout(600.0)
+        # }
+        self.api_key = "25t%Syu6I9yxX2IuTN"
        self.llm_params = llm_params or {
            "temperature": 0.2,
            "top_p":0.95,
-            "model": "deepseek-ai/DeepSeek-R1",
-            "api_key": os.getenv("OPENAI_API_KEY"),
-            "base_url": os.getenv("OPENAI_API_BASE"),
+            "model": "deepseek-r1",
+            "api_key": "25t%Syu6I9yxX2IuTN",
+            "base_url": "http://10.1.0.154:8000/v1",
            "timeout": httpx.Timeout(600.0)
        }
-        
        self.llm = self._get_llm_instance()
    
    def _get_llm_instance(self):
@@ -266,7 +274,7 @@ class DialogueToWorkorder:
 2、根据用户提出的问题，分析坐席提供的解决方法(比如：1、引导用户xxxx。2、告诉用户xxxxx)。以坐席的角度直接总结完整的解决方案或应对措施(不要出现"坐席"、"我"等字样)
 3、提炼访客独立的核心问题(以访客的角度总结核心问题)，核心问题衍生、细化后的请求合并到对应的核心问题中。不要单独列出衍生、细化后的请求。

-4、使用json格式输出(多个用户问题采用标准json数组格式输出)：
+4、使用json格式输出(多个用户问题采用标准json格式输出)：
 {output_format}

 输出示例：
@@ -283,15 +291,15 @@ class DialogueToWorkorder:
        output_format = self.user_question_and_solution_parser.get_format_instructions()
        llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str)
        
-        response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False)
-        
+        response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key)
+        clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
        try:
-            if response.content.count('user_question') == 1:
-                user_question_and_solution = self.user_question_and_solution_parser.parse(response.content)
+            if clean_output.count('user_question') == 1:
+                user_question_and_solution = self.user_question_and_solution_parser.parse(clean_output)
                return [user_question_and_solution]
            else:
                array_pattern = r'\[\s*(\{[\s\S]*?\}(?:\s*,\s*\{[\s\S]*?\})*)\s*\]'
-                array_match = re.search(array_pattern, response.content)
+                array_match = re.search(array_pattern, clean_output)
                if array_match:
                    # 找到了JSON数组
                    json_array_str = '[' + array_match.group(1) + ']'
@@ -313,8 +321,9 @@ class DialogueToWorkorder:
        except Exception as e:
            output_format = self.user_question_and_solution_list_parser.get_format_instructions()
            llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str)
-            response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False)
-            user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(response.content)
+            response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key)
+            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
+            user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(clean_output)
            return user_question_and_solution_temp.user_question_list

        return [user_question_and_solution]
@@ -345,8 +354,9 @@ class DialogueToWorkorder:
 {dialogue_str}
        """
        
-        response = self.llm.invoke(user_prompt=prompt, need_retry=False)
-        product_name_and_module_name = self.product_name_and_module_name_parser.parse(response.content)
+        response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+        clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
+        product_name_and_module_name = self.product_name_and_module_name_parser.parse(clean_output)
        
        return product_name_and_module_name.product_name, product_name_and_module_name.module_name
    
@@ -374,8 +384,9 @@ class DialogueToWorkorder:
 {dialogue_str}
        """
        
-        response = self.llm.invoke(user_prompt=prompt, need_retry=False)
-        product_line = self.product_line_parser.parse(response.content)
+        response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+        clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
+        product_line = self.product_line_parser.parse(clean_output)
        
        return product_line.product_line
    
@@ -410,8 +421,9 @@ class DialogueToWorkorder:
 {dialogue_str}
        """
        
-        response = self.llm.invoke(user_prompt=prompt, need_retry=False)
-        question_type = self.question_type_parser.parse(response.content)
+        response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+        clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
+        question_type = self.question_type_parser.parse(clean_output)
        
        return question_type.question_type
    
@@ -446,8 +458,9 @@ class DialogueToWorkorder:

        """
        
-        response = self.llm.invoke(user_prompt=prompt, need_retry=False)
-        is_complaint = self.is_complaint_parser.parse(response.content)
+        response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+        clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
+        is_complaint = self.is_complaint_parser.parse(clean_output)
        
        return (is_complaint.is_dissatisfaction, 
                is_complaint.dissatisfaction_level, 
@@ -458,81 +471,85 @@ class DialogueToWorkorder:
        """处理单个会话的函数，用于多线程并发"""
        # if conversation_id!="b157aa91-3acb-11f0-a191-4fb224ef4b40":
        #     return []
-        # 获取工单基本信息
-        base_workorder_dict = self.get_workorder_dict(conversation_rows)
-        # 分析用户问题和解决方案
-        user_question_list = self.get_user_question_and_solution(conversation_rows)
+        try:
+            # 获取工单基本信息
+            base_workorder_dict = self.get_workorder_dict(conversation_rows)
+            # 分析用户问题和解决方案
+            user_question_list = self.get_user_question_and_solution(conversation_rows)

-        user_question_str=""
-        for user_question in user_question_list:
-            user_question_str = user_question_str + user_question.user_question.strip() + "\n"
-        user_question_str = user_question_str.strip()
+            user_question_str=""
+            for user_question in user_question_list:
+                user_question_str = user_question_str + user_question.user_question.strip() + "\n"
+            user_question_str = user_question_str.strip()

-        solution_str=""
-        for user_question in user_question_list:
-            solution_str = solution_str + user_question.solution.strip() + "\n"
-        solution_str = solution_str.strip()
+            solution_str=""
+            for user_question in user_question_list:
+                solution_str = solution_str + user_question.solution.strip() + "\n"
+            solution_str = solution_str.strip()

-        # 分析是否抱怨、是否投诉、抱怨级别
-        is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
-            self.get_is_complaint_and_is_complaint_level(conversation_rows))
+            # 分析是否抱怨、是否投诉、抱怨级别
+            is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
+                self.get_is_complaint_and_is_complaint_level(conversation_rows))

-        # 分析问题类型
-        problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
+            # 分析问题类型
+            problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
            
-        # 分析产品线
-        product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
-        # 分析产品名称和模块名称
-        if product_line != '':
-            product_name, module_name = self.get_product_name_and_module_name(
-                product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
-        else:
-            product_name = ''
-            module_name = ''
+            # 分析产品线
+            product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
+            # 分析产品名称和模块名称
+            if product_line != '':
+                product_name, module_name = self.get_product_name_and_module_name(
+                    product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
+            else:
+                product_name = ''
+                module_name = ''
            
-        # 创建工单列表
-        workorder_list = []
+            # 创建工单列表
+            workorder_list = []
            

-        # 更新工单字典
-        # base_workorder_dict.update({
-        #         "产品线": product_line,
-        #         "产品名称": product_name,
-        #         "模块名称": module_name,
-        #         "客户问题": user_question_str,
-        #         "问题类型": problem_type,
-        #         "是否抱怨": "是" if is_dissatisfaction else '否',
-        #         "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
-        #         "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
-        #         "是否投诉": "是" if is_complaint else '否',
-        #         "解决方案": solution_str
-        #     })
-        # workorder_list.append(base_workorder_dict)
-        for user_question in user_question_list:
-            user_question_str = user_question.user_question
-            solution_str = user_question.solution
-            
-            # 创建新的工单字典，复制基本信息
-            workorder_dict = base_workorder_dict.copy()
-
            # 更新工单字典
-            workorder_dict.update({
-                "产品线": product_line,
-                "产品名称": product_name,
-                "模块名称": module_name,
-                "客户问题": user_question_str,
-                "问题类型": problem_type,
-                "是否抱怨": "是" if is_dissatisfaction else '否',
-                "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
-                "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
-                "是否投诉": "是" if is_complaint else '否',
-                "解决方案": solution_str
-            })
+            # base_workorder_dict.update({
+            #         "产品线": product_line,
+            #         "产品名称": product_name,
+            #         "模块名称": module_name,
+            #         "客户问题": user_question_str,
+            #         "问题类型": problem_type,
+            #         "是否抱怨": "是" if is_dissatisfaction else '否',
+            #         "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
+            #         "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
+            #         "是否投诉": "是" if is_complaint else '否',
+            #         "解决方案": solution_str
+            #     })
+            # workorder_list.append(base_workorder_dict)
+            for user_question in user_question_list:
+                user_question_str = user_question.user_question
+                solution_str = user_question.solution
                
-            # 将工单添加到列表中
-            workorder_list.append(workorder_dict)
+                # 创建新的工单字典，复制基本信息
+                workorder_dict = base_workorder_dict.copy()

-        return workorder_list
+                # 更新工单字典
+                workorder_dict.update({
+                    "产品线": product_line,
+                    "产品名称": product_name,
+                    "模块名称": module_name,
+                    "客户问题": user_question_str,
+                    "问题类型": problem_type,
+                    "是否抱怨": "是" if is_dissatisfaction else '否',
+                    "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
+                    "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
+                    "是否投诉": "是" if is_complaint else '否',
+                    "解决方案": solution_str
+                })
+                
+                # 将工单添加到列表中
+                workorder_list.append(workorder_dict)
+            
+            return workorder_list
+        except Exception as e:
+            logger.error(f"处理会话ID: {conversation_id} 时发生错误: {e}")
+            return []
    
    def analyze_conversation_data(self, conversation_excel_path, product_detail_excel_path, max_workers=10, start_date=None, end_date=None):
        """分析会话数据主流程，使用多线程并发处理"""
@@ -588,9 +605,10 @@ class DialogueToWorkorder:
                conversation_id = future_to_conversation[future]
                try:
                    result_workorders = future.result()
-                    # 将每个会话的所有工单添加到总列表中
-                    workorder_dict_list.extend(result_workorders)
-                    logger.info(f"完成处理会话ID: {conversation_id}，生成工单数量: {len(result_workorders)}")
+                    if result_workorders:
+                        # 将每个会话的所有工单添加到总列表中
+                        workorder_dict_list.extend(result_workorders)
+                        logger.info(f"完成处理会话ID: {conversation_id}，生成工单数量: {len(result_workorders)}")
                except Exception as exc:
                    logger.error(f"处理会话ID: {conversation_id} 时发生错误: {exc}")
        
@@ -678,7 +696,7 @@ def parse_arguments():
                        help='会话内容Excel文件路径')
    parser.add_argument('--product_detail_file', type=str, required=False,
                        help='产品详情Excel文件路径')
-    parser.add_argument('--max_workers', type=int, default=16,
+    parser.add_argument('--max_workers', type=int, default=6,
                        help='并发处理线程数，默认为16')
    parser.add_argument('--start_date', type=str, required=False,default="2025-06-10 16:08:00",
                        help='开始日期，格式为YYYY-MM-DD')
@@ -46,7 +46,7 @@ class DifyCompareTest:
        self.first_wiki_client = ChatClient(api_key="app-gocvuqduBnJptYNPpnW9V9R6", base_url=os.getenv("DIFY_BSAE_URL"))
        # 词条与工单同时检索
        self.both_wiki_worker_client = ChatClient(api_key="app-CPoOMaGDsLRPAe9TW7Xjhszy", base_url=os.getenv("DIFY_BSAE_URL"))
-        self.llm = OpenAiLLM(base_url=os.getenv("OPENAI_API_BASE"), model="deepseek-ai/DeepSeek-R1")
+        self.llm = OpenAiLLM(base_url=os.getenv("OPENAI_API_BASE"), model=os.getenv("MODEL_NAME"))

    def llm_judge_answer(self, old_answer: str, now_answer: str):
        user_prompt = f"""
@@ -76,7 +76,6 @@ class DifyCompareTest:
                response.content = response.content.strip()
                clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
                result = JsonOutputParser().parse(clean_output)
-                result = json.loads(clean_output)
                return "回答基本相同" if result.get("is_same", False) else "回答基本不相同"
            except Exception as e:
                retry_count += 1
@@ -91,17 +90,28 @@ class DifyCompareTest:
        
    def process_workflow(self, workflow_name, client, inputs, query, old_answer):
        """处理单个工作流调用"""
-        try:
-            response = client.create_chat_message(
-                inputs=inputs, query=query, user="AutoCodeRun", response_mode="blocking"
-            )
-            result = response.json()
-            answer = result.get('answer', "")
-            judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
-            return answer, judge_result
-        except Exception as e:
-            logging.error(f"{workflow_name}调用失败: {e}")
-            return '', ''
+        max_retries = 3
+        retry_count = 0
+        
+        while retry_count < max_retries:
+            try:
+                response = client.create_chat_message(
+                    inputs=inputs, query=query, user="AutoCodeRun", response_mode="blocking"
+                )
+                result = response.json()
+                answer = result.get('answer', "")
+                if len(answer) == 0:
+                    raise Exception(f"回答为空: {result}")
+                judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
+                return answer, judge_result
+            except Exception as e:
+                retry_count += 1
+                if retry_count >= max_retries:
+                    logging.error(f"{workflow_name}调用失败 (尝试 {max_retries} 次后): {e}")
+                    return '', ''
+                else:
+                    import time
+                    time.sleep(1)  # 等待1秒后重试

    def process_single_row(self, index, row):
        """处理单行数据的方法，用于多线程执行"""
@@ -247,7 +257,7 @@ if __name__ == "__main__":
        
        # 处理第一个文件
        excel_files = [
-            ("data/excel/5月.xlsx", "data/excel/5月问答对比.xlsx"),
+            # ("data/excel/5月.xlsx", "data/excel/5月问答对比.xlsx"),
            ("data/excel/其他月.xlsx", "data/excel/其他月问答对比.xlsx")
        ]
        
@@ -240,7 +240,7 @@ class OpenAiLLM:

        self._kwargs = kwargs

-    def invoke(self, user_prompt="你是谁？", need_retry=True,**extra_kwargs):
+    def invoke(self, user_prompt="你是谁？", need_retry=True, api_key:str = None, **extra_kwargs):
        # 初始化 OpenAI 客户端

        max_retries = 3
@@ -253,10 +253,13 @@ class OpenAiLLM:
            timeout = httpx.Timeout(300.0) 
            self._kwargs["timeout"] = timeout

+        if api_key is None:
+            api_key = APIKeyManager.get_api_key()  
+            
        if need_retry:
            while retry_count < max_retries:
                try:
-                    api_key = APIKeyManager.get_api_key()  
+                    
                    # 使用with语句创建客户端，确保资源会被正确释放
                    with OpenAI(api_key=api_key, base_url=self._url) as client:
                        # 创建 Completion 请求. 超时120s
@@ -276,7 +279,6 @@ class OpenAiLLM:
        else:
            try:
                # 创建 Completion 请求. 超时120s
-                api_key = APIKeyManager.get_api_key()  
                # 使用with语句创建客户端，确保资源会被正确释放
                with OpenAI(api_key=api_key, base_url=self._url) as client:
                    completion = client.chat.completions.create(
Author	SHA1	Message	Date
ouyangyouzhang	17ff99bad1	更新对话到工单的处理逻辑，添加API密钥参数，优化响应内容清理，修复多个解析函数中的API调用，调整并发处理的最大线程数。	2025-07-21 10:11:34 +08:00
ouyangyouzhang	0dda581c8e	优化DifyCompareTest和ModelTool中的API调用逻辑，增加重试机制以提高稳定性，更新模型名称获取方式为使用环境变量。	2025-07-21 09:44:42 +08:00