diff --git a/rag2_0/demo/dialogue_to_workorder.py b/rag2_0/demo/dialogue_to_workorder.py index 0cb2fe7..3a7deb1 100755 --- a/rag2_0/demo/dialogue_to_workorder.py +++ b/rag2_0/demo/dialogue_to_workorder.py @@ -135,15 +135,23 @@ class DialogueToWorkorder: self.product_line_parser = PydanticOutputParser(pydantic_object=ProductLine) # 初始化LLM模型 + # self.llm_params = llm_params or { + # "temperature": 0.2, + # "top_p":0.95, + # "model": "deepseek-ai/DeepSeek-R1", + # "api_key": os.getenv("OPENAI_API_KEY"), + # "base_url": os.getenv("OPENAI_API_BASE"), + # "timeout": httpx.Timeout(600.0) + # } + self.api_key = "25t%Syu6I9yxX2IuTN" self.llm_params = llm_params or { "temperature": 0.2, "top_p":0.95, - "model": "deepseek-ai/DeepSeek-R1", - "api_key": os.getenv("OPENAI_API_KEY"), - "base_url": os.getenv("OPENAI_API_BASE"), + "model": "deepseek-r1", + "api_key": "25t%Syu6I9yxX2IuTN", + "base_url": "http://10.1.0.154:8000/v1", "timeout": httpx.Timeout(600.0) } - self.llm = self._get_llm_instance() def _get_llm_instance(self): @@ -266,7 +274,7 @@ class DialogueToWorkorder: 2、根据用户提出的问题,分析坐席提供的解决方法(比如:1、引导用户xxxx。2、告诉用户xxxxx)。以坐席的角度直接总结完整的解决方案或应对措施(不要出现"坐席"、"我"等字样) 3、提炼访客独立的核心问题(以访客的角度总结核心问题),核心问题衍生、细化后的请求合并到对应的核心问题中。不要单独列出衍生、细化后的请求。 -4、使用json格式输出(多个用户问题采用标准json数组格式输出): +4、使用json格式输出(多个用户问题采用标准json格式输出): {output_format} 输出示例: @@ -283,15 +291,15 @@ class DialogueToWorkorder: output_format = self.user_question_and_solution_parser.get_format_instructions() llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str) - response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False) - + response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key) + clean_output = re.sub(r'.*?', '', response.content, flags=re.DOTALL) try: - if response.content.count('user_question') == 1: - user_question_and_solution = self.user_question_and_solution_parser.parse(response.content) + if clean_output.count('user_question') == 1: + user_question_and_solution = self.user_question_and_solution_parser.parse(clean_output) return [user_question_and_solution] else: array_pattern = r'\[\s*(\{[\s\S]*?\}(?:\s*,\s*\{[\s\S]*?\})*)\s*\]' - array_match = re.search(array_pattern, response.content) + array_match = re.search(array_pattern, clean_output) if array_match: # 找到了JSON数组 json_array_str = '[' + array_match.group(1) + ']' @@ -313,8 +321,9 @@ class DialogueToWorkorder: except Exception as e: output_format = self.user_question_and_solution_list_parser.get_format_instructions() llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str) - response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False) - user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(response.content) + response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key) + clean_output = re.sub(r'.*?', '', response.content, flags=re.DOTALL) + user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(clean_output) return user_question_and_solution_temp.user_question_list return [user_question_and_solution] @@ -345,8 +354,9 @@ class DialogueToWorkorder: {dialogue_str} """ - response = self.llm.invoke(user_prompt=prompt, need_retry=False) - product_name_and_module_name = self.product_name_and_module_name_parser.parse(response.content) + response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key) + clean_output = re.sub(r'.*?', '', response.content, flags=re.DOTALL) + product_name_and_module_name = self.product_name_and_module_name_parser.parse(clean_output) return product_name_and_module_name.product_name, product_name_and_module_name.module_name @@ -374,8 +384,9 @@ class DialogueToWorkorder: {dialogue_str} """ - response = self.llm.invoke(user_prompt=prompt, need_retry=False) - product_line = self.product_line_parser.parse(response.content) + response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key) + clean_output = re.sub(r'.*?', '', response.content, flags=re.DOTALL) + product_line = self.product_line_parser.parse(clean_output) return product_line.product_line @@ -410,8 +421,9 @@ class DialogueToWorkorder: {dialogue_str} """ - response = self.llm.invoke(user_prompt=prompt, need_retry=False) - question_type = self.question_type_parser.parse(response.content) + response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key) + clean_output = re.sub(r'.*?', '', response.content, flags=re.DOTALL) + question_type = self.question_type_parser.parse(clean_output) return question_type.question_type @@ -446,8 +458,9 @@ class DialogueToWorkorder: """ - response = self.llm.invoke(user_prompt=prompt, need_retry=False) - is_complaint = self.is_complaint_parser.parse(response.content) + response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key) + clean_output = re.sub(r'.*?', '', response.content, flags=re.DOTALL) + is_complaint = self.is_complaint_parser.parse(clean_output) return (is_complaint.is_dissatisfaction, is_complaint.dissatisfaction_level, @@ -458,81 +471,85 @@ class DialogueToWorkorder: """处理单个会话的函数,用于多线程并发""" # if conversation_id!="b157aa91-3acb-11f0-a191-4fb224ef4b40": # return [] - # 获取工单基本信息 - base_workorder_dict = self.get_workorder_dict(conversation_rows) - # 分析用户问题和解决方案 - user_question_list = self.get_user_question_and_solution(conversation_rows) + try: + # 获取工单基本信息 + base_workorder_dict = self.get_workorder_dict(conversation_rows) + # 分析用户问题和解决方案 + user_question_list = self.get_user_question_and_solution(conversation_rows) - user_question_str="" - for user_question in user_question_list: - user_question_str = user_question_str + user_question.user_question.strip() + "\n" - user_question_str = user_question_str.strip() + user_question_str="" + for user_question in user_question_list: + user_question_str = user_question_str + user_question.user_question.strip() + "\n" + user_question_str = user_question_str.strip() - solution_str="" - for user_question in user_question_list: - solution_str = solution_str + user_question.solution.strip() + "\n" - solution_str = solution_str.strip() + solution_str="" + for user_question in user_question_list: + solution_str = solution_str + user_question.solution.strip() + "\n" + solution_str = solution_str.strip() - # 分析是否抱怨、是否投诉、抱怨级别 - is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = ( - self.get_is_complaint_and_is_complaint_level(conversation_rows)) + # 分析是否抱怨、是否投诉、抱怨级别 + is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = ( + self.get_is_complaint_and_is_complaint_level(conversation_rows)) - # 分析问题类型 - problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str) - - # 分析产品线 - product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str) - # 分析产品名称和模块名称 - if product_line != '': - product_name, module_name = self.get_product_name_and_module_name( - product_line, conversation_rows, product_detail_dict, user_question_str, solution_str) - else: - product_name = '' - module_name = '' - - # 创建工单列表 - workorder_list = [] - - - # 更新工单字典 - # base_workorder_dict.update({ - # "产品线": product_line, - # "产品名称": product_name, - # "模块名称": module_name, - # "客户问题": user_question_str, - # "问题类型": problem_type, - # "是否抱怨": "是" if is_dissatisfaction else '否', - # "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '', - # "抱怨级别": dissatisfaction_level if is_dissatisfaction else '', - # "是否投诉": "是" if is_complaint else '否', - # "解决方案": solution_str - # }) - # workorder_list.append(base_workorder_dict) - for user_question in user_question_list: - user_question_str = user_question.user_question - solution_str = user_question.solution + # 分析问题类型 + problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str) + + # 分析产品线 + product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str) + # 分析产品名称和模块名称 + if product_line != '': + product_name, module_name = self.get_product_name_and_module_name( + product_line, conversation_rows, product_detail_dict, user_question_str, solution_str) + else: + product_name = '' + module_name = '' + + # 创建工单列表 + workorder_list = [] - # 创建新的工单字典,复制基本信息 - workorder_dict = base_workorder_dict.copy() # 更新工单字典 - workorder_dict.update({ - "产品线": product_line, - "产品名称": product_name, - "模块名称": module_name, - "客户问题": user_question_str, - "问题类型": problem_type, - "是否抱怨": "是" if is_dissatisfaction else '否', - "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '', - "抱怨级别": dissatisfaction_level if is_dissatisfaction else '', - "是否投诉": "是" if is_complaint else '否', - "解决方案": solution_str - }) + # base_workorder_dict.update({ + # "产品线": product_line, + # "产品名称": product_name, + # "模块名称": module_name, + # "客户问题": user_question_str, + # "问题类型": problem_type, + # "是否抱怨": "是" if is_dissatisfaction else '否', + # "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '', + # "抱怨级别": dissatisfaction_level if is_dissatisfaction else '', + # "是否投诉": "是" if is_complaint else '否', + # "解决方案": solution_str + # }) + # workorder_list.append(base_workorder_dict) + for user_question in user_question_list: + user_question_str = user_question.user_question + solution_str = user_question.solution + + # 创建新的工单字典,复制基本信息 + workorder_dict = base_workorder_dict.copy() + + # 更新工单字典 + workorder_dict.update({ + "产品线": product_line, + "产品名称": product_name, + "模块名称": module_name, + "客户问题": user_question_str, + "问题类型": problem_type, + "是否抱怨": "是" if is_dissatisfaction else '否', + "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '', + "抱怨级别": dissatisfaction_level if is_dissatisfaction else '', + "是否投诉": "是" if is_complaint else '否', + "解决方案": solution_str + }) + + # 将工单添加到列表中 + workorder_list.append(workorder_dict) - # 将工单添加到列表中 - workorder_list.append(workorder_dict) - - return workorder_list + return workorder_list + except Exception as e: + logger.error(f"处理会话ID: {conversation_id} 时发生错误: {e}") + return [] def analyze_conversation_data(self, conversation_excel_path, product_detail_excel_path, max_workers=10, start_date=None, end_date=None): """分析会话数据主流程,使用多线程并发处理""" @@ -588,9 +605,10 @@ class DialogueToWorkorder: conversation_id = future_to_conversation[future] try: result_workorders = future.result() - # 将每个会话的所有工单添加到总列表中 - workorder_dict_list.extend(result_workorders) - logger.info(f"完成处理会话ID: {conversation_id},生成工单数量: {len(result_workorders)}") + if result_workorders: + # 将每个会话的所有工单添加到总列表中 + workorder_dict_list.extend(result_workorders) + logger.info(f"完成处理会话ID: {conversation_id},生成工单数量: {len(result_workorders)}") except Exception as exc: logger.error(f"处理会话ID: {conversation_id} 时发生错误: {exc}") @@ -678,7 +696,7 @@ def parse_arguments(): help='会话内容Excel文件路径') parser.add_argument('--product_detail_file', type=str, required=False, help='产品详情Excel文件路径') - parser.add_argument('--max_workers', type=int, default=16, + parser.add_argument('--max_workers', type=int, default=6, help='并发处理线程数,默认为16') parser.add_argument('--start_date', type=str, required=False,default="2025-06-10 16:08:00", help='开始日期,格式为YYYY-MM-DD')