diff --git a/rag2_0/demo/dialogue_to_workorder.py b/rag2_0/demo/dialogue_to_workorder.py
index 0cb2fe7..3a7deb1 100755
--- a/rag2_0/demo/dialogue_to_workorder.py
+++ b/rag2_0/demo/dialogue_to_workorder.py
@@ -135,15 +135,23 @@ class DialogueToWorkorder:
self.product_line_parser = PydanticOutputParser(pydantic_object=ProductLine)
# 初始化LLM模型
+ # self.llm_params = llm_params or {
+ # "temperature": 0.2,
+ # "top_p":0.95,
+ # "model": "deepseek-ai/DeepSeek-R1",
+ # "api_key": os.getenv("OPENAI_API_KEY"),
+ # "base_url": os.getenv("OPENAI_API_BASE"),
+ # "timeout": httpx.Timeout(600.0)
+ # }
+ self.api_key = os.getenv("OPENAI_API_KEY")  # credential comes from the environment; never commit secrets to the repo
self.llm_params = llm_params or {
"temperature": 0.2,
"top_p":0.95,
- "model": "deepseek-ai/DeepSeek-R1",
- "api_key": os.getenv("OPENAI_API_KEY"),
- "base_url": os.getenv("OPENAI_API_BASE"),
+ "model": "deepseek-r1",
+ "api_key": self.api_key,  # single source of truth, same key that is passed to invoke()
+ "base_url": os.getenv("OPENAI_API_BASE", "http://10.1.0.154:8000/v1"),  # env override; internal endpoint is only the fallback
"timeout": httpx.Timeout(600.0)
}
-
self.llm = self._get_llm_instance()
def _get_llm_instance(self):
@@ -266,7 +274,7 @@ class DialogueToWorkorder:
2、根据用户提出的问题,分析坐席提供的解决方法(比如:1、引导用户xxxx。2、告诉用户xxxxx)。以坐席的角度直接总结完整的解决方案或应对措施(不要出现"坐席"、"我"等字样)
3、提炼访客独立的核心问题(以访客的角度总结核心问题),核心问题衍生、细化后的请求合并到对应的核心问题中。不要单独列出衍生、细化后的请求。
-4、使用json格式输出(多个用户问题采用标准json数组格式输出):
+4、使用json格式输出(多个用户问题采用标准json格式输出):
{output_format}
输出示例:
@@ -283,15 +291,15 @@ class DialogueToWorkorder:
output_format = self.user_question_and_solution_parser.get_format_instructions()
llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str)
- response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False)
-
+ response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key)
+ clean_output = re.sub(r'\x3cthink\x3e.*?\x3c/think\x3e', '', response.content, flags=re.DOTALL)  # strip the model's think-tag reasoning block (\x3c/\x3e are '<'/'>'); a bare r'.*?' only matches empty strings and removes nothing
try:
- if response.content.count('user_question') == 1:
- user_question_and_solution = self.user_question_and_solution_parser.parse(response.content)
+ if clean_output.count('user_question') == 1:
+ user_question_and_solution = self.user_question_and_solution_parser.parse(clean_output)
return [user_question_and_solution]
else:
array_pattern = r'\[\s*(\{[\s\S]*?\}(?:\s*,\s*\{[\s\S]*?\})*)\s*\]'
- array_match = re.search(array_pattern, response.content)
+ array_match = re.search(array_pattern, clean_output)
if array_match:
# 找到了JSON数组
json_array_str = '[' + array_match.group(1) + ']'
@@ -313,8 +321,9 @@ class DialogueToWorkorder:
except Exception as e:
output_format = self.user_question_and_solution_list_parser.get_format_instructions()
llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str)
- response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False)
- user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(response.content)
+ response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key)
+ clean_output = re.sub(r'\x3cthink\x3e.*?\x3c/think\x3e', '', response.content, flags=re.DOTALL)  # strip the model's think-tag reasoning block (\x3c/\x3e are '<'/'>'); a bare r'.*?' only matches empty strings and removes nothing
+ user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(clean_output)
return user_question_and_solution_temp.user_question_list
return [user_question_and_solution]
@@ -345,8 +354,9 @@ class DialogueToWorkorder:
{dialogue_str}
"""
- response = self.llm.invoke(user_prompt=prompt, need_retry=False)
- product_name_and_module_name = self.product_name_and_module_name_parser.parse(response.content)
+ response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+ clean_output = re.sub(r'\x3cthink\x3e.*?\x3c/think\x3e', '', response.content, flags=re.DOTALL)  # strip the model's think-tag reasoning block (\x3c/\x3e are '<'/'>'); a bare r'.*?' only matches empty strings and removes nothing
+ product_name_and_module_name = self.product_name_and_module_name_parser.parse(clean_output)
return product_name_and_module_name.product_name, product_name_and_module_name.module_name
@@ -374,8 +384,9 @@ class DialogueToWorkorder:
{dialogue_str}
"""
- response = self.llm.invoke(user_prompt=prompt, need_retry=False)
- product_line = self.product_line_parser.parse(response.content)
+ response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+ clean_output = re.sub(r'\x3cthink\x3e.*?\x3c/think\x3e', '', response.content, flags=re.DOTALL)  # strip the model's think-tag reasoning block (\x3c/\x3e are '<'/'>'); a bare r'.*?' only matches empty strings and removes nothing
+ product_line = self.product_line_parser.parse(clean_output)
return product_line.product_line
@@ -410,8 +421,9 @@ class DialogueToWorkorder:
{dialogue_str}
"""
- response = self.llm.invoke(user_prompt=prompt, need_retry=False)
- question_type = self.question_type_parser.parse(response.content)
+ response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+ clean_output = re.sub(r'\x3cthink\x3e.*?\x3c/think\x3e', '', response.content, flags=re.DOTALL)  # strip the model's think-tag reasoning block (\x3c/\x3e are '<'/'>'); a bare r'.*?' only matches empty strings and removes nothing
+ question_type = self.question_type_parser.parse(clean_output)
return question_type.question_type
@@ -446,8 +458,9 @@ class DialogueToWorkorder:
"""
- response = self.llm.invoke(user_prompt=prompt, need_retry=False)
- is_complaint = self.is_complaint_parser.parse(response.content)
+ response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
+ clean_output = re.sub(r'\x3cthink\x3e.*?\x3c/think\x3e', '', response.content, flags=re.DOTALL)  # strip the model's think-tag reasoning block (\x3c/\x3e are '<'/'>'); a bare r'.*?' only matches empty strings and removes nothing
+ is_complaint = self.is_complaint_parser.parse(clean_output)
return (is_complaint.is_dissatisfaction,
is_complaint.dissatisfaction_level,
@@ -458,81 +471,85 @@ class DialogueToWorkorder:
"""处理单个会话的函数,用于多线程并发"""
# if conversation_id!="b157aa91-3acb-11f0-a191-4fb224ef4b40":
# return []
- # 获取工单基本信息
- base_workorder_dict = self.get_workorder_dict(conversation_rows)
- # 分析用户问题和解决方案
- user_question_list = self.get_user_question_and_solution(conversation_rows)
+ try:
+ # 获取工单基本信息
+ base_workorder_dict = self.get_workorder_dict(conversation_rows)
+ # 分析用户问题和解决方案
+ user_question_list = self.get_user_question_and_solution(conversation_rows)
- user_question_str=""
- for user_question in user_question_list:
- user_question_str = user_question_str + user_question.user_question.strip() + "\n"
- user_question_str = user_question_str.strip()
+ user_question_str=""
+ for user_question in user_question_list:
+ user_question_str = user_question_str + user_question.user_question.strip() + "\n"
+ user_question_str = user_question_str.strip()
- solution_str=""
- for user_question in user_question_list:
- solution_str = solution_str + user_question.solution.strip() + "\n"
- solution_str = solution_str.strip()
+ solution_str=""
+ for user_question in user_question_list:
+ solution_str = solution_str + user_question.solution.strip() + "\n"
+ solution_str = solution_str.strip()
- # 分析是否抱怨、是否投诉、抱怨级别
- is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
- self.get_is_complaint_and_is_complaint_level(conversation_rows))
+ # 分析是否抱怨、是否投诉、抱怨级别
+ is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
+ self.get_is_complaint_and_is_complaint_level(conversation_rows))
- # 分析问题类型
- problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
-
- # 分析产品线
- product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
- # 分析产品名称和模块名称
- if product_line != '':
- product_name, module_name = self.get_product_name_and_module_name(
- product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
- else:
- product_name = ''
- module_name = ''
-
- # 创建工单列表
- workorder_list = []
-
-
- # 更新工单字典
- # base_workorder_dict.update({
- # "产品线": product_line,
- # "产品名称": product_name,
- # "模块名称": module_name,
- # "客户问题": user_question_str,
- # "问题类型": problem_type,
- # "是否抱怨": "是" if is_dissatisfaction else '否',
- # "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
- # "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
- # "是否投诉": "是" if is_complaint else '否',
- # "解决方案": solution_str
- # })
- # workorder_list.append(base_workorder_dict)
- for user_question in user_question_list:
- user_question_str = user_question.user_question
- solution_str = user_question.solution
+ # 分析问题类型
+ problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
+
+ # 分析产品线
+ product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
+ # 分析产品名称和模块名称
+ if product_line != '':
+ product_name, module_name = self.get_product_name_and_module_name(
+ product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
+ else:
+ product_name = ''
+ module_name = ''
+
+ # 创建工单列表
+ workorder_list = []
- # 创建新的工单字典,复制基本信息
- workorder_dict = base_workorder_dict.copy()
# 更新工单字典
- workorder_dict.update({
- "产品线": product_line,
- "产品名称": product_name,
- "模块名称": module_name,
- "客户问题": user_question_str,
- "问题类型": problem_type,
- "是否抱怨": "是" if is_dissatisfaction else '否',
- "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
- "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
- "是否投诉": "是" if is_complaint else '否',
- "解决方案": solution_str
- })
+ # base_workorder_dict.update({
+ # "产品线": product_line,
+ # "产品名称": product_name,
+ # "模块名称": module_name,
+ # "客户问题": user_question_str,
+ # "问题类型": problem_type,
+ # "是否抱怨": "是" if is_dissatisfaction else '否',
+ # "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
+ # "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
+ # "是否投诉": "是" if is_complaint else '否',
+ # "解决方案": solution_str
+ # })
+ # workorder_list.append(base_workorder_dict)
+ for user_question in user_question_list:
+ user_question_str = user_question.user_question
+ solution_str = user_question.solution
+
+ # 创建新的工单字典,复制基本信息
+ workorder_dict = base_workorder_dict.copy()
+
+ # 更新工单字典
+ workorder_dict.update({
+ "产品线": product_line,
+ "产品名称": product_name,
+ "模块名称": module_name,
+ "客户问题": user_question_str,
+ "问题类型": problem_type,
+ "是否抱怨": "是" if is_dissatisfaction else '否',
+ "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
+ "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
+ "是否投诉": "是" if is_complaint else '否',
+ "解决方案": solution_str
+ })
+
+ # 将工单添加到列表中
+ workorder_list.append(workorder_dict)
- # 将工单添加到列表中
- workorder_list.append(workorder_dict)
-
- return workorder_list
+ return workorder_list
+ except Exception as e:
+ logger.exception(f"处理会话ID: {conversation_id} 时发生错误: {e}")  # exception() keeps the traceback; this broad catch would otherwise hide where the failure happened
+ return []
def analyze_conversation_data(self, conversation_excel_path, product_detail_excel_path, max_workers=10, start_date=None, end_date=None):
"""分析会话数据主流程,使用多线程并发处理"""
@@ -588,9 +605,10 @@ class DialogueToWorkorder:
conversation_id = future_to_conversation[future]
try:
result_workorders = future.result()
- # 将每个会话的所有工单添加到总列表中
- workorder_dict_list.extend(result_workorders)
- logger.info(f"完成处理会话ID: {conversation_id},生成工单数量: {len(result_workorders)}")
+ if result_workorders:
+ # 将每个会话的所有工单添加到总列表中
+ workorder_dict_list.extend(result_workorders)
+ logger.info(f"完成处理会话ID: {conversation_id},生成工单数量: {len(result_workorders)}")
except Exception as exc:
logger.error(f"处理会话ID: {conversation_id} 时发生错误: {exc}")
@@ -678,7 +696,7 @@ def parse_arguments():
help='会话内容Excel文件路径')
parser.add_argument('--product_detail_file', type=str, required=False,
help='产品详情Excel文件路径')
- parser.add_argument('--max_workers', type=int, default=16,
- help='并发处理线程数,默认为16')
+ parser.add_argument('--max_workers', type=int, default=6,
+ help='并发处理线程数,默认为%(default)s')  # argparse expands %(default)s, so the help text cannot drift from the real default again
parser.add_argument('--start_date', type=str, required=False,default="2025-06-10 16:08:00",
help='开始日期,格式为YYYY-MM-DD')