Compare commits
2 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
| 17ff99bad1 | |||
| 0dda581c8e |
@@ -135,15 +135,23 @@ class DialogueToWorkorder:
|
||||
self.product_line_parser = PydanticOutputParser(pydantic_object=ProductLine)
|
||||
|
||||
# 初始化LLM模型
|
||||
# self.llm_params = llm_params or {
|
||||
# "temperature": 0.2,
|
||||
# "top_p":0.95,
|
||||
# "model": "deepseek-ai/DeepSeek-R1",
|
||||
# "api_key": os.getenv("OPENAI_API_KEY"),
|
||||
# "base_url": os.getenv("OPENAI_API_BASE"),
|
||||
# "timeout": httpx.Timeout(600.0)
|
||||
# }
|
||||
self.api_key = "25t%Syu6I9yxX2IuTN"
|
||||
self.llm_params = llm_params or {
|
||||
"temperature": 0.2,
|
||||
"top_p":0.95,
|
||||
"model": "deepseek-ai/DeepSeek-R1",
|
||||
"api_key": os.getenv("OPENAI_API_KEY"),
|
||||
"base_url": os.getenv("OPENAI_API_BASE"),
|
||||
"model": "deepseek-r1",
|
||||
"api_key": "25t%Syu6I9yxX2IuTN",
|
||||
"base_url": "http://10.1.0.154:8000/v1",
|
||||
"timeout": httpx.Timeout(600.0)
|
||||
}
|
||||
|
||||
self.llm = self._get_llm_instance()
|
||||
|
||||
def _get_llm_instance(self):
|
||||
@@ -266,7 +274,7 @@ class DialogueToWorkorder:
|
||||
2、根据用户提出的问题,分析坐席提供的解决方法(比如:1、引导用户xxxx。2、告诉用户xxxxx)。以坐席的角度直接总结完整的解决方案或应对措施(不要出现"坐席"、"我"等字样)
|
||||
3、提炼访客独立的核心问题(以访客的角度总结核心问题),核心问题衍生、细化后的请求合并到对应的核心问题中。不要单独列出衍生、细化后的请求。
|
||||
|
||||
4、使用json格式输出(多个用户问题采用标准json数组格式输出):
|
||||
4、使用json格式输出(多个用户问题采用标准json格式输出):
|
||||
{output_format}
|
||||
|
||||
输出示例:
|
||||
@@ -283,15 +291,15 @@ class DialogueToWorkorder:
|
||||
output_format = self.user_question_and_solution_parser.get_format_instructions()
|
||||
llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str)
|
||||
|
||||
response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False)
|
||||
|
||||
response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key)
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
try:
|
||||
if response.content.count('user_question') == 1:
|
||||
user_question_and_solution = self.user_question_and_solution_parser.parse(response.content)
|
||||
if clean_output.count('user_question') == 1:
|
||||
user_question_and_solution = self.user_question_and_solution_parser.parse(clean_output)
|
||||
return [user_question_and_solution]
|
||||
else:
|
||||
array_pattern = r'\[\s*(\{[\s\S]*?\}(?:\s*,\s*\{[\s\S]*?\})*)\s*\]'
|
||||
array_match = re.search(array_pattern, response.content)
|
||||
array_match = re.search(array_pattern, clean_output)
|
||||
if array_match:
|
||||
# 找到了JSON数组
|
||||
json_array_str = '[' + array_match.group(1) + ']'
|
||||
@@ -313,8 +321,9 @@ class DialogueToWorkorder:
|
||||
except Exception as e:
|
||||
output_format = self.user_question_and_solution_list_parser.get_format_instructions()
|
||||
llm_prompt = prompt.format(output_format=output_format, dialogue_str=dialogue_str)
|
||||
response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False)
|
||||
user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(response.content)
|
||||
response = self.llm.invoke(user_prompt=llm_prompt, need_retry=False, api_key=self.api_key)
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
user_question_and_solution_temp = self.user_question_and_solution_list_parser.parse(clean_output)
|
||||
return user_question_and_solution_temp.user_question_list
|
||||
|
||||
return [user_question_and_solution]
|
||||
@@ -345,8 +354,9 @@ class DialogueToWorkorder:
|
||||
{dialogue_str}
|
||||
"""
|
||||
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False)
|
||||
product_name_and_module_name = self.product_name_and_module_name_parser.parse(response.content)
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
product_name_and_module_name = self.product_name_and_module_name_parser.parse(clean_output)
|
||||
|
||||
return product_name_and_module_name.product_name, product_name_and_module_name.module_name
|
||||
|
||||
@@ -374,8 +384,9 @@ class DialogueToWorkorder:
|
||||
{dialogue_str}
|
||||
"""
|
||||
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False)
|
||||
product_line = self.product_line_parser.parse(response.content)
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
product_line = self.product_line_parser.parse(clean_output)
|
||||
|
||||
return product_line.product_line
|
||||
|
||||
@@ -410,8 +421,9 @@ class DialogueToWorkorder:
|
||||
{dialogue_str}
|
||||
"""
|
||||
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False)
|
||||
question_type = self.question_type_parser.parse(response.content)
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
question_type = self.question_type_parser.parse(clean_output)
|
||||
|
||||
return question_type.question_type
|
||||
|
||||
@@ -446,8 +458,9 @@ class DialogueToWorkorder:
|
||||
|
||||
"""
|
||||
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False)
|
||||
is_complaint = self.is_complaint_parser.parse(response.content)
|
||||
response = self.llm.invoke(user_prompt=prompt, need_retry=False, api_key=self.api_key)
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
is_complaint = self.is_complaint_parser.parse(clean_output)
|
||||
|
||||
return (is_complaint.is_dissatisfaction,
|
||||
is_complaint.dissatisfaction_level,
|
||||
@@ -458,81 +471,85 @@ class DialogueToWorkorder:
|
||||
"""处理单个会话的函数,用于多线程并发"""
|
||||
# if conversation_id!="b157aa91-3acb-11f0-a191-4fb224ef4b40":
|
||||
# return []
|
||||
# 获取工单基本信息
|
||||
base_workorder_dict = self.get_workorder_dict(conversation_rows)
|
||||
# 分析用户问题和解决方案
|
||||
user_question_list = self.get_user_question_and_solution(conversation_rows)
|
||||
try:
|
||||
# 获取工单基本信息
|
||||
base_workorder_dict = self.get_workorder_dict(conversation_rows)
|
||||
# 分析用户问题和解决方案
|
||||
user_question_list = self.get_user_question_and_solution(conversation_rows)
|
||||
|
||||
user_question_str=""
|
||||
for user_question in user_question_list:
|
||||
user_question_str = user_question_str + user_question.user_question.strip() + "\n"
|
||||
user_question_str = user_question_str.strip()
|
||||
user_question_str=""
|
||||
for user_question in user_question_list:
|
||||
user_question_str = user_question_str + user_question.user_question.strip() + "\n"
|
||||
user_question_str = user_question_str.strip()
|
||||
|
||||
solution_str=""
|
||||
for user_question in user_question_list:
|
||||
solution_str = solution_str + user_question.solution.strip() + "\n"
|
||||
solution_str = solution_str.strip()
|
||||
solution_str=""
|
||||
for user_question in user_question_list:
|
||||
solution_str = solution_str + user_question.solution.strip() + "\n"
|
||||
solution_str = solution_str.strip()
|
||||
|
||||
# 分析是否抱怨、是否投诉、抱怨级别
|
||||
is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
|
||||
self.get_is_complaint_and_is_complaint_level(conversation_rows))
|
||||
# 分析是否抱怨、是否投诉、抱怨级别
|
||||
is_dissatisfaction, dissatisfaction_level, dissatisfaction_reasoning, is_complaint = (
|
||||
self.get_is_complaint_and_is_complaint_level(conversation_rows))
|
||||
|
||||
# 分析问题类型
|
||||
problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
|
||||
|
||||
# 分析产品线
|
||||
product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
|
||||
# 分析产品名称和模块名称
|
||||
if product_line != '':
|
||||
product_name, module_name = self.get_product_name_and_module_name(
|
||||
product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
|
||||
else:
|
||||
product_name = ''
|
||||
module_name = ''
|
||||
|
||||
# 创建工单列表
|
||||
workorder_list = []
|
||||
|
||||
|
||||
# 更新工单字典
|
||||
# base_workorder_dict.update({
|
||||
# "产品线": product_line,
|
||||
# "产品名称": product_name,
|
||||
# "模块名称": module_name,
|
||||
# "客户问题": user_question_str,
|
||||
# "问题类型": problem_type,
|
||||
# "是否抱怨": "是" if is_dissatisfaction else '否',
|
||||
# "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
|
||||
# "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
|
||||
# "是否投诉": "是" if is_complaint else '否',
|
||||
# "解决方案": solution_str
|
||||
# })
|
||||
# workorder_list.append(base_workorder_dict)
|
||||
for user_question in user_question_list:
|
||||
user_question_str = user_question.user_question
|
||||
solution_str = user_question.solution
|
||||
# 分析问题类型
|
||||
problem_type = self.get_problem_type(conversation_rows, user_question_str, solution_str)
|
||||
|
||||
# 分析产品线
|
||||
product_line = self.get_product_line(conversation_rows, product_detail_dict, user_question_str, solution_str)
|
||||
# 分析产品名称和模块名称
|
||||
if product_line != '':
|
||||
product_name, module_name = self.get_product_name_and_module_name(
|
||||
product_line, conversation_rows, product_detail_dict, user_question_str, solution_str)
|
||||
else:
|
||||
product_name = ''
|
||||
module_name = ''
|
||||
|
||||
# 创建工单列表
|
||||
workorder_list = []
|
||||
|
||||
# 创建新的工单字典,复制基本信息
|
||||
workorder_dict = base_workorder_dict.copy()
|
||||
|
||||
# 更新工单字典
|
||||
workorder_dict.update({
|
||||
"产品线": product_line,
|
||||
"产品名称": product_name,
|
||||
"模块名称": module_name,
|
||||
"客户问题": user_question_str,
|
||||
"问题类型": problem_type,
|
||||
"是否抱怨": "是" if is_dissatisfaction else '否',
|
||||
"抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
|
||||
"抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
|
||||
"是否投诉": "是" if is_complaint else '否',
|
||||
"解决方案": solution_str
|
||||
})
|
||||
# base_workorder_dict.update({
|
||||
# "产品线": product_line,
|
||||
# "产品名称": product_name,
|
||||
# "模块名称": module_name,
|
||||
# "客户问题": user_question_str,
|
||||
# "问题类型": problem_type,
|
||||
# "是否抱怨": "是" if is_dissatisfaction else '否',
|
||||
# "抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
|
||||
# "抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
|
||||
# "是否投诉": "是" if is_complaint else '否',
|
||||
# "解决方案": solution_str
|
||||
# })
|
||||
# workorder_list.append(base_workorder_dict)
|
||||
for user_question in user_question_list:
|
||||
user_question_str = user_question.user_question
|
||||
solution_str = user_question.solution
|
||||
|
||||
# 创建新的工单字典,复制基本信息
|
||||
workorder_dict = base_workorder_dict.copy()
|
||||
|
||||
# 更新工单字典
|
||||
workorder_dict.update({
|
||||
"产品线": product_line,
|
||||
"产品名称": product_name,
|
||||
"模块名称": module_name,
|
||||
"客户问题": user_question_str,
|
||||
"问题类型": problem_type,
|
||||
"是否抱怨": "是" if is_dissatisfaction else '否',
|
||||
"抱怨内容": dissatisfaction_reasoning if is_dissatisfaction else '',
|
||||
"抱怨级别": dissatisfaction_level if is_dissatisfaction else '',
|
||||
"是否投诉": "是" if is_complaint else '否',
|
||||
"解决方案": solution_str
|
||||
})
|
||||
|
||||
# 将工单添加到列表中
|
||||
workorder_list.append(workorder_dict)
|
||||
|
||||
# 将工单添加到列表中
|
||||
workorder_list.append(workorder_dict)
|
||||
|
||||
return workorder_list
|
||||
return workorder_list
|
||||
except Exception as e:
|
||||
logger.error(f"处理会话ID: {conversation_id} 时发生错误: {e}")
|
||||
return []
|
||||
|
||||
def analyze_conversation_data(self, conversation_excel_path, product_detail_excel_path, max_workers=10, start_date=None, end_date=None):
|
||||
"""分析会话数据主流程,使用多线程并发处理"""
|
||||
@@ -588,9 +605,10 @@ class DialogueToWorkorder:
|
||||
conversation_id = future_to_conversation[future]
|
||||
try:
|
||||
result_workorders = future.result()
|
||||
# 将每个会话的所有工单添加到总列表中
|
||||
workorder_dict_list.extend(result_workorders)
|
||||
logger.info(f"完成处理会话ID: {conversation_id},生成工单数量: {len(result_workorders)}")
|
||||
if result_workorders:
|
||||
# 将每个会话的所有工单添加到总列表中
|
||||
workorder_dict_list.extend(result_workorders)
|
||||
logger.info(f"完成处理会话ID: {conversation_id},生成工单数量: {len(result_workorders)}")
|
||||
except Exception as exc:
|
||||
logger.error(f"处理会话ID: {conversation_id} 时发生错误: {exc}")
|
||||
|
||||
@@ -678,7 +696,7 @@ def parse_arguments():
|
||||
help='会话内容Excel文件路径')
|
||||
parser.add_argument('--product_detail_file', type=str, required=False,
|
||||
help='产品详情Excel文件路径')
|
||||
parser.add_argument('--max_workers', type=int, default=16,
|
||||
parser.add_argument('--max_workers', type=int, default=6,
|
||||
help='并发处理线程数,默认为16')
|
||||
parser.add_argument('--start_date', type=str, required=False,default="2025-06-10 16:08:00",
|
||||
help='开始日期,格式为YYYY-MM-DD')
|
||||
|
||||
@@ -46,7 +46,7 @@ class DifyCompareTest:
|
||||
self.first_wiki_client = ChatClient(api_key="app-gocvuqduBnJptYNPpnW9V9R6", base_url=os.getenv("DIFY_BSAE_URL"))
|
||||
# 词条与工单同时检索
|
||||
self.both_wiki_worker_client = ChatClient(api_key="app-CPoOMaGDsLRPAe9TW7Xjhszy", base_url=os.getenv("DIFY_BSAE_URL"))
|
||||
self.llm = OpenAiLLM(base_url=os.getenv("OPENAI_API_BASE"), model="deepseek-ai/DeepSeek-R1")
|
||||
self.llm = OpenAiLLM(base_url=os.getenv("OPENAI_API_BASE"), model=os.getenv("MODEL_NAME"))
|
||||
|
||||
def llm_judge_answer(self, old_answer: str, now_answer: str):
|
||||
user_prompt = f"""
|
||||
@@ -76,7 +76,6 @@ class DifyCompareTest:
|
||||
response.content = response.content.strip()
|
||||
clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
|
||||
result = JsonOutputParser().parse(clean_output)
|
||||
result = json.loads(clean_output)
|
||||
return "回答基本相同" if result.get("is_same", False) else "回答基本不相同"
|
||||
except Exception as e:
|
||||
retry_count += 1
|
||||
@@ -91,17 +90,28 @@ class DifyCompareTest:
|
||||
|
||||
def process_workflow(self, workflow_name, client, inputs, query, old_answer):
|
||||
"""处理单个工作流调用"""
|
||||
try:
|
||||
response = client.create_chat_message(
|
||||
inputs=inputs, query=query, user="AutoCodeRun", response_mode="blocking"
|
||||
)
|
||||
result = response.json()
|
||||
answer = result.get('answer', "")
|
||||
judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
|
||||
return answer, judge_result
|
||||
except Exception as e:
|
||||
logging.error(f"{workflow_name}调用失败: {e}")
|
||||
return '', ''
|
||||
max_retries = 3
|
||||
retry_count = 0
|
||||
|
||||
while retry_count < max_retries:
|
||||
try:
|
||||
response = client.create_chat_message(
|
||||
inputs=inputs, query=query, user="AutoCodeRun", response_mode="blocking"
|
||||
)
|
||||
result = response.json()
|
||||
answer = result.get('answer', "")
|
||||
if len(answer) == 0:
|
||||
raise Exception(f"回答为空: {result}")
|
||||
judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
|
||||
return answer, judge_result
|
||||
except Exception as e:
|
||||
retry_count += 1
|
||||
if retry_count >= max_retries:
|
||||
logging.error(f"{workflow_name}调用失败 (尝试 {max_retries} 次后): {e}")
|
||||
return '', ''
|
||||
else:
|
||||
import time
|
||||
time.sleep(1) # 等待1秒后重试
|
||||
|
||||
def process_single_row(self, index, row):
|
||||
"""处理单行数据的方法,用于多线程执行"""
|
||||
@@ -247,7 +257,7 @@ if __name__ == "__main__":
|
||||
|
||||
# 处理第一个文件
|
||||
excel_files = [
|
||||
("data/excel/5月.xlsx", "data/excel/5月问答对比.xlsx"),
|
||||
# ("data/excel/5月.xlsx", "data/excel/5月问答对比.xlsx"),
|
||||
("data/excel/其他月.xlsx", "data/excel/其他月问答对比.xlsx")
|
||||
]
|
||||
|
||||
|
||||
@@ -240,7 +240,7 @@ class OpenAiLLM:
|
||||
|
||||
self._kwargs = kwargs
|
||||
|
||||
def invoke(self, user_prompt="你是谁?", need_retry=True,**extra_kwargs):
|
||||
def invoke(self, user_prompt="你是谁?", need_retry=True, api_key:str = None, **extra_kwargs):
|
||||
# 初始化 OpenAI 客户端
|
||||
|
||||
max_retries = 3
|
||||
@@ -253,10 +253,13 @@ class OpenAiLLM:
|
||||
timeout = httpx.Timeout(300.0)
|
||||
self._kwargs["timeout"] = timeout
|
||||
|
||||
if api_key is None:
|
||||
api_key = APIKeyManager.get_api_key()
|
||||
|
||||
if need_retry:
|
||||
while retry_count < max_retries:
|
||||
try:
|
||||
api_key = APIKeyManager.get_api_key()
|
||||
|
||||
# 使用with语句创建客户端,确保资源会被正确释放
|
||||
with OpenAI(api_key=api_key, base_url=self._url) as client:
|
||||
# 创建 Completion 请求. 超时120s
|
||||
@@ -276,7 +279,6 @@ class OpenAiLLM:
|
||||
else:
|
||||
try:
|
||||
# 创建 Completion 请求. 超时120s
|
||||
api_key = APIKeyManager.get_api_key()
|
||||
# 使用with语句创建客户端,确保资源会被正确释放
|
||||
with OpenAI(api_key=api_key, base_url=self._url) as client:
|
||||
completion = client.chat.completions.create(
|
||||
|
||||
Reference in New Issue
Block a user