优化对话转工单处理逻辑,调整LLM参数,增强用户问题和解决方案的提取功能,添加槽位填充支持,提升代码结构和可读性。

This commit is contained in:
2025-05-30 11:10:24 +08:00
parent 05caedc4fa
commit d4ff7b6fad
6 changed files with 469 additions and 123 deletions
+143 -6
View File
@@ -11,12 +11,17 @@ import os
from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
import json
from typing import List, Tuple
from typing import List, Tuple, Dict, Any, Optional, Union
import re
from .PromptTemplates import classification_prompt, query_rewrite_prompt, extract_nouns_prompt, classification_info
from .DataModels import Classification, QueryRewrite, Term, TermList
from .PromptTemplates import classification_prompt, query_rewrite_prompt, extract_nouns_prompt, classification_info, slot_filling_prompt
from .DataModels import (
Classification, QueryRewrite, Term, TermList,
SoftwareFunction, TroubleShooting, ProfessionalConsulting,
DataProblem, FileExtensionConsulting, SoftwareLock,
InstallationDownload, ProblemDiagnosis
)
from .ProfessionalNounVector import ProfessionalNounRetriever
from rag2_0.tool.ModelTool import XinferenceReRankerModel, OpenAiLLM
from rag2_0.tool.ModelTool import XinferenceReRankerModel, OpenAiLLM, SiliconFlowReRankerModel
class IntentRecognizer:
@@ -184,7 +189,7 @@ class IntentRecognizer:
if len(matched_terms) != 0:
txts = ["名称:" + term.name + "|" + "同义词:" + ";".join(term.synonymous) + "|" + "描述:" + term.description for term in matched_terms]
# txts = [term.name for term in matched_terms]
xinference_reranker = XinferenceReRankerModel()
xinference_reranker = SiliconFlowReRankerModel()
rerank_results = xinference_reranker.rerank(query, txts, top_k=5)
matched_terms_list = list(matched_terms)
matched_terms = [matched_terms_list[result["index"]] for result in rerank_results]
@@ -288,4 +293,136 @@ class IntentRecognizer:
return classification, TermList(terms=[]), QueryRewrite(rewrite=query),[]
# rewrite = QueryRewrite(rewrite=query)
return classification, keywords_terms, rewrite, query_keys
return classification, keywords_terms, rewrite, query_keys
def fill_slots(self, query: str, classification: Classification, keywords: TermList) -> Dict[str, Any]:
    """
    Fill the slots of the data model selected by the classification result.

    Args:
        query: The user's question text.
        classification: Intent classification result; used to pick the slot model.
        keywords: Matched keyword terms, forwarded to the LLM slot-filling step.

    Returns:
        A dict with the single key ``error`` when no slot model matches the
        classification. Otherwise a dict with ``is_complete`` and
        ``missing_slots`` (both taken from the filled model's
        ``check_required_slots()``) and ``filled_data`` (the filled model's
        ``model_dump()``).
    """
    # Pick the slot model class that corresponds to the classification.
    model_cls = self._get_slot_model(classification)
    if not model_cls:
        return {"error": "未找到匹配的槽位模型"}

    # Let the LLM populate the slots, then check the required ones.
    slots = self._fill_slots_with_llm(query, classification, keywords, model_cls)
    complete, missing = slots.check_required_slots()
    return dict(
        is_complete=complete,
        missing_slots=missing,
        filled_data=slots.model_dump(),
    )
def _get_slot_model(self, classification: Classification) -> Optional[type]:
    """
    Look up the slot model class for a classification result.

    Args:
        classification: Intent classification result; its
            ``vertical_classification`` and ``sub_classification`` values
            are compared against the routing table below.

    Returns:
        The matching slot model class, or ``None`` when the pair of
        vertical / sub classification is not listed.
    """
    # Each row: (vertical classification, sub classification, slot model class).
    routes = (
        # Software problems
        ("软件问题", "软件功能", SoftwareFunction),
        ("软件问题", "故障排查", TroubleShooting),
        # Business problems
        ("业务问题", "专业咨询", ProfessionalConsulting),
        ("业务问题", "数据问题", DataProblem),
        # Installation / download
        ("安装下载", "后缀名咨询", FileExtensionConsulting),
        ("安装下载", "软件锁类", SoftwareLock),
        ("安装下载", "安装下载类", InstallationDownload),
        ("安装下载", "问题排查类", ProblemDiagnosis),
    )

    vertical = classification.vertical_classification
    sub = classification.sub_classification
    for expected_vertical, expected_sub, model_cls in routes:
        if vertical == expected_vertical and sub == expected_sub:
            return model_cls
    return None
def _fill_slots_with_llm(self, query: str, classification: Classification, keywords: TermList, slot_model_class: type) -> Any:
    """
    Ask the LLM to fill the slots of ``slot_model_class`` for a question.

    Args:
        query: The user's question text.
        classification: Intent classification result; its vertical and sub
            classification values are inserted into the prompt.
        keywords: Matched keyword terms; each term is dumped and serialized
            to JSON for the prompt.
        slot_model_class: Slot model class to fill. Its JSON schema and the
            parser's format instructions are embedded in the prompt.

    Returns:
        The instance parsed from the LLM response. If parsing fails, the
        failure is logged and a default-constructed ``slot_model_class()``
        is returned instead, so callers always receive a model instance.
    """
    # Function-scope import keeps this block self-contained.
    import logging

    # Build the prompt.
    slot_parser = PydanticOutputParser(pydantic_object=slot_model_class)
    model_schema = json.dumps(slot_model_class.model_json_schema(), ensure_ascii=False)
    keywords_str = json.dumps(
        [term.model_dump() for term in keywords.terms],
        ensure_ascii=False,
    )
    formatted_prompt = slot_filling_prompt.format(
        query=query,
        vertical_classification=classification.vertical_classification,
        sub_classification=classification.sub_classification,
        keywords=keywords_str,
        model_schema=model_schema,
        output_format=slot_parser.get_format_instructions(),
    )

    # Call the LLM.
    # NOTE(review): the meaning of the positional ``False`` is defined by the
    # LLM wrapper behind ``self.llm`` and is not visible here — confirm.
    response = self.llm.invoke(formatted_prompt, False)

    try:
        # Try to parse the LLM response into the slot model.
        return slot_parser.parse(response.content)
    except Exception:
        # Deliberate best-effort: fall back to an empty model, but record the
        # failure (with traceback) instead of swallowing it silently.
        logging.getLogger(__name__).warning(
            "Slot filling: could not parse LLM response into %s; "
            "falling back to an empty instance",
            getattr(slot_model_class, "__name__", slot_model_class),
            exc_info=True,
        )
        return slot_model_class()
def process_query_with_slots(self, query: str) -> Dict[str, Any]:
    """
    Run the full query pipeline, including slot filling.

    Args:
        query: The user's original question.

    Returns:
        A dict with the dumped classification, keywords and rewrite, the
        query keys, and the slot-filling result under ``slot_filling``.
        The slot-filling result is an empty dict when either the vertical
        or the sub classification is "其他" or "闲聊".
    """
    # Base pipeline: classification, keyword matching, query rewrite.
    classification, keywords, rewrite, query_keys = self.process_query(query)

    # Slot filling only applies to classifications outside these labels.
    skipped_labels = ["其他", "闲聊"]
    is_skipped = (
        classification.vertical_classification in skipped_labels
        or classification.sub_classification in skipped_labels
    )
    slot_result = {} if is_skipped else self.fill_slots(rewrite.rewrite, classification, keywords)

    summary = {
        "classification": classification.model_dump(),
        "keywords": keywords.model_dump(),
        "rewrite": rewrite.model_dump(),
        "query_keys": query_keys,
        "slot_filling": slot_result,
    }
    return summary