优化对话转工单处理逻辑,调整LLM参数,增强用户问题和解决方案的提取功能,添加槽位填充支持,提升代码结构和可读性。

This commit is contained in:
2025-05-30 11:10:24 +08:00
parent 05caedc4fa
commit d4ff7b6fad
6 changed files with 469 additions and 123 deletions
+136 -2
View File
@@ -8,7 +8,7 @@ Description: 提取和分类的数据模型
"""
from pydantic import BaseModel, Field
from typing import List, Optional
from typing import List, Optional, Dict, Tuple
# 定义输出模型
@@ -33,4 +33,138 @@ class Classification(BaseModel):
sub_classification:str = Field(description="一级分类下的二级分类")
class QueryRewrite(BaseModel):
rewrite:str = Field(description="问题改写")
rewrite:str = Field(description="问题改写")
# 1. 软件问题
# 1.1 软件功能
class SoftwareFunction(BaseModel):
    # Slot schema for a "软件问题 / 软件功能" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    software_name: str = Field(description="软件名称")
    function_name: str = Field(description="具体功能名称")
    operation: str = Field(description="用户操作意图(如何使用功能、功能入口、功能使用场景)")
    # Optional slots; default to None when not provided.
    software_version: Optional[str] = Field(None, description="软件版本")
    operation_steps: Optional[str] = Field(None, description="操作步骤描述")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("software_name", "function_name", "operation")
        unfilled = {
            slot: SoftwareFunction.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 1.2 故障排查
class TroubleShooting(BaseModel):
    # Slot schema for a "软件问题 / 故障排查" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    software_name: str = Field(description="软件名称")
    function_name: str = Field(description="具体功能名称/操作描述")
    error_message: str = Field(description="报错信息/异常现象")
    # Optional slots; default to None when not provided.
    software_version: Optional[str] = Field(None, description="软件版本")
    os_version: Optional[str] = Field(None, description="操作系统及版本")
    reproduction_steps: Optional[str] = Field(None, description="故障重现步骤")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("software_name", "function_name", "error_message")
        unfilled = {
            slot: TroubleShooting.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 2. 业务问题
# 2.1 专业咨询
class ProfessionalConsulting(BaseModel):
    # Slot schema for a "业务问题 / 专业咨询" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    scene_subject: str = Field(description="场景主体")
    business_scene: str = Field(description="业务场景描述")
    # Optional slot; defaults to None when not provided.
    software_name: Optional[str] = Field(None, description="软件名称")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("scene_subject", "business_scene")
        unfilled = {
            slot: ProfessionalConsulting.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 2.2 数据问题
class DataProblem(BaseModel):
    # Slot schema for a "业务问题 / 数据问题" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    expense_type: str = Field(description="费用类型")
    operation_purpose: str = Field(description="操作目的")
    # Optional slots; default to None when not provided.
    software_name: Optional[str] = Field(None, description="软件名称")
    project_type: Optional[str] = Field(None, description="工程类型")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("expense_type", "operation_purpose")
        unfilled = {
            slot: DataProblem.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 3. 安装下载注册
# 3.1 后缀名咨询
class FileExtensionConsulting(BaseModel):
    # Slot schema for a "安装下载 / 后缀名咨询" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    file_extension: str = Field(description="文件后缀名")
    operation_purpose: str = Field(description="操作目的")
    # Optional slots; default to None when not provided.
    file_source: Optional[str] = Field(None, description="文件来源场景")
    related_software: Optional[str] = Field(None, description="相关软件名称")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("file_extension", "operation_purpose")
        unfilled = {
            slot: FileExtensionConsulting.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 3.2 软件锁类
class SoftwareLock(BaseModel):
    # Slot schema for a "安装下载 / 软件锁类" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    lock_type: str = Field(description="锁类型")
    operation_purpose: str = Field(description="操作目的")
    # Optional slot; defaults to None when not provided.
    lock_number: Optional[str] = Field(None, description="软件锁编号/注册号")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("lock_type", "operation_purpose")
        unfilled = {
            slot: SoftwareLock.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 3.3 安装下载类
class InstallationDownload(BaseModel):
    # Slot schema for a "安装下载 / 安装下载类" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slots.
    software_name: str = Field(description="软件/插件名称")
    operation_stage: str = Field(description="操作阶段")
    # Optional slots; default to None when not provided.
    os_version: Optional[str] = Field(None, description="操作系统版本")
    package_source: Optional[str] = Field(None, description="安装包来源/版本号")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("software_name", "operation_stage")
        unfilled = {
            slot: InstallationDownload.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
# 3.4 问题排查类
class ProblemDiagnosis(BaseModel):
    # Slot schema for a "安装下载 / 问题排查类" question.
    # Documented with comments (not a class docstring) so the generated
    # JSON schema stays unchanged.

    # Required slot.
    error_message: str = Field(description="报错信息/异常现象")
    # Optional slots; default to None when not provided.
    software_name: Optional[str] = Field(None, description="软件名称")
    os_version: Optional[str] = Field(None, description="操作系统版本")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps each falsy
            required slot name to its field description; ``is_complete``
            is True exactly when ``missing`` is empty.
        """
        required_names = ("error_message",)
        unfilled = {
            slot: ProblemDiagnosis.model_fields[slot].description
            for slot in required_names
            if not getattr(self, slot)
        }
        return not unfilled, unfilled
+143 -6
View File
@@ -11,12 +11,17 @@ import os
from langchain_openai import ChatOpenAI
from langchain.output_parsers import PydanticOutputParser
import json
from typing import List, Tuple
from typing import List, Tuple, Dict, Any, Optional, Union
import re
from .PromptTemplates import classification_prompt, query_rewrite_prompt, extract_nouns_prompt, classification_info
from .DataModels import Classification, QueryRewrite, Term, TermList
from .PromptTemplates import classification_prompt, query_rewrite_prompt, extract_nouns_prompt, classification_info, slot_filling_prompt
from .DataModels import (
Classification, QueryRewrite, Term, TermList,
SoftwareFunction, TroubleShooting, ProfessionalConsulting,
DataProblem, FileExtensionConsulting, SoftwareLock,
InstallationDownload, ProblemDiagnosis
)
from .ProfessionalNounVector import ProfessionalNounRetriever
from rag2_0.tool.ModelTool import XinferenceReRankerModel, OpenAiLLM
from rag2_0.tool.ModelTool import XinferenceReRankerModel, OpenAiLLM, SiliconFlowReRankerModel
class IntentRecognizer:
@@ -184,7 +189,7 @@ class IntentRecognizer:
if len(matched_terms) != 0:
txts = ["名称:" + term.name + "|" + "同义词:" + ";".join(term.synonymous) + "|" + "描述:" + term.description for term in matched_terms]
# txts = [term.name for term in matched_terms]
xinference_reranker = XinferenceReRankerModel()
xinference_reranker = SiliconFlowReRankerModel()
rerank_results = xinference_reranker.rerank(query, txts, top_k=5)
matched_terms_list = list(matched_terms)
matched_terms = [matched_terms_list[result["index"]] for result in rerank_results]
@@ -288,4 +293,136 @@ class IntentRecognizer:
return classification, TermList(terms=[]), QueryRewrite(rewrite=query),[]
# rewrite = QueryRewrite(rewrite=query)
return classification, keywords_terms, rewrite, query_keys
return classification, keywords_terms, rewrite, query_keys
def fill_slots(self, query: str, classification: Classification, keywords: TermList) -> Dict[str, Any]:
    """Fill the slots for a question using the model its classification selects.

    Args:
        query: The user's question text.
        classification: Intent classification result used to pick the slot model.
        keywords: Matched keyword terms forwarded to the LLM prompt.

    Returns:
        ``{"error": "未找到匹配的槽位模型"}`` when no slot model matches the
        classification. Otherwise a dict with three keys: ``is_complete``
        (whether every required slot is filled), ``missing_slots`` (slot
        name -> description for each unfilled required slot) and
        ``filled_data`` (the filled model dumped to a dict).
    """
    model_cls = self._get_slot_model(classification)
    if not model_cls:
        return {"error": "未找到匹配的槽位模型"}

    # Let the LLM populate the model, then validate the required slots.
    slots = self._fill_slots_with_llm(query, classification, keywords, model_cls)
    complete, missing = slots.check_required_slots()

    return dict(
        is_complete=complete,
        missing_slots=missing,
        filled_data=slots.model_dump(),
    )
def _get_slot_model(self, classification: Classification) -> Optional[type]:
    """Look up the slot model class for a classification result.

    Args:
        classification: Intent classification; its ``vertical_classification``
            and ``sub_classification`` labels are matched verbatim.

    Returns:
        The slot model class registered for the label pair, or ``None``
        when the pair is not recognised.
    """
    # NOTE(review): the prompt text groups these sub-categories under
    # "安装下载注册", while the label compared here is "安装下载" — confirm
    # the classifier emits exactly "安装下载".
    models_by_labels = {
        # Software questions
        ("软件问题", "软件功能"): SoftwareFunction,
        ("软件问题", "故障排查"): TroubleShooting,
        # Business questions
        ("业务问题", "专业咨询"): ProfessionalConsulting,
        ("业务问题", "数据问题"): DataProblem,
        # Installation / download / registration
        ("安装下载", "后缀名咨询"): FileExtensionConsulting,
        ("安装下载", "软件锁类"): SoftwareLock,
        ("安装下载", "安装下载类"): InstallationDownload,
        ("安装下载", "问题排查类"): ProblemDiagnosis,
    }
    label_pair = (
        classification.vertical_classification,
        classification.sub_classification,
    )
    return models_by_labels.get(label_pair)
def _fill_slots_with_llm(self, query: str, classification: Classification, keywords: TermList, slot_model_class: type) -> Any:
    """Ask the LLM to populate ``slot_model_class`` from the user's question.

    Args:
        query: The user's question text.
        classification: Intent classification result; its two labels are
            written into the prompt.
        keywords: Matched keyword terms; serialised to JSON for the prompt.
        slot_model_class: The pydantic slot model class to fill.

    Returns:
        An instance of ``slot_model_class``. When the LLM response cannot
        be parsed, a fallback instance is returned whose required fields
        are empty strings and whose optional fields keep their defaults,
        so ``check_required_slots()`` reports every required slot as
        missing.
    """
    # Function-scope import: the file's top-level import block is only
    # partially visible from this hunk.
    import logging

    # Build the prompt.
    slot_parser = PydanticOutputParser(pydantic_object=slot_model_class)
    model_schema = json.dumps(slot_model_class.model_json_schema(), ensure_ascii=False)
    keywords_str = json.dumps(
        [term.model_dump() for term in keywords.terms], ensure_ascii=False
    )
    formatted_prompt = slot_filling_prompt.format(
        query=query,
        vertical_classification=classification.vertical_classification,
        sub_classification=classification.sub_classification,
        keywords=keywords_str,
        model_schema=model_schema,
        output_format=slot_parser.get_format_instructions(),
    )

    # Call the LLM.
    # NOTE(review): the meaning of the second positional argument (False)
    # is defined by the project's LLM wrapper — confirm against it.
    response = self.llm.invoke(formatted_prompt, False)

    try:
        return slot_parser.parse(response.content)
    except Exception:
        # Best-effort fallback: keep the pipeline going, but leave a trace.
        logging.getLogger(__name__).warning(
            "Slot filling output could not be parsed for %s; returning empty slots",
            getattr(slot_model_class, "__name__", slot_model_class),
            exc_info=True,
        )
        # Calling slot_model_class() with no arguments raises a
        # ValidationError because the slot models declare required fields.
        # model_construct skips validation; required fields are set to ""
        # so attribute access in check_required_slots() still works.
        blank_required = {
            name: ""
            for name, field in slot_model_class.model_fields.items()
            if field.is_required()
        }
        return slot_model_class.model_construct(**blank_required)
def process_query_with_slots(self, query: str) -> Dict[str, Any]:
    """Run the full query pipeline and add slot filling to its results.

    Args:
        query: The user's question text.

    Returns:
        A dict with keys ``classification``, ``keywords``, ``rewrite``
        (each dumped to a dict), ``query_keys`` and ``slot_filling``.
        ``slot_filling`` is an empty dict when either classification
        label is "其他" or "闲聊"; otherwise it is the result of
        ``fill_slots`` called on the rewritten question.
    """
    # Base pipeline: classification, keyword matching and rewrite.
    classification, keywords, rewrite, query_keys = self.process_query(query)

    # Slot filling is skipped as soon as either label is one of these.
    skipped_labels = ("其他", "闲聊")
    skip_slot_filling = (
        classification.vertical_classification in skipped_labels
        or classification.sub_classification in skipped_labels
    )
    slot_filling_result = (
        {}
        if skip_slot_filling
        else self.fill_slots(rewrite.rewrite, classification, keywords)
    )

    return {
        "classification": classification.model_dump(),
        "keywords": keywords.model_dump(),
        "rewrite": rewrite.model_dump(),
        "query_keys": query_keys,
        "slot_filling": slot_filling_result,
    }
+42 -3
View File
@@ -38,7 +38,7 @@ classification_info="""【垂直领域分类】:
【业务问题包括以下两类】:
1. 专业咨询:涉及电力造价规范、工程计价规则问题、行业标准解读等
2. 数据问题:涉及电力造价费用、造价指标等
2. 数据问题:涉及电力造价费用、造价指标的计算或构成
【安装下载注册包括以下三类】:
1. 后缀名咨询:所有涉及文件扩展名的使用场景、软件关联等问题,包括但不限于:询问文件是否由特定软件打开、扩展名与软件的匹配关系、扩展名含义及关联等
@@ -94,7 +94,7 @@ query_rewrite_prompt = """
b. 执行结构优化:
- 采用【术语标记】规范标注关键概念
- 构建主谓宾明确的问题句式
- 保持原问题时态与语态特征
- 保持原问题时态与语态特征, 保留5W2H问题特征
- 执行同义词替换:将synonymous中的同义词替换为对应name字段的标准术语
# 输出规范
@@ -132,4 +132,43 @@ query_rewrite_prompt = """
4. 异常处理机制
- 当关键词与问题无明显关联时,触发直通输出规则
- 出现术语冲突时优先保留原始表述
"""
"""
# Prompt template for LLM slot filling. It is rendered with str.format and
# expects these placeholders: {query}, {vertical_classification},
# {sub_classification}, {keywords}, {model_schema} and {output_format}.
# The doubled braces around the JSON example are str.format escapes, so the
# rendered prompt contains single literal braces there.
slot_filling_prompt = """
你是一个专业的电力造价领域问题槽位填充助手。你需要从用户问题中提取关键信息,并填充到对应的数据结构中。
【用户问题】
{query}
【问题分类】
垂直领域分类: {vertical_classification}
子分类: {sub_classification}
【已识别关键词】
{keywords}
【目标数据结构】
{model_schema}
【输出格式】
{output_format}
【任务要求】
1. 仔细分析用户问题,从中提取所有可能的槽位信息
2. 对于必填槽位,必须尽力从问题中提取,如果确实无法提取则留空
3. 对于选填槽位,如果能从问题中提取则填写,否则留空
4. 只输出符合格式的JSON数据,不要有任何额外的解释
【示例】
用户问题: "我的西藏Z1软件安装后闪退,提示缺少组件,怎么解决?"
分类: 软件问题/故障排查
输出:
{{
"software_name": "西藏Z1软件",
"function_name": "软件安装",
"error_message": "闪退,提示缺少组件",
"software_version": null,
"os_version": null,
"reproduction_steps": "软件安装后"
}}
"""