优化对话转工单处理逻辑,调整LLM参数,增强用户问题和解决方案的提取功能,添加槽位填充支持,提升代码结构和可读性。
This commit is contained in:
@@ -8,7 +8,7 @@ Description: 提取和分类的数据模型
|
||||
"""
|
||||
|
||||
from pydantic import BaseModel, Field
|
||||
from typing import List, Optional
|
||||
from typing import List, Optional, Dict, Tuple
|
||||
|
||||
|
||||
# 定义输出模型
|
||||
@@ -33,4 +33,138 @@ class Classification(BaseModel):
|
||||
sub_classification:str = Field(description="一级分类下的二级分类")
|
||||
|
||||
class QueryRewrite(BaseModel):
    """Output model holding the rewritten form of a user question."""

    # Rewritten question text.
    rewrite: str = Field(description="问题改写")
|
||||
|
||||
# 1. 软件问题
|
||||
# 1.1 软件功能
|
||||
class SoftwareFunction(BaseModel):
    """Slot model for the "软件问题 / 软件功能" (software feature) category.

    ``software_name``, ``function_name`` and ``operation`` are required;
    the remaining slots are optional and default to ``None``.
    """

    software_name: str = Field(description="软件名称")
    function_name: str = Field(description="具体功能名称")
    operation: str = Field(description="用户操作意图(如何使用功能、功能入口、功能使用场景)")
    software_version: Optional[str] = Field(None, description="软件版本")
    operation_steps: Optional[str] = Field(None, description="操作步骤描述")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("software_name", "function_name", "operation")
        field_infos = SoftwareFunction.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 1.2 故障排查
|
||||
class TroubleShooting(BaseModel):
    """Slot model for the "软件问题 / 故障排查" (troubleshooting) category.

    ``software_name``, ``function_name`` and ``error_message`` are required;
    the remaining slots are optional and default to ``None``.
    """

    software_name: str = Field(description="软件名称")
    function_name: str = Field(description="具体功能名称/操作描述")
    error_message: str = Field(description="报错信息/异常现象")
    software_version: Optional[str] = Field(None, description="软件版本")
    os_version: Optional[str] = Field(None, description="操作系统及版本")
    reproduction_steps: Optional[str] = Field(None, description="故障重现步骤")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("software_name", "function_name", "error_message")
        field_infos = TroubleShooting.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 2. 业务问题
|
||||
# 2.1 专业咨询
|
||||
class ProfessionalConsulting(BaseModel):
    """Slot model for the "业务问题 / 专业咨询" (professional consulting) category.

    ``scene_subject`` and ``business_scene`` are required; ``software_name``
    is optional and defaults to ``None``.
    """

    scene_subject: str = Field(description="场景主体")
    business_scene: str = Field(description="业务场景描述")
    software_name: Optional[str] = Field(None, description="软件名称")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("scene_subject", "business_scene")
        field_infos = ProfessionalConsulting.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 2.2 数据问题
|
||||
class DataProblem(BaseModel):
    """Slot model for the "业务问题 / 数据问题" (data problem) category.

    ``expense_type`` and ``operation_purpose`` are required; the remaining
    slots are optional and default to ``None``.
    """

    expense_type: str = Field(description="费用类型")
    operation_purpose: str = Field(description="操作目的")
    software_name: Optional[str] = Field(None, description="软件名称")
    project_type: Optional[str] = Field(None, description="工程类型")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("expense_type", "operation_purpose")
        field_infos = DataProblem.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 3. 安装下载注册
|
||||
# 3.1 后缀名咨询
|
||||
class FileExtensionConsulting(BaseModel):
    """Slot model for the "后缀名咨询" (file-extension consulting) category.

    ``file_extension`` and ``operation_purpose`` are required; the remaining
    slots are optional and default to ``None``.
    """

    file_extension: str = Field(description="文件后缀名")
    operation_purpose: str = Field(description="操作目的")
    file_source: Optional[str] = Field(None, description="文件来源场景")
    related_software: Optional[str] = Field(None, description="相关软件名称")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("file_extension", "operation_purpose")
        field_infos = FileExtensionConsulting.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 3.2 软件锁类
|
||||
class SoftwareLock(BaseModel):
    """Slot model for the "软件锁类" (software lock) category.

    ``lock_type`` and ``operation_purpose`` are required; ``lock_number`` is
    optional and defaults to ``None``.
    """

    lock_type: str = Field(description="锁类型")
    operation_purpose: str = Field(description="操作目的")
    lock_number: Optional[str] = Field(None, description="软件锁编号/注册号")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("lock_type", "operation_purpose")
        field_infos = SoftwareLock.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 3.3 安装下载类
|
||||
class InstallationDownload(BaseModel):
    """Slot model for the "安装下载类" (installation / download) category.

    ``software_name`` and ``operation_stage`` are required; the remaining
    slots are optional and default to ``None``.
    """

    software_name: str = Field(description="软件/插件名称")
    operation_stage: str = Field(description="操作阶段")
    os_version: Optional[str] = Field(None, description="操作系统版本")
    package_source: Optional[str] = Field(None, description="安装包来源/版本号")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("software_name", "operation_stage")
        field_infos = InstallationDownload.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
# 3.4 问题排查类
|
||||
class ProblemDiagnosis(BaseModel):
    """Slot model for the "问题排查类" (problem diagnosis) category.

    Only ``error_message`` is required; the remaining slots are optional and
    default to ``None``.
    """

    error_message: str = Field(description="报错信息/异常现象")
    software_name: Optional[str] = Field(None, description="软件名称")
    os_version: Optional[str] = Field(None, description="操作系统版本")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which required slots are still empty.

        Returns:
            A ``(is_complete, missing)`` pair. ``missing`` maps the name of
            every required slot whose value is falsy to that field's
            description; ``is_complete`` is True when ``missing`` is empty.
        """
        required_slots = ("error_message",)
        field_infos = ProblemDiagnosis.model_fields
        missing = {
            slot: field_infos[slot].description
            for slot in required_slots
            if not getattr(self, slot)
        }
        return not missing, missing
|
||||
|
||||
|
||||
@@ -11,12 +11,17 @@ import os
|
||||
from langchain_openai import ChatOpenAI
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
import json
|
||||
from typing import List, Tuple
|
||||
from typing import List, Tuple, Dict, Any, Optional, Union
|
||||
import re
|
||||
from .PromptTemplates import classification_prompt, query_rewrite_prompt, extract_nouns_prompt, classification_info
|
||||
from .DataModels import Classification, QueryRewrite, Term, TermList
|
||||
from .PromptTemplates import classification_prompt, query_rewrite_prompt, extract_nouns_prompt, classification_info, slot_filling_prompt
|
||||
from .DataModels import (
|
||||
Classification, QueryRewrite, Term, TermList,
|
||||
SoftwareFunction, TroubleShooting, ProfessionalConsulting,
|
||||
DataProblem, FileExtensionConsulting, SoftwareLock,
|
||||
InstallationDownload, ProblemDiagnosis
|
||||
)
|
||||
from .ProfessionalNounVector import ProfessionalNounRetriever
|
||||
from rag2_0.tool.ModelTool import XinferenceReRankerModel, OpenAiLLM
|
||||
from rag2_0.tool.ModelTool import XinferenceReRankerModel, OpenAiLLM, SiliconFlowReRankerModel
|
||||
|
||||
|
||||
class IntentRecognizer:
|
||||
@@ -184,7 +189,7 @@ class IntentRecognizer:
|
||||
if len(matched_terms) != 0:
|
||||
txts = ["名称:" + term.name + "|" + "同义词:" + ";".join(term.synonymous) + "|" + "描述:" + term.description for term in matched_terms]
|
||||
# txts = [term.name for term in matched_terms]
|
||||
xinference_reranker = XinferenceReRankerModel()
|
||||
xinference_reranker = SiliconFlowReRankerModel()
|
||||
rerank_results = xinference_reranker.rerank(query, txts, top_k=5)
|
||||
matched_terms_list = list(matched_terms)
|
||||
matched_terms = [matched_terms_list[result["index"]] for result in rerank_results]
|
||||
@@ -288,4 +293,136 @@ class IntentRecognizer:
|
||||
return classification, TermList(terms=[]), QueryRewrite(rewrite=query),[]
|
||||
|
||||
# rewrite = QueryRewrite(rewrite=query)
|
||||
return classification, keywords_terms, rewrite, query_keys
|
||||
return classification, keywords_terms, rewrite, query_keys
|
||||
|
||||
def fill_slots(self, query: str, classification: Classification, keywords: TermList) -> Dict[str, Any]:
    """Fill the slots of a question according to its classification.

    Args:
        query: The user's question text.
        classification: Intent classification result.
        keywords: Matched keyword terms.

    Returns:
        ``{"error": ...}`` when no slot model matches the classification;
        otherwise a dict with ``is_complete``, ``missing_slots`` and
        ``filled_data`` (the dumped slot model).
    """
    # Pick the slot model that corresponds to the classification.
    model_cls = self._get_slot_model(classification)
    if not model_cls:
        return {"error": "未找到匹配的槽位模型"}

    # Let the LLM populate the slots, then verify the required ones.
    slots = self._fill_slots_with_llm(query, classification, keywords, model_cls)
    complete, missing = slots.check_required_slots()

    result: Dict[str, Any] = {"is_complete": complete, "missing_slots": missing}
    result["filled_data"] = slots.model_dump()
    return result
|
||||
|
||||
def _get_slot_model(self, classification: Classification) -> Optional[type]:
    """Look up the slot model class for a classification result.

    Args:
        classification: Intent classification result.

    Returns:
        The slot model class registered for the
        ``(vertical_classification, sub_classification)`` pair, or ``None``
        when the pair is not recognised.
    """
    # NOTE(review): the vertical label matched here is "安装下载", while the
    # prompt text and section comment call this category "安装下载注册" —
    # confirm which label the classifier actually emits.
    slot_models = {
        # Software issues
        ("软件问题", "软件功能"): SoftwareFunction,
        ("软件问题", "故障排查"): TroubleShooting,
        # Business issues
        ("业务问题", "专业咨询"): ProfessionalConsulting,
        ("业务问题", "数据问题"): DataProblem,
        # Installation / download / registration
        ("安装下载", "后缀名咨询"): FileExtensionConsulting,
        ("安装下载", "软件锁类"): SoftwareLock,
        ("安装下载", "安装下载类"): InstallationDownload,
        ("安装下载", "问题排查类"): ProblemDiagnosis,
    }
    lookup_key = (
        classification.vertical_classification,
        classification.sub_classification,
    )
    return slot_models.get(lookup_key)
|
||||
|
||||
def _fill_slots_with_llm(self, query: str, classification: Classification, keywords: TermList, slot_model_class: type) -> Any:
    """Ask the LLM to fill the slots of ``slot_model_class`` for a question.

    Args:
        query: The user's question text.
        classification: Intent classification result.
        keywords: Matched keyword terms.
        slot_model_class: Slot model class (pydantic model) to populate.

    Returns:
        An instance of ``slot_model_class``. When the LLM response cannot be
        parsed, a blank instance is returned in which every required slot is
        an empty string and every optional slot keeps its default, so
        ``check_required_slots`` reports all required slots as missing.
    """
    import logging

    # Build the prompt: JSON schema of the target model plus serialized keywords.
    slot_parser = PydanticOutputParser(pydantic_object=slot_model_class)
    model_schema = json.dumps(slot_model_class.model_json_schema(), ensure_ascii=False)
    terms_dict = [term.model_dump() for term in keywords.terms]
    keywords_str = json.dumps(terms_dict, ensure_ascii=False)

    formatted_prompt = slot_filling_prompt.format(
        query=query,
        vertical_classification=classification.vertical_classification,
        sub_classification=classification.sub_classification,
        keywords=keywords_str,
        model_schema=model_schema,
        output_format=slot_parser.get_format_instructions(),
    )

    # Call the LLM.
    # NOTE(review): the meaning of the second positional argument (False) is
    # defined by the project's LLM wrapper — confirm against its signature.
    response = self.llm.invoke(formatted_prompt, False)

    try:
        return slot_parser.parse(response.content)
    except Exception:
        # Best-effort fallback, but leave a trace instead of failing silently.
        logging.getLogger(__name__).warning(
            "Slot filling output could not be parsed into %s; returning blank slots",
            getattr(slot_model_class, "__name__", slot_model_class),
            exc_info=True,
        )
        # Calling slot_model_class() with no arguments would raise a
        # ValidationError because the slot models declare required fields.
        # model_construct skips validation and applies field defaults, so only
        # the required slots need an explicit blank value.
        blank_required = {
            name: ""
            for name, info in slot_model_class.model_fields.items()
            if info.is_required()
        }
        return slot_model_class.model_construct(**blank_required)
|
||||
|
||||
def process_query_with_slots(self, query: str) -> Dict[str, Any]:
    """Run the full query pipeline, including slot filling.

    Args:
        query: The user's original question.

    Returns:
        A dict with the dumped classification, keywords and rewrite, the
        query keys, and the slot filling result (an empty dict when slot
        filling is skipped).
    """
    # Base pipeline: classification, keyword matching and query rewrite.
    classification, keywords, rewrite, query_keys = self.process_query(query)

    # Slot filling only runs when neither classification level is a
    # catch-all ("其他") or chit-chat ("闲聊") label.
    skipped_labels = ("其他", "闲聊")
    is_skipped = (
        classification.vertical_classification in skipped_labels
        or classification.sub_classification in skipped_labels
    )
    if is_skipped:
        slot_filling_result = {}
    else:
        slot_filling_result = self.fill_slots(rewrite.rewrite, classification, keywords)

    summary: Dict[str, Any] = {}
    summary["classification"] = classification.model_dump()
    summary["keywords"] = keywords.model_dump()
    summary["rewrite"] = rewrite.model_dump()
    summary["query_keys"] = query_keys
    summary["slot_filling"] = slot_filling_result
    return summary
|
||||
@@ -38,7 +38,7 @@ classification_info="""【垂直领域分类】:
|
||||
|
||||
【业务问题包括以下两类】:
|
||||
1. 专业咨询:涉及电力造价规范、工程计价规则问题、行业标准解读等
|
||||
2. 数据问题:涉及电力造价费用、造价指标等
|
||||
2. 数据问题:涉及电力造价费用、造价指标的计算或构成等
|
||||
|
||||
【安装下载注册包括以下三类】:
|
||||
1. 后缀名咨询:所有涉及文件扩展名的使用场景、软件关联等问题,包括但不限于:询问文件是否由特定软件打开、扩展名与软件的匹配关系、扩展名含义及关联等
|
||||
@@ -94,7 +94,7 @@ query_rewrite_prompt = """
|
||||
b. 执行结构优化:
|
||||
- 采用【术语标记】规范标注关键概念
|
||||
- 构建主谓宾明确的问题句式
|
||||
- 保持原问题时态与语态特征
|
||||
- 保持原问题时态与语态特征, 保留5W2H问题特征
|
||||
- 执行同义词替换:将synonymous中的同义词替换为对应name字段的标准术语
|
||||
|
||||
# 输出规范
|
||||
@@ -132,4 +132,43 @@ query_rewrite_prompt = """
|
||||
4. 异常处理机制
|
||||
- 当关键词与问题无明显关联时,触发直通输出规则
|
||||
- 出现术语冲突时优先保留原始表述
|
||||
"""
|
||||
"""
|
||||
|
||||
# Prompt template for LLM slot filling. It is rendered with str.format using
# the placeholders query, vertical_classification, sub_classification,
# keywords, model_schema and output_format; literal JSON braces in the example
# are therefore doubled.
# Required slots are non-nullable strings in the slot models, so the prompt
# asks for an empty string (not null) when a required slot cannot be extracted.
slot_filling_prompt = """
你是一个专业的电力造价领域问题槽位填充助手。你需要从用户问题中提取关键信息,并填充到对应的数据结构中。

【用户问题】
{query}

【问题分类】
垂直领域分类: {vertical_classification}
子分类: {sub_classification}

【已识别关键词】
{keywords}

【目标数据结构】
{model_schema}

【输出格式】
{output_format}

【任务要求】
1. 仔细分析用户问题,从中提取所有可能的槽位信息
2. 对于必填槽位,必须尽力从问题中提取,如果确实无法提取则填写空字符串 "" (不要填写 null)
3. 对于选填槽位,如果能从问题中提取则填写,否则填写 null
4. 只输出符合格式的JSON数据,不要有任何额外的解释

【示例】
用户问题: "我的西藏Z1软件安装后闪退,提示缺少组件,怎么解决?"
分类: 软件问题/故障排查
输出:
{{
    "software_name": "西藏Z1软件",
    "function_name": "软件安装",
    "error_message": "闪退,提示缺少组件",
    "software_version": null,
    "os_version": null,
    "reproduction_steps": "软件安装后"
}}
"""
|
||||
Reference in New Issue
Block a user