198 lines
8.7 KiB
Python
198 lines
8.7 KiB
Python
from datetime import datetime
|
||
from enum import Enum
|
||
from dataclasses import dataclass, field
|
||
from typing import List, Dict, Optional, Any
|
||
|
||
|
||
@dataclass
class UserInfo:
    """Basic information about the user."""

    # Account ID: unique identifier of the current user.
    user_id: str
    # Account attributes: personal details and similar per-user information.
    attributes: Dict[str, Any]
|
||
|
||
|
||
# ==========================================================================================================
|
||
@dataclass
class SessionId:
    """Session identifier bundled with a list of string items.

    NOTE(review): ``items`` presumably holds the dialog IDs that belong to the
    session -- confirm against the code that builds this object.
    """

    # Identifier string of the session.
    id: str
    # String items attached to the session.
    items: List[str]
|
||
|
||
class DialogType(str, Enum):
    """Kind/stage of a dialog turn, as a string-valued enumeration.

    This class is intentionally NOT decorated with ``@dataclass``: applying
    the decorator to an Enum installs a field-based ``__eq__`` (there are no
    fields, so every member compares equal to every other member) and sets
    ``__hash__`` to ``None``, making members unusable in sets and as dict
    keys.
    """

    IN_PROGRESS = "应答中"  # answering in progress
    OPENING = "开头"  # opening
    CLOSING = "结尾"  # closing
    COMPLETION = "补全"  # completion
    FEEDBACK = "反馈"  # feedback
    SCRIPTING = "话术进行中"  # scripted talk in progress
|
||
|
||
|
||
@dataclass
class DialogInfo:
    """Information about the current dialog (one question/answer exchange)."""

    # Dialog ID: unique identifier of the current dialog (one question, one
    # answer).
    dialog_id: str
    # Session ID: unique identifier of one complete interaction, from the start
    # of a consultation to its end; a session contains multiple dialog IDs.
    session_id: SessionId
    # Terminal type: kind of client used for the interaction, e.g. windows,
    # Android.
    terminal_type: str
    # Bot code: unique identifier of the bot handling the current dialog.
    bot_code: str
    # Dialog type: answering in progress, opening, closing, completion,
    # feedback, or scripted talk in progress.
    dialog_type: DialogType
    # Timestamp: time of the question/answer exchange.
    timestamp: datetime
|
||
|
||
|
||
# ==========================================================================================================
|
||
@dataclass
class NLUResult:
    """Basic unit of a recognition result: a label and its score."""

    # Label name.
    label: str
    # Probability score.
    score: float
|
||
|
||
|
||
@dataclass
class FAQResult:
    """Result of matching a question against the FAQ."""

    # The matched question.
    question: str
    # Similarity score.
    similarity_score: float
    # Classification score.
    classification_score: float
|
||
|
||
|
||
class QuestionType(str, Enum):
    """Interrogative category of a question, as a string-valued enumeration."""

    WHEN = "when"
    WHERE = "where"
    WHO = "who"
    WHAT = "what"
    WHY = "why"
    HOW = "how"
    HOW_MUCH = "how much"
|
||
|
||
|
||
@dataclass
class Tokens:
    """Unit holding a word-segmentation (tokenization) result."""

    # Tokenized text.
    text: List[str]
    # Part-of-speech tags.
    pos: List[str]
    # Entity type; None when not provided.
    entity: Optional[str] = None
|
||
|
||
|
||
class EmotionCategory(str, Enum):
    """Coarse-grained emotion category, as a string-valued enumeration."""

    POSITIVE = "正面"  # positive
    NEUTRAL = "中性"  # neutral
    NEGATIVE = "负面"  # negative
|
||
|
||
|
||
class DetailedEmotion(str, Enum):
    """Fine-grained emotion category, as a string-valued enumeration."""

    HAPPY = "开心"  # happy
    EXCITED = "兴奋"  # excited
    ANGRY = "愤怒"  # angry
    SAD = "悲伤"  # sad
    NEUTRAL = "客观描述"  # objective description
|
||
|
||
|
||
@dataclass
class InheritedInfo:
    """Container for information inherited from the conversation context."""

    # Business entity word.
    business_entity: Optional[str] = None
    # Intent keyword.
    intent_keyword: Optional[str] = None
    # Domain.
    domain: Optional[str] = None
    # Intent.
    intent: Optional[str] = None

    def get_priority_info(self) -> Optional[str]:
        """Return the inherited value with the highest priority.

        The fields are examined in the order ``business_entity``,
        ``intent_keyword``, ``domain``, ``intent`` and the first truthy one
        is returned. Falsy values (``None`` or an empty string) are skipped.

        Returns:
            The first truthy field value, or ``None`` when every field is
            falsy.
        """
        ordered_candidates = (
            self.business_entity,
            self.intent_keyword,
            self.domain,
            self.intent,
        )
        for candidate in ordered_candidates:
            if candidate:
                return candidate
        return None
|
||
|
||
|
||
@dataclass
class NLUInfo:
    """Natural-language-understanding (semantic) information for a dialog."""

    # Vertical/open classification (level 1): decides, for the opening
    # sentence, whether it is chit-chat or a business consultation.
    vertical_category: str
    # Intent classification (level 2): the action category under the level-1
    # classification.
    intent_category: str
    # Domain classification (level 3): which domain the recognized intent
    # (action) applies to.
    domain_category: str
    # Domain recognition results: each element is a predicted domain with its
    # probability score (typically produced by multi-class classification).
    domain_results: List[NLUResult]
    # Intent recognition results: each element is a predicted intent with its
    # probability score (typically produced by multi-class classification).
    intent_results: List[NLUResult]
    # Previous domain: domain of the dialog closest before the current one.
    previous_domain: Optional[str] = None
    # Previous intent: intent of the dialog closest before the current one.
    previous_intent: Optional[str] = None
    # FAQ result list: each element is a matched most-similar question with its
    # similarity and classification scores (adoptable when both exceed some
    # threshold). Annotated as a list to agree with the plural field name and
    # the sibling ``*_results`` fields; the default stays None.
    faq_results: Optional[List[FAQResult]] = None
    # Question type: one of 5W2H (when, where, who, what, why, how, how much).
    question_type: Optional[QuestionType] = None
    # Tokenization result: word segmentation of the user's question together
    # with part-of-speech tagging and entity recognition.
    tokens: Optional[Tokens] = None
    # Pinyin list: character-by-character pinyin, to help error correction.
    pinyin: List[str] = field(default_factory=list)
    # Tone list: character-by-character tones, to help error correction.
    tones: List[str] = field(default_factory=list)
    # User emotion, coarse-grained (positive, neutral, negative).
    emotion: Optional[EmotionCategory] = None
    # User emotion, fine-grained (happy, excited, angry, sad, objective
    # description, etc.).
    detailed_emotion: Optional[DetailedEmotion] = None
    # Inherited info: a dialog may inherit only one kind of information from
    # the context (priority: business entity word > intent keyword > domain >
    # intent). A fresh InheritedInfo is created per instance.
    inherited_info: InheritedInfo = field(default_factory=InheritedInfo)
|
||
|
||
|
||
# ==========================================================================================================
|
||
|
||
@dataclass
class SceneInfo:
    """Scene information."""

    # Scene ID.
    scene_id: str
    # Scene name.
    scene_name: str
    # Scene sub-intent.
    sub_intent: Optional[str] = None
    # The two dict fields below use default_factory so that each instance gets
    # its own new dict instead of sharing one dict across instances.
    # Slot information.
    slots: Dict[str, Any] = field(default_factory=dict)
    # Scene variables.
    variables: Dict[str, Any] = field(default_factory=dict)
    # Trigger mode.
    trigger_type: Optional[str] = None
    # Scene flow status.
    process_status: Optional[str] = None
    # Interaction flow ID.
    interaction_flow_id: Optional[str] = None
    # Interaction flow instance ID.
    flow_instance_id: Optional[str] = None
|
||
|
||
|
||
# ==========================================================================================================
|
||
|
||
@dataclass
class QuestionInfo:
    """Information about the user's question."""

    # Raw question: the user's original input.
    raw_question: str
    # Composite question: the user's multi-part input (including multimedia
    # information). A new dict is created per instance.
    composite_question: Dict[str, Any] = field(default_factory=dict)
    # Intermediate processing: temporary storage for intermediate steps such
    # as stop-word handling and text correction. A new dict per instance.
    mediate_processing: Dict[str, Any] = field(default_factory=dict)
    # Final processed: the final question obtained after all processing steps.
    final_processed: Optional[str] = None
|
||
|
||
|
||
# ==========================================================================================================
|
||
@dataclass
class AnswerUnit:
    """A single answer unit."""

    # Answer ID.
    answer_id: str
    # The user's original input.
    question: str
    # Answer content.
    content: str
    # Score; the field is required, but None is an accepted value.
    score: Optional[float]
    # FAQ flag.
    faq_flag: bool
    # Answer rendering.
    render_type: str
    # Answer model.
    model_source: str
|
||
|
||
|
||
class HumanType(str, Enum):
    """Type of human-agent service, as a string-valued enumeration."""

    chat = "闲聊"  # chit-chat
    qa = "问答"  # question answering
    task = "任务"  # task
    recommend = "推荐"  # recommendation
|
||
|
||
|
||
@dataclass
class Artificial:
    """Details of a transfer to a human agent."""

    # Human-agent type: Q&A, task, recommendation.
    human_type: HumanType
    # Transfer text: the question the user asked.
    transfer_text: Optional[str] = None
    # Transfer reason: the triggering condition.
    transfer_reason: Optional[str] = None
|
||
|
||
|
||
class PrimaryType(str, Enum):
    """Level-1 answer type, as a string-valued enumeration."""

    chat = "闲聊"  # chit-chat
    qa = "问答"  # question answering
    task = "任务"  # task
    recommend = "推荐"  # recommendation
|
||
|
||
|
||
@dataclass
class SecondaryType:
    """Level-2 answer type placeholder with no fields.

    Its content depends on the concrete business and is not defined here.
    """
|
||
|
||
|
||
@dataclass
class AnswerInfo:
    """System response information."""

    # Level-1 answer type.
    primary_type: PrimaryType
    # Level-2 answer type.
    secondary_type: Optional[SecondaryType] = None
    # Rating invitation: invite the user to rate the service.
    need_rating: bool = False
    # Transfer to a human agent.
    transfer_human: bool = False
    # Human-agent info (chit-chat, Q&A, task, recommendation); present only
    # when need_rating=True.
    # NOTE(review): the condition was possibly meant to be transfer_human=True
    # -- confirm with the code that populates this field.
    artificial_info: Optional[Artificial] = None
    # Answer list: each element is an AnswerUnit (answer ID, user question,
    # answer content, score, FAQ flag, rendering type, producing model, ...).
    # default_factory gives every instance its own empty list; the default must
    # be a list, not the AnswerUnit class object.
    answers: List[AnswerUnit] = field(default_factory=list)