Files
2025-03-31 17:28:23 +08:00

198 lines
8.7 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
from datetime import datetime
from enum import Enum
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
@dataclass
class UserInfo:
    """Basic information about a user.

    Attributes:
        user_id: User account ID; the unique identifier of the current user.
        attributes: User account attributes, e.g. personal information.
    """

    user_id: str
    attributes: Dict[str, Any]
# ==========================================================================================================
@dataclass
class SessionId:
    """Identifier record for a session.

    Attributes:
        id: The session identifier string.
        items: A list of strings attached to the session
            (presumably the dialog IDs belonging to it -- TODO confirm).
    """

    id: str
    items: List[str]
class DialogType(str, Enum):
    """Type of a dialog turn.

    This is a plain ``str``-valued enum and is intentionally NOT decorated
    with ``@dataclass``: the dataclass decorator is unsupported on Enum
    subclasses.  With no annotated fields it generates an ``__eq__`` that
    makes every member compare equal to every other member and sets
    ``__hash__`` to ``None``, so members could no longer be told apart or
    used in sets / as dict keys.
    """

    IN_PROGRESS = "应答中"  # answering in progress
    OPENING = "开头"  # opening of the conversation
    CLOSING = "结尾"  # closing of the conversation
    COMPLETION = "补全"  # completion
    FEEDBACK = "反馈"  # feedback
    SCRIPTING = "话术进行中"  # scripted dialogue in progress
@dataclass
class DialogInfo:
    """Information about the current dialog.

    Attributes:
        dialog_id: Dialog ID; unique identifier of the current dialog
            (one question and its answer).
        session_id: Session ID; unique identifier of one complete
            interaction from the start to the end of a consultation,
            covering multiple dialog IDs.
        terminal_type: Terminal type; the kind of client used for the
            interaction, e.g. windows or Android.
        bot_code: Bot code; unique identifier of the bot handling the
            current dialog.
        dialog_type: Dialog type: in progress, opening, closing,
            completion, feedback, or scripting in progress.
        timestamp: Time of the question/answer exchange.
    """

    dialog_id: str
    session_id: SessionId
    terminal_type: str
    bot_code: str
    dialog_type: DialogType
    timestamp: datetime
# ==========================================================================================================
@dataclass
class NLUResult:
    """Basic unit of a recognition result.

    Attributes:
        label: Name of the label.
        score: Probability score of the label.
    """

    label: str
    score: float
@dataclass
class FAQResult:
    """Result of an FAQ match.

    Attributes:
        question: The question that was matched.
        similarity_score: Similarity score of the match.
        classification_score: Classification score of the match.
    """

    question: str
    similarity_score: float
    classification_score: float
class QuestionType(str, Enum):
    """Question-word category of a query (the 5W2H-style question types)."""

    WHEN = "when"
    WHERE = "where"
    WHO = "who"
    WHAT = "what"
    WHY = "why"
    HOW = "how"
    HOW_MUCH = "how much"
@dataclass
class Tokens:
    """Unit holding a word-segmentation result.

    Attributes:
        text: The segmented token texts.
        pos: Part-of-speech tags.
        entity: Entity type, or ``None`` when not set.
    """

    text: List[str]
    pos: List[str]
    entity: Optional[str] = None
class EmotionCategory(str, Enum):
    """Coarse-grained emotion category."""

    POSITIVE = "正面"  # positive
    NEUTRAL = "中性"  # neutral
    NEGATIVE = "负面"  # negative
class DetailedEmotion(str, Enum):
    """Fine-grained emotion category."""

    HAPPY = "开心"  # happy
    EXCITED = "兴奋"  # excited
    ANGRY = "愤怒"  # angry
    SAD = "悲伤"  # sad
    NEUTRAL = "客观描述"  # objective description
@dataclass
class InheritedInfo:
    """Container for information inherited from the conversation context.

    Attributes:
        business_entity: Business entity word.
        intent_keyword: Intent keyword.
        domain: Domain.
        intent: Intent.
    """

    business_entity: Optional[str] = None
    intent_keyword: Optional[str] = None
    domain: Optional[str] = None
    intent: Optional[str] = None

    def get_priority_info(self) -> Optional[str]:
        """Return the inherited value with the highest priority.

        Candidates are checked in the order business entity, intent keyword,
        domain, intent.  The first truthy one is returned; falsy values
        (``None`` or an empty string) are skipped.

        Returns:
            The first truthy candidate, or ``None`` when all are falsy.
        """
        ranked = (
            self.business_entity,
            self.intent_keyword,
            self.domain,
            self.intent,
        )
        for candidate in ranked:
            if candidate:
                return candidate
        return None
@dataclass
class NLUInfo:
    """Semantic-understanding (NLU) information.

    Attributes:
        vertical_category: Vertical/open classification (level 1): decides
            whether the opening utterance is chit-chat or a business
            consultation.
        intent_category: Intent classification (level 2): the action
            category beneath the level-1 classification.
        domain_category: Domain classification (level 3): the domain that
            the recognised intent (action) applies to.
        domain_results: Domain recognition results; each element is a
            predicted domain with its probability score (usually produced
            by multi-class classification).
        intent_results: Intent recognition results; each element is a
            predicted intent with its probability score (usually produced
            by multi-class classification).
        previous_domain: Domain of the dialog immediately preceding the
            current one.
        previous_intent: Intent of the dialog immediately preceding the
            current one.
        faq_results: FAQ results; each element is a most-similar matched
            question with its similarity and classification scores (a
            match can be adopted when both exceed some threshold).
        question_type: Question type; one of the 5W2H values (when, where,
            who, what, why, how, how much).
        tokens: Tokenisation result: word segmentation, part-of-speech
            tagging and entity recognition of the user's question.
        pinyin: Per-character pinyin, to help error correction.
        tones: Per-character tones, to help error correction.
        emotion: User emotion, coarse category (positive, neutral,
            negative).
        detailed_emotion: User emotion, fine category (happy, excited,
            angry, sad, objective description, ...).
        inherited_info: Inherited information; a dialog may inherit only
            one kind of information from context (priority: business
            entity word > intent keyword > domain > intent).
    """

    vertical_category: str
    intent_category: str
    domain_category: str
    domain_results: List[NLUResult]
    intent_results: List[NLUResult]
    previous_domain: Optional[str] = None
    previous_intent: Optional[str] = None
    # Annotated as a list: the field is documented as "a list whose elements
    # are matched questions", which the former Optional[FAQResult] annotation
    # contradicted.  The default (None) and runtime behaviour are unchanged.
    faq_results: Optional[List[FAQResult]] = None
    question_type: Optional[QuestionType] = None
    tokens: Optional[Tokens] = None
    # default_factory gives every instance its own fresh list / container.
    pinyin: List[str] = field(default_factory=list)
    tones: List[str] = field(default_factory=list)
    emotion: Optional[EmotionCategory] = None
    detailed_emotion: Optional[DetailedEmotion] = None
    inherited_info: InheritedInfo = field(default_factory=InheritedInfo)
# ==========================================================================================================
@dataclass
class SceneInfo:
    """Information about a scene.

    Attributes:
        scene_id: Scene ID.
        scene_name: Scene name.
        sub_intent: Sub-intent of the scene.
        slots: Slot information.
        variables: Scene variables.
        trigger_type: How the scene was triggered.
        process_status: Status of the scene flow.
        interaction_flow_id: Interaction flow ID.
        flow_instance_id: Interaction flow instance ID.
    """

    scene_id: str
    scene_name: str
    sub_intent: Optional[str] = None
    # The dict defaults go through default_factory so that each instance
    # gets a brand-new dict instead of sharing one across instances.
    slots: Dict[str, Any] = field(default_factory=dict)
    variables: Dict[str, Any] = field(default_factory=dict)
    trigger_type: Optional[str] = None
    process_status: Optional[str] = None
    interaction_flow_id: Optional[str] = None
    flow_instance_id: Optional[str] = None
# ==========================================================================================================
@dataclass
class QuestionInfo:
    """Information about the user's question.

    Attributes:
        raw_question: Raw question; the user's original input.
        composite_question: Composite question; the user's multi-part
            input, including multimedia information.
        mediate_processing: Intermediate processing; temporary storage for
            intermediate steps such as stop-word handling or text
            correction.
        final_processed: Final question produced after all processing
            steps.
    """

    raw_question: str
    # default_factory: every instance receives its own empty dict.
    composite_question: Dict[str, Any] = field(default_factory=dict)
    mediate_processing: Dict[str, Any] = field(default_factory=dict)
    final_processed: Optional[str] = None
# ==========================================================================================================
@dataclass
class AnswerUnit:
    """A single answer unit.

    Attributes:
        answer_id: Answer ID.
        question: The user's original input.
        content: Answer content.
        score: Score of the answer; may be ``None``.
        faq_flag: FAQ flag.
        render_type: How the answer is rendered.
        model_source: The model that produced the answer.
    """

    answer_id: str
    question: str
    content: str
    score: Optional[float]
    faq_flag: bool
    render_type: str
    model_source: str
class HumanType(str, Enum):
    """Category used when handing over to a human agent."""

    chat = "闲聊"  # chit-chat
    qa = "问答"  # question answering
    task = "任务"  # task
    recommend = "推荐"  # recommendation
@dataclass
class Artificial:
    """Details of a hand-over to a human agent.

    Attributes:
        human_type: Human-agent type (QA, task, recommendation, ...).
        transfer_text: Transfer text; the question the user asked.
        transfer_reason: Transfer reason; the condition that triggered the
            hand-over.
    """

    human_type: HumanType
    transfer_text: Optional[str] = None
    transfer_reason: Optional[str] = None
class PrimaryType(str, Enum):
    """Level-1 (primary) answer type."""

    chat = "闲聊"  # chit-chat
    qa = "问答"  # question answering
    task = "任务"  # task
    recommend = "推荐"  # recommendation
@dataclass
class SecondaryType:
    """Level-2 (secondary) answer type.

    Intentionally has no fields: its content depends on the concrete
    business.
    """
@dataclass
class AnswerInfo:
    """System answer information.

    Attributes:
        primary_type: Level-1 answer type.
        secondary_type: Level-2 answer type.
        need_rating: Whether to invite the user to rate the service.
        transfer_human: Whether to transfer to a human agent.
        artificial_info: Human hand-over details (chit-chat, QA, task,
            recommendation).  NOTE(review): the original comment said this
            is only present when ``need_rating`` is True; ``transfer_human``
            looks like the relevant flag -- confirm.
        answers: Answer list; each element is an ``AnswerUnit`` carrying
            the answer ID, the user's question, the answer content, score,
            FAQ flag, rendering type and producing model.
    """

    primary_type: PrimaryType
    secondary_type: Optional[SecondaryType] = None
    need_rating: bool = False
    transfer_human: bool = False
    artificial_info: Optional[Artificial] = None
    # The default used to be the AnswerUnit class object itself, so an
    # AnswerInfo built without `answers` held a class instead of a list.
    # default_factory gives every instance its own empty list.
    answers: List[AnswerUnit] = field(default_factory=list)