首次提交:上传本地文件夹

This commit is contained in:
ruxia
2025-03-31 17:28:23 +08:00
commit 0de349447c
439 changed files with 36643 additions and 0 deletions
+198
View File
@@ -0,0 +1,198 @@
from datetime import datetime
from enum import Enum
from dataclasses import dataclass, field
from typing import List, Dict, Optional, Any
@dataclass
class UserInfo:
    """Basic information about a user.

    Attributes:
        user_id: User account ID; the unique identifier of the current user.
        attributes: User account attributes, e.g. personal information.
    """

    user_id: str
    attributes: Dict[str, Any]
# ==========================================================================================================
@dataclass
class SessionId:
    """Identifier record for a session.

    Attributes:
        id: The session identifier.
        items: String entries attached to the session.
            NOTE(review): presumably the dialog IDs that belong to this
            session (the session is described elsewhere in this file as
            containing multiple dialog IDs) -- confirm.
    """

    id: str
    items: List[str]
class DialogType(str, Enum):
    """Type of a dialog turn.

    This is a plain ``str``-valued enum. It must NOT be decorated with
    ``@dataclass``: an Enum has no dataclass fields, so the generated
    ``__eq__`` would make every member compare equal to every other member
    and ``__hash__`` would be set to ``None`` (members unusable in sets or
    as dict keys).
    """

    IN_PROGRESS = "应答中"  # answering in progress
    OPENING = "开头"  # opening
    CLOSING = "结尾"  # closing
    COMPLETION = "补全"  # completion
    FEEDBACK = "反馈"  # feedback
    SCRIPTING = "话术进行中"  # scripted dialogue in progress
@dataclass
class DialogInfo:
    """Information about the current dialog.

    Attributes:
        dialog_id: Dialog ID; unique identifier of the current dialog
            (one question and one answer).
        session_id: Session ID; unique identifier of one complete
            interaction from the start of a consultation to its end,
            containing multiple dialog IDs.
        terminal_type: Terminal type; the kind of client used for the
            interaction, e.g. Windows or Android.
        bot_code: Bot code; unique identifier of the bot handling the
            current dialog.
        dialog_type: Dialog type: answering in progress, opening, closing,
            completion, feedback, or scripted dialogue in progress.
        timestamp: Time of the question/answer exchange.
    """

    dialog_id: str
    session_id: SessionId
    terminal_type: str
    bot_code: str
    dialog_type: DialogType
    timestamp: datetime
# ==========================================================================================================
@dataclass
class NLUResult:
    """Basic unit of a recognition result.

    Attributes:
        label: Label name.
        score: Probability score.
    """

    label: str
    score: float
@dataclass
class FAQResult:
    """Result of an FAQ match.

    Attributes:
        question: The matched question.
        similarity_score: Similarity score.
        classification_score: Classification score.
    """

    question: str
    similarity_score: float
    classification_score: float
class QuestionType(str, Enum):
    """Question-word category of a question (``str``-valued enum)."""

    WHEN = "when"
    WHERE = "where"
    WHO = "who"
    WHAT = "what"
    WHY = "why"
    HOW = "how"
    HOW_MUCH = "how much"
@dataclass
class Tokens:
    """Unit holding a tokenization result.

    Attributes:
        text: Token texts.
        pos: Part-of-speech tags.
        entity: Entity type; defaults to ``None``.
    """

    text: List[str]
    pos: List[str]
    entity: Optional[str] = None
class EmotionCategory(str, Enum):
    """Coarse emotion category (``str``-valued enum)."""

    POSITIVE = "正面"  # positive
    NEUTRAL = "中性"  # neutral
    NEGATIVE = "负面"  # negative
class DetailedEmotion(str, Enum):
    """Fine-grained emotion label (``str``-valued enum)."""

    HAPPY = "开心"  # happy
    EXCITED = "兴奋"  # excited
    ANGRY = "愤怒"  # angry
    SAD = "悲伤"  # sad
    NEUTRAL = "客观描述"  # objective description
@dataclass
class InheritedInfo:
    """Container for information inherited from context.

    Attributes:
        business_entity: Business entity word.
        intent_keyword: Intent keyword.
        domain: Domain.
        intent: Intent.
    """

    business_entity: Optional[str] = None
    intent_keyword: Optional[str] = None
    domain: Optional[str] = None
    intent: Optional[str] = None

    def get_priority_info(self) -> Optional[str]:
        """Return the inherited value with the highest priority.

        Candidates are checked in the order business entity, intent
        keyword, domain, intent. The first truthy one wins, so ``None``
        and the empty string are both skipped.

        Returns:
            The first truthy candidate, or ``None`` when there is none.
        """
        ordered_candidates = (
            self.business_entity,
            self.intent_keyword,
            self.domain,
            self.intent,
        )
        for candidate in ordered_candidates:
            if candidate:
                return candidate
        return None
@dataclass
class NLUInfo:
    """Semantic-understanding (NLU) information.

    Attributes:
        vertical_category: Vertical/open classification (level 1): whether
            the opening utterance is chit-chat or a business inquiry.
        intent_category: Intent classification (level 2): the action
            category beneath the level-1 category.
        domain_category: Domain classification (level 3): the domain the
            recognized intent (action) applies to.
        domain_results: Domain recognition results; each element is a
            predicted domain with its probability score (typically obtained
            via multi-class classification).
        intent_results: Intent recognition results; each element is a
            predicted intent with its probability score (typically obtained
            via multi-class classification).
        previous_domain: Domain of the dialog closest before the current one.
        previous_intent: Intent of the dialog closest before the current one.
        faq_results: FAQ results; each element is a most-similar matched
            question with its similarity and classification scores (a match
            may be adopted when both exceed some threshold).
        question_type: Question type; one of 5W2H (when, where, who, what,
            why, how, how much).
        tokens: Tokenization of the user question, with part-of-speech tags
            and entity recognition.
        pinyin: Per-character pinyin, kept to help error correction.
        tones: Per-character tones, kept to help error correction.
        emotion: User emotion, coarse (positive, neutral, negative).
        detailed_emotion: User emotion, fine-grained (happy, excited, angry,
            sad, objective description, etc.).
        inherited_info: Inherited information; a dialog may inherit only one
            kind of information from context (priority: business entity
            word > intent keyword > domain > intent).
    """

    vertical_category: str
    intent_category: str
    domain_category: str
    domain_results: List[NLUResult]
    intent_results: List[NLUResult]
    previous_domain: Optional[str] = None
    previous_intent: Optional[str] = None
    # Annotated as a list to match the plural field name and its documented
    # "each element is ..." contract (it was Optional[FAQResult]). Dataclasses
    # do not enforce annotations, so existing callers are unaffected at runtime.
    # NOTE(review): confirm no caller passes a single FAQResult here.
    faq_results: Optional[List[FAQResult]] = None
    question_type: Optional[QuestionType] = None
    tokens: Optional[Tokens] = None
    pinyin: List[str] = field(default_factory=list)
    tones: List[str] = field(default_factory=list)
    emotion: Optional[EmotionCategory] = None
    detailed_emotion: Optional[DetailedEmotion] = None
    inherited_info: InheritedInfo = field(default_factory=InheritedInfo)
# ==========================================================================================================
@dataclass
class SceneInfo:
    """Scene information.

    Attributes:
        scene_id: Scene ID.
        scene_name: Scene name.
        sub_intent: Scene sub-intent.
        slots: Slot information.
        variables: Scene variables.
        trigger_type: Trigger method.
        process_status: Status of the scene flow.
        interaction_flow_id: Interaction flow ID.
        flow_instance_id: Interaction flow instance ID.
    """

    scene_id: str
    scene_name: str
    sub_intent: Optional[str] = None
    # default_factory gives every instance its own fresh dict, so instances
    # never share one mutable default.
    slots: Dict[str, Any] = field(default_factory=dict)
    variables: Dict[str, Any] = field(default_factory=dict)
    trigger_type: Optional[str] = None
    process_status: Optional[str] = None
    interaction_flow_id: Optional[str] = None
    flow_instance_id: Optional[str] = None
# ==========================================================================================================
@dataclass
class QuestionInfo:
    """Information about the user's question.

    Attributes:
        raw_question: Raw question; the user's original input.
        composite_question: Composite question; multi-part user input
            (including multimedia information).
        mediate_processing: Temporary storage for intermediate steps such
            as stop-word handling and text correction.
        final_processed: The final question produced after all processing.
    """

    raw_question: str
    composite_question: Dict[str, Any] = field(default_factory=dict)
    mediate_processing: Dict[str, Any] = field(default_factory=dict)
    final_processed: Optional[str] = None
# ==========================================================================================================
@dataclass
class AnswerUnit:
    """A single answer.

    Attributes:
        answer_id: Answer ID.
        question: The user's original input.
        content: Answer content.
        score: Score; may be ``None`` but has no default, so it must be
            passed explicitly.
        faq_flag: FAQ flag.
        render_type: How the answer is rendered.
        model_source: The model that produced the answer.
    """

    answer_id: str
    question: str
    content: str
    score: Optional[float]
    faq_flag: bool
    render_type: str
    model_source: str
class HumanType(str, Enum):
    """Human-agent type (``str``-valued enum).

    Member names are lower-case; they are part of the public interface and
    are kept as they are.
    """

    chat = "闲聊"  # chit-chat
    qa = "问答"  # question answering
    task = "任务"  # task
    recommend = "推荐"  # recommendation
@dataclass
class Artificial:
    """Details of a transfer to a human agent.

    Attributes:
        human_type: Human-agent type (Q&A, task, recommendation).
        transfer_text: Transfer text; the user's question.
        transfer_reason: Transfer reason; the triggering condition.
    """

    human_type: HumanType
    transfer_text: Optional[str] = None
    transfer_reason: Optional[str] = None
class PrimaryType(str, Enum):
    """Primary (level-1) answer type (``str``-valued enum).

    Member names are lower-case; they are part of the public interface and
    are kept as they are.
    """

    chat = "闲聊"  # chit-chat
    qa = "问答"  # question answering
    task = "任务"  # task
    recommend = "推荐"  # recommendation
@dataclass
class SecondaryType:
    """Placeholder with no fields; its contents depend on the specific business."""
@dataclass
class AnswerInfo:
    """System reply information.

    Attributes:
        primary_type: Primary (level-1) answer type.
        secondary_type: Secondary (level-2) answer type.
        need_rating: Rating invitation; whether to invite the user to rate
            the service.
        transfer_human: Whether to transfer to a human agent.
        artificial_info: Human-agent information (type: chit-chat, Q&A,
            task, recommendation).
        answers: Answer list; each element is an ``AnswerUnit`` carrying
            the answer ID, the user's question, the answer content, score,
            FAQ flag, rendering type and the producing model. Defaults to a
            new empty list per instance.
    """

    primary_type: PrimaryType
    secondary_type: Optional[SecondaryType] = None
    need_rating: bool = False
    transfer_human: bool = False
    # NOTE(review): the original comment says this is present only when
    # need_rating=True; given the field's purpose, transfer_human=True looks
    # like the intended condition -- confirm.
    artificial_info: Optional[Artificial] = None
    # The default used to be the AnswerUnit class object itself, so
    # `AnswerInfo(...).answers` was a class rather than a list and any
    # iteration/append on it failed. default_factory gives each instance its
    # own empty list.
    answers: List[AnswerUnit] = field(default_factory=list)