#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
File: DataModels.py
Author: oyyz
Date: 2025-05-13
Description: Data models for extraction and classification.
"""
from pydantic import BaseModel, Field, field_validator
from typing import List, Optional, Dict, Tuple, Union, Any
from enum import Enum


# The class docstring below is kept verbatim: pydantic publishes class
# docstrings as schema descriptions, so its text is runtime data.
class SoftwareName(str, Enum):
    """软件名称枚举类"""

    D3 = "配网工程计价通D3软件"
    C1 = "新型储能电站建设计价通C1软件"
    Z1 = "西藏电力工程计价通Z1软件"
    T1 = "技改检修工程计价通T1软件"
    T1_LIST = "技改检修清单计价通T1软件"
    MAIN = "主网电力建设计价通软件"
    UNKNOWN = ""  # unknown / not recognised

    # Free-text alias hints keyed by the canonical product names above.
    # NOTE(review): because this assignment sits inside a str-mixin Enum body,
    # the enum machinery turns the dict into one more member named ALIASES
    # whose value is str() of the dict. That is why code iterating the enum
    # has to filter SoftwareName.ALIASES out. It is deliberately left as a
    # member here because other code references SoftwareName.ALIASES.
    ALIASES = {
        D3: "别名包括:配网D3、D3软件、配网工程软件等其他类似称呼",
        C1: "别名包括:储能C1、C1软件、储能电站软件、储能软件等其他类似称呼",
        Z1: "别名包括:西藏Z1、Z1软件、西藏电力软件等其他类似称呼",
        T1: "别名包括:技改T1、T1软件、技改检修软件等其他类似称呼",
        T1_LIST: "别名包括:技改清单T1、T1清单软件、技改检修清单软件等其他类似称呼",
        MAIN: "别名包括:主网软件、电力建设软件、主网建设软件、博微电力建设计价通等其他类似称呼"
    }


def build_alias_mapping() -> Dict[str, SoftwareName]:
    """Build the lookup table from every accepted spelling to its product.

    Returns:
        A dict whose keys are the known aliases followed by the canonical
        names (``SoftwareName.<X>.value``) and whose values are the matching
        ``SoftwareName`` members. ``UNKNOWN`` and ``ALIASES`` get no entry.
    """
    aliases_by_software = (
        (SoftwareName.D3, ("配网D3", "D3软件", "配网工程软件", "配网软件")),
        (SoftwareName.C1, ("储能C1", "C1软件", "储能电站软件", "储能软件")),
        (SoftwareName.Z1, ("西藏Z1", "Z1软件", "西藏电力软件")),
        (SoftwareName.T1, ("技改T1", "T1软件", "技改检修软件")),
        (SoftwareName.T1_LIST, ("技改清单T1", "T1清单软件", "技改检修清单软件")),
        (
            SoftwareName.MAIN,
            (
                "主网软件",
                "电力建设软件",
                "主网建设软件",
                "博微电力建设计价通",
                "主网计价通",
                "主网计价通软件",
                "计价通软件",
                "电力计价通软件",
                "计价通",
            ),
        ),
    )

    alias_map: Dict[str, SoftwareName] = {}
    for software, aliases in aliases_by_software:
        for alias in aliases:
            alias_map[alias] = software

    # Canonical names map to themselves so a single lookup covers both cases.
    for software, _ in aliases_by_software:
        alias_map[software.value] = software

    return alias_map


# Module-wide alias table, built once at import time.
SOFTWARE_NAME_ALIAS_MAP = build_alias_mapping()


# ---- Output models ----
# No docstrings are added to the models below on purpose: pydantic would
# publish them as schema descriptions, which changes runtime output.

# A domain term with its synonyms. Identity (hash and equality) is the name
# alone, so two Term objects with the same name collapse in sets and dicts.
class Term(BaseModel):
    name: str = Field(description="专业名词")
    synonymous: List[str] = Field(description="同义词列表")
    description: str = Field(description="描述信息", default="")

    def __hash__(self):
        return hash(self.name)

    def __eq__(self, other):
        if isinstance(other, Term):
            return self.name == other.name
        # Let Python try the reflected comparison before falling back to
        # identity; the result for unrelated types is still False.
        return NotImplemented


# Wrapper holding a list of Term objects.
class TermList(BaseModel):
    terms: List[Term] = Field(description="专业名词列表")


# Two-level classification: a top-level vertical category and its sub-category.
class Classification(BaseModel):
    vertical_classification: str = Field(description="垂直领域一级分类")
    sub_classification: str = Field(description="一级分类下的二级分类")


# Holds a rewritten form of a question.
class QueryRewrite(BaseModel):
    rewrite: str = Field(description="问题改写")


# ---- Slot models ----
# Base class for slot models: declares the required-slot check and carries the
# shared software_name normaliser. Its docstring is kept verbatim (runtime text).
class SlotBase(BaseModel):
    """槽位基础模型"""

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report whether every mandatory slot is filled.

        Returns:
            ``(all_filled, missing)`` where ``missing`` maps each empty
            mandatory slot name to a prompt text for it.

        Raises:
            NotImplementedError: always; subclasses must override.
        """
        raise NotImplementedError("子类必须实现check_required_slots方法")

    # check_fields=False: SlotBase itself declares no ``software_name`` field;
    # the validator only takes effect on subclasses that do.
    @field_validator('software_name', mode='before', check_fields=False)
    @classmethod
    def validate_software_name(cls, v):
        """Normalise a software name to its canonical spelling.

        Accepts canonical names and known aliases. Leading and trailing
        whitespace is ignored when matching.

        Args:
            v: Raw input value (runs before pydantic's own type validation).

        Returns:
            ``""`` for ``None``, empty or whitespace-only input; the canonical
            name when ``v`` is a ``SoftwareName`` or a recognised name/alias;
            otherwise ``v`` unchanged so it can be shown in an error prompt.
        """
        if v is None:
            return ""
        if isinstance(v, SoftwareName):
            return v.value
        if not isinstance(v, str):
            return v

        name = v.strip()
        if not name:
            return ""

        # The alias table also contains every canonical name, so one lookup
        # replaces the former scan over the enum values.
        matched = SOFTWARE_NAME_ALIAS_MAP.get(name)
        if matched is not None:
            return matched.value

        return v


# 1. Software questions
# 1.1 Software questions: software functions
def _software_name_choices() -> str:
    """Return the selectable product names joined by ', '.

    The UNKNOWN and ALIASES members of SoftwareName are left out.
    """
    excluded = (SoftwareName.UNKNOWN, SoftwareName.ALIASES)
    return ", ".join(member.value for member in SoftwareName if member not in excluded)


# Slots for "how do I use feature X" questions.
# Mandatory: software_name, function_name, operation.
class SoftwareFunctionSlots(SlotBase):
    software_name: str = Field(default="", description="软件名称")
    function_name: str = Field(default="", description="具体功能名称")
    operation: str = Field(default="", description="用户操作意图(如何使用功能、功能入口、功能使用场景)")
    project_type: Optional[str] = Field(default="单工程", description="工程类型(单工程、多工程、批次工程)")
    software_version: Optional[str] = Field(default="", description="软件版本")
    operation_steps: Optional[str] = Field(default="", description="操作步骤描述")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description. The ``software_name`` prompt also
            lists the selectable product names.
        """
        fields = SoftwareFunctionSlots.model_fields
        prompts = {
            "software_name": f"{fields['software_name'].description},可选值:{_software_name_choices()}",
            "function_name": fields["function_name"].description,
            "operation": fields["operation"].description,
        }
        unfilled = {slot: hint for slot, hint in prompts.items() if not getattr(self, slot)}
        return not unfilled, unfilled


# 1.2 Troubleshooting
# Slots for fault reports. Mandatory: software_name, function_name, error_message.
class SoftwareTroubleShootingSlots(SlotBase):
    software_name: str = Field(default="", description="软件名称")
    function_name: str = Field(default="", description="具体功能名称/操作描述")
    error_message: str = Field(default="", description="报错信息/异常现象")
    software_version: Optional[str] = Field(default="", description="软件版本")
    os_version: Optional[str] = Field(default="", description="操作系统及版本")
    reproduction_steps: Optional[str] = Field(default="", description="故障重现步骤")
    project_type: Optional[str] = Field(default="单工程", description="工程类型(单工程、多工程、批次工程)")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description. The ``software_name`` prompt also
            lists the selectable product names.
        """
        fields = SoftwareTroubleShootingSlots.model_fields
        prompts = {
            "software_name": f"{fields['software_name'].description},可选值:{_software_name_choices()}",
            "function_name": fields["function_name"].description,
            "error_message": fields["error_message"].description,
        }
        unfilled = {slot: hint for slot, hint in prompts.items() if not getattr(self, slot)}
        return not unfilled, unfilled


# 2. Business questions
# 2.1 Professional consulting
# Mandatory: scene_subject, business_scene.
class ProfessionalConsultingSlots(SlotBase):
    scene_subject: str = Field(default="", description="场景主体")
    business_scene: str = Field(default="", description="业务场景描述")
    software_name: Optional[str] = Field(default="", description="软件名称")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description.
        """
        fields = ProfessionalConsultingSlots.model_fields
        unfilled = {
            slot: fields[slot].description
            for slot in ("scene_subject", "business_scene")
            if not getattr(self, slot)
        }
        return not unfilled, unfilled


# 2.2 Data questions
# Mandatory: expense_type, operation_purpose.
class DataProblemSlots(SlotBase):
    expense_type: str = Field(default="", description="费用类型")
    operation_purpose: str = Field(default="", description="操作目的")
    software_name: Optional[str] = Field(default="", description="软件名称")
    project_type: Optional[str] = Field(default="", description="工程类型")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description.
        """
        fields = DataProblemSlots.model_fields
        unfilled = {
            slot: fields[slot].description
            for slot in ("expense_type", "operation_purpose")
            if not getattr(self, slot)
        }
        return not unfilled, unfilled


# 3. Installation, download and registration
# 3.1 Installation / download / registration: file-extension consulting
# Mandatory: file_extension, operation_purpose.
class FileExtensionConsultingSlots(SlotBase):
    file_extension: str = Field(default="", description="文件后缀名")
    operation_purpose: str = Field(default="", description="操作目的(了解对应软件,对应工程)")
    file_source: Optional[str] = Field(default="", description="文件来源场景")
    related_software: Optional[str] = Field(default="", description="相关软件名称")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description.
        """
        fields = FileExtensionConsultingSlots.model_fields
        unfilled = {
            slot: fields[slot].description
            for slot in ("file_extension", "operation_purpose")
            if not getattr(self, slot)
        }
        return not unfilled, unfilled


# 3.2 Software lock
# Mandatory: lock_type, operation_purpose.
class SoftwareLockSlots(SlotBase):
    lock_type: str = Field(default="", description="锁类型")
    operation_purpose: str = Field(default="", description="操作目的")
    lock_number: Optional[str] = Field(default="", description="软件锁编号/注册号")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description.
        """
        fields = SoftwareLockSlots.model_fields
        unfilled = {
            slot: fields[slot].description
            for slot in ("lock_type", "operation_purpose")
            if not getattr(self, slot)
        }
        return not unfilled, unfilled


# 3.3 Installation and download
# Mandatory: operation_stage, plus at least one of software_name / file_name.
class InstallationDownloadSlots(SlotBase):
    software_name: str = Field(default="", description="软件/插件名称,与file_name二选一")
    file_name: str = Field(default="", description="文件名,与software_name二选一")
    operation_stage: str = Field(default="", description="操作阶段(下载、安装等)")
    os_version: Optional[str] = Field(default="", description="操作系统版本")
    package_source: Optional[str] = Field(default="", description="安装包来源/版本号")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report which mandatory slots are still empty.

        ``software_name`` and ``file_name`` are alternatives: both are
        reported as missing only when neither is filled.

        Returns:
            ``(all_filled, missing)``; ``missing`` maps each empty mandatory
            slot to its field description. The ``software_name`` prompt also
            lists the selectable product names.
        """
        fields = InstallationDownloadSlots.model_fields
        unfilled: Dict[str, str] = {}

        if not (self.software_name or self.file_name):
            excluded = (SoftwareName.UNKNOWN, SoftwareName.ALIASES)
            choices = ", ".join(member.value for member in SoftwareName if member not in excluded)
            unfilled["software_name"] = f"{fields['software_name'].description},可选值:{choices}"
            unfilled["file_name"] = fields["file_name"].description

        if not self.operation_stage:
            unfilled["operation_stage"] = fields["operation_stage"].description

        return not unfilled, unfilled


# 3.4 Problem diagnosis
# Mandatory: error_message.
class ProblemDiagnosisSlots(SlotBase):
    error_message: str = Field(default="", description="报错信息/异常现象")
    software_name: Optional[str] = Field(default="", description="软件名称")
    os_version: Optional[str] = Field(default="", description="操作系统版本")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report whether the single mandatory slot ``error_message`` is filled.

        Returns:
            ``(all_filled, missing)``; ``missing`` holds the field description
            of ``error_message`` when it is empty.
        """
        unfilled: Dict[str, str] = {}
        if not self.error_message:
            unfilled["error_message"] = ProblemDiagnosisSlots.model_fields["error_message"].description
        return not unfilled, unfilled


# Catch-all slot model. Mandatory: content_type.
# The class docstring is kept verbatim: pydantic publishes model docstrings
# as the schema description, so its text is runtime data.
class OtherSlots(SlotBase):
    """其他类型槽位"""

    content_type: str = Field(default="", description="内容类型(必填)")
    intent: Optional[str] = Field(default="", description="用户意图(选填)")

    def check_required_slots(self) -> Tuple[bool, Dict[str, str]]:
        """Report whether the single mandatory slot ``content_type`` is filled.

        Returns:
            ``(all_filled, missing)``; ``missing`` holds the field description
            of ``content_type`` when it is empty.
        """
        unfilled: Dict[str, str] = {}
        if not self.content_type:
            unfilled["content_type"] = OtherSlots.model_fields["content_type"].description
        return not unfilled, unfilled


# Combined result: a classification plus the slot model holding the slots.
# The class docstring is kept verbatim (runtime schema text).
# NOTE(review): every member of the union gives all of its fields a default,
# so one payload may validate against several members; which one pydantic
# picks depends on its union-matching mode. Confirm against real payloads.
class IntentAndSlotResult(BaseModel):
    """意图槽位填充结果"""

    classification: Classification
    slots: Union[
        SoftwareFunctionSlots,
        SoftwareTroubleShootingSlots,
        ProfessionalConsultingSlots,
        DataProblemSlots,
        FileExtensionConsultingSlots,
        SoftwareLockSlots,
        InstallationDownloadSlots,
        ProblemDiagnosisSlots,
        OtherSlots,
    ]