更新词库,删除冗余同义词条目,优化意图识别逻辑,增强代码可读性和维护性,同时添加去重功能以处理同义词。更新相关文档以反映最新变化。
This commit is contained in:
+459
-829
File diff suppressed because it is too large
Load Diff
@@ -9815,7 +9815,6 @@
|
|||||||
{
|
{
|
||||||
"name": "措施项目一",
|
"name": "措施项目一",
|
||||||
"synonymous": [
|
"synonymous": [
|
||||||
"措施费"
|
|
||||||
],
|
],
|
||||||
"description": "费用项目分类,包含特殊地区施工增加费等临时性工程费用;在建安预算中手动添加的不按费率计取的措施项目费,用于计算一笔性费用。"
|
"description": "费用项目分类,包含特殊地区施工增加费等临时性工程费用;在建安预算中手动添加的不按费率计取的措施项目费,用于计算一笔性费用。"
|
||||||
},
|
},
|
||||||
|
|||||||
@@ -235,12 +235,12 @@
|
|||||||
"description": "查看定额升级的具体规则"
|
"description": "查看定额升级的具体规则"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "西藏电力工程造价2016 V2.7.1.0",
|
"name": "西藏电力工程造价2016",
|
||||||
"synonymous": [],
|
"synonymous": [],
|
||||||
"description": "西藏电力工程造价的旧版定额标准"
|
"description": "西藏电力工程造价的旧版定额标准"
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"name": "西藏配网造价2017 V2.12.3.0",
|
"name": "西藏配网造价2017",
|
||||||
"synonymous": [],
|
"synonymous": [],
|
||||||
"description": "西藏配网造价的旧版定额标准"
|
"description": "西藏配网造价的旧版定额标准"
|
||||||
},
|
},
|
||||||
@@ -4207,11 +4207,6 @@
|
|||||||
"synonymous": [],
|
"synonymous": [],
|
||||||
"description": "用于批量设置或修改多个工程的模板、参数和物料信息"
|
"description": "用于批量设置或修改多个工程的模板、参数和物料信息"
|
||||||
},
|
},
|
||||||
{
|
|
||||||
"name": "参数",
|
|
||||||
"synonymous": [],
|
|
||||||
"description": "工程设置中的各项参数"
|
|
||||||
},
|
|
||||||
{
|
{
|
||||||
"name": "地区范围",
|
"name": "地区范围",
|
||||||
"synonymous": [],
|
"synonymous": [],
|
||||||
|
|||||||
@@ -71,16 +71,16 @@ class TermMerger:
|
|||||||
logging.warning(f"读取{file}失败: {e}")
|
logging.warning(f"读取{file}失败: {e}")
|
||||||
|
|
||||||
# 加载suffix_keywords.json文件
|
# 加载suffix_keywords.json文件
|
||||||
suffix_keywords_path = os.path.join(os.path.dirname(os.path.dirname(self.EXTRACTED_NOUNS_DIR)), 'data', 'nouns', 'suffix_keywords.json')
|
# suffix_keywords_path = os.path.join(os.path.dirname(os.path.dirname(self.EXTRACTED_NOUNS_DIR)), 'data', 'nouns', 'suffix_keywords.json')
|
||||||
if os.path.exists(suffix_keywords_path):
|
# if os.path.exists(suffix_keywords_path):
|
||||||
try:
|
# try:
|
||||||
with open(suffix_keywords_path, 'r', encoding='utf-8') as f:
|
# with open(suffix_keywords_path, 'r', encoding='utf-8') as f:
|
||||||
suffix_terms = json.load(f)
|
# suffix_terms = json.load(f)
|
||||||
suffix_terms = [{"name": term["name"].upper(), "synonymous": "", "description": ""} for term in suffix_terms]
|
# suffix_terms = [{"name": term["name"].upper(), "synonymous": "", "description": ""} for term in suffix_terms]
|
||||||
all_terms.extend(suffix_terms)
|
# all_terms.extend(suffix_terms)
|
||||||
logging.info(f"加载{suffix_keywords_path},共{len(suffix_terms)}条")
|
# logging.info(f"加载{suffix_keywords_path},共{len(suffix_terms)}条")
|
||||||
except Exception as e:
|
# except Exception as e:
|
||||||
logging.warning(f"读取{suffix_keywords_path}失败: {e}")
|
# logging.warning(f"读取{suffix_keywords_path}失败: {e}")
|
||||||
|
|
||||||
return all_terms
|
return all_terms
|
||||||
|
|
||||||
@@ -154,6 +154,9 @@ class TermMerger:
|
|||||||
for result in tqdm(executor.map(self.process_term, items_to_process), total=len(items_to_process)):
|
for result in tqdm(executor.map(self.process_term, items_to_process), total=len(items_to_process)):
|
||||||
merged_terms.append(result)
|
merged_terms.append(result)
|
||||||
|
|
||||||
|
# 4. 去重
|
||||||
|
merged_terms = self.deduplicate_synonymous_name(merged_terms)
|
||||||
|
|
||||||
# 4. 保存合并结果
|
# 4. 保存合并结果
|
||||||
os.makedirs(os.path.dirname(self.OUTPUT_PATH), exist_ok=True)
|
os.makedirs(os.path.dirname(self.OUTPUT_PATH), exist_ok=True)
|
||||||
with open(self.OUTPUT_PATH, 'w', encoding='utf-8') as f:
|
with open(self.OUTPUT_PATH, 'w', encoding='utf-8') as f:
|
||||||
@@ -162,6 +165,26 @@ class TermMerger:
|
|||||||
|
|
||||||
return merged_terms
|
return merged_terms
|
||||||
|
|
||||||
|
def deduplicate_synonymous_name(self, terms):
    """Drop entries with a repeated ``name`` and prune redundant synonyms.

    Two passes are made over ``terms``:

    1. Only the first entry seen for each ``name`` is kept; later entries
       with the same ``name`` are discarded.
    2. For every kept entry, any synonym that equals the ``name`` of a kept
       entry (including its own) is removed, and repeated synonyms inside
       the same entry are collapsed to their first occurrence.

    Args:
        terms: Iterable of dicts. Each dict must have a ``"name"`` key and
            may have a ``"synonymous"`` value that is a list of strings,
            a single string, ``None``, or absent.

    Returns:
        A new list holding the kept entries in their original order. The
        kept dicts are the same objects as in ``terms`` and their
        ``"synonymous"`` value is rewritten in place as a list.

    Raises:
        KeyError: If an entry has no ``"name"`` key.
    """
    # Pass 1: keep only the first entry for each name.
    seen_names = set()
    unique_terms = []
    for item in terms:
        name = item["name"]
        if name in seen_names:
            # Duplicate name: skip this entry entirely.
            continue
        seen_names.add(name)
        unique_terms.append(item)

    # Pass 2: strip synonyms that collide with a kept entry's name.
    # seen_names already holds exactly the names of the kept entries, so it
    # doubles as the lookup set here.
    for item in unique_terms:
        raw_synonyms = item.get("synonymous") or []
        if isinstance(raw_synonyms, str):
            # A bare string would otherwise be iterated character by
            # character; treat it as a single synonym instead.
            raw_synonyms = [raw_synonyms]

        kept_synonyms = []
        seen_synonyms = set()
        for synonym in raw_synonyms:
            if synonym in seen_names or synonym in seen_synonyms:
                continue
            seen_synonyms.add(synonym)
            kept_synonyms.append(synonym)
        item["synonymous"] = kept_synonyms

    return unique_terms
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""主函数,创建TermMerger实例并执行合并"""
|
"""主函数,创建TermMerger实例并执行合并"""
|
||||||
|
|||||||
@@ -139,7 +139,7 @@ class SlotBase(BaseModel):
|
|||||||
if v in SOFTWARE_NAME_ALIAS_MAP:
|
if v in SOFTWARE_NAME_ALIAS_MAP:
|
||||||
return SOFTWARE_NAME_ALIAS_MAP[v].value
|
return SOFTWARE_NAME_ALIAS_MAP[v].value
|
||||||
|
|
||||||
# 如果无法匹配,返回原值用于错误提示
|
# 如果无法匹配,返回原值
|
||||||
return v
|
return v
|
||||||
|
|
||||||
return v
|
return v
|
||||||
@@ -204,7 +204,7 @@ class ProfessionalConsultingSlots(SlotBase):
|
|||||||
|
|
||||||
# 2.2 数据问题
|
# 2.2 数据问题
|
||||||
class DataProblemSlots(SlotBase):
|
class DataProblemSlots(SlotBase):
|
||||||
expense_type: str = Field(default="", description="费用类型")
|
expense_type: str = Field(default="", description="费用(数据)项、费用类型")
|
||||||
operation_purpose: str = Field(default="", description="操作目的")
|
operation_purpose: str = Field(default="", description="操作目的")
|
||||||
software_name: Optional[str] = Field(default="", description="软件名称")
|
software_name: Optional[str] = Field(default="", description="软件名称")
|
||||||
project_type: Optional[str] = Field(default="", description="工程类型")
|
project_type: Optional[str] = Field(default="", description="工程类型")
|
||||||
|
|||||||
@@ -266,14 +266,15 @@ class IntentRecognizer:
|
|||||||
term_list = TermList(terms=list(matched_terms))
|
term_list = TermList(terms=list(matched_terms))
|
||||||
return term_list, query_keys
|
return term_list, query_keys
|
||||||
|
|
||||||
def _rewrite_query(self, query: str, keywords: TermList, chat_history: List[Dict[str, str]] = None, context: str = "") -> QueryRewrite:
|
def _rewrite_query(self, query: str, keywords: TermList, query_keys:List[str], chat_history: List[Dict[str, str]] = None, context: str = "") -> QueryRewrite:
|
||||||
"""
|
"""
|
||||||
对用户问题进行改写
|
对用户问题进行改写
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query: 用户原始问题
|
query: 用户原始问题
|
||||||
keywords: 匹配到的关键词列表
|
keywords: 匹配到的关键词列表
|
||||||
|
query_keys: 用户查询中提取的关键词列表
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
改写结果
|
改写结果
|
||||||
"""
|
"""
|
||||||
@@ -362,6 +363,7 @@ class IntentRecognizer:
|
|||||||
rewrite = self._rewrite_query(
|
rewrite = self._rewrite_query(
|
||||||
query=query,
|
query=query,
|
||||||
keywords=keywords_terms,
|
keywords=keywords_terms,
|
||||||
|
query_keys=query_keys,
|
||||||
chat_history=chat_history,
|
chat_history=chat_history,
|
||||||
context=conversation_context
|
context=conversation_context
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -34,7 +34,7 @@ classification_info="""【垂直领域分类】:
|
|||||||
4. 其他 -- 指与软件或电力造价专业无关的日常对话、问候、感慨、情绪表达等。
|
4. 其他 -- 指与软件或电力造价专业无关的日常对话、问候、感慨、情绪表达等。
|
||||||
|
|
||||||
【软件问题包括以下两类】:
|
【软件问题包括以下两类】:
|
||||||
1. 软件功能:询问软件功能的使用、操作、位置等
|
1. 软件功能:询问软件功能的使用、功能操作(调整)、功能位置、如何设置、如何转换等
|
||||||
2. 故障排查:软件运行异常、软件报错、软件显示错误等
|
2. 故障排查:软件运行异常、软件报错、软件显示错误等
|
||||||
|
|
||||||
【业务问题包括以下两类】:
|
【业务问题包括以下两类】:
|
||||||
|
|||||||
@@ -18,6 +18,11 @@ import requests
|
|||||||
# sk-dvbaktabkdwdpjgxyoozlwnejosjyhdgqwllfeborqahndxs
|
# sk-dvbaktabkdwdpjgxyoozlwnejosjyhdgqwllfeborqahndxs
|
||||||
|
|
||||||
API_KEY_LIST=[
|
API_KEY_LIST=[
|
||||||
|
"sk-kvgfuqeqvpmfsccykyoohheshclcrtvjlnewratvrjpkpbkc",
|
||||||
|
"sk-zhnbqnpuumuuvegnvbgoggxafpukbzchpgrugpkobiwkzsar",
|
||||||
|
"sk-kzhxlqvqcxlnbdgnpalqnzumkmspepkttkgbophnkqanainw",
|
||||||
|
"sk-bzttugqtlskrvguvhckwamdssvgmgnrqpsialpdbskfsyyak",
|
||||||
|
"sk-tovmogiablsoeabwgqyvevpcfichyjpuzqdymmvksspdrtqt",
|
||||||
"sk-wqdpapdkisovziexgcyxvumpwzbjnhqbxvcqcspzctjhyhjk",
|
"sk-wqdpapdkisovziexgcyxvumpwzbjnhqbxvcqcspzctjhyhjk",
|
||||||
"sk-bbntrnifrtdzhhgrtlrhvwbnaysuszviemshdakxonnnymnb",
|
"sk-bbntrnifrtdzhhgrtlrhvwbnaysuszviemshdakxonnnymnb",
|
||||||
"sk-vmpnwjxersrwybmfhfxgsvbmhsmpjldxseiyxovnysrlbuzi",
|
"sk-vmpnwjxersrwybmfhfxgsvbmhsmpjldxseiyxovnysrlbuzi",
|
||||||
@@ -91,12 +96,6 @@ API_KEY_LIST=[
|
|||||||
"sk-jrdzerhmvrtvzawkksowbgkggkubwfquplmrxbdhespqgtis",
|
"sk-jrdzerhmvrtvzawkksowbgkggkubwfquplmrxbdhespqgtis",
|
||||||
"sk-jjbpnkbeupsxyclcivbhizcfpfjrppddunbqynyjkqhtmpwu",
|
"sk-jjbpnkbeupsxyclcivbhizcfpfjrppddunbqynyjkqhtmpwu",
|
||||||
"sk-oqehupcveovkjqqtxypqyifidcdissuyehwrkdwgruoyjkpq",
|
"sk-oqehupcveovkjqqtxypqyifidcdissuyehwrkdwgruoyjkpq",
|
||||||
"sk-orhfntzrbpmpavybcjyylofxncdvufdmvlznofmhxmnjymjl",
|
|
||||||
"sk-kvgfuqeqvpmfsccykyoohheshclcrtvjlnewratvrjpkpbkc",
|
|
||||||
"sk-zhnbqnpuumuuvegnvbgoggxafpukbzchpgrugpkobiwkzsar",
|
|
||||||
"sk-kzhxlqvqcxlnbdgnpalqnzumkmspepkttkgbophnkqanainw",
|
|
||||||
"sk-bzttugqtlskrvguvhckwamdssvgmgnrqpsialpdbskfsyyak",
|
|
||||||
"sk-tovmogiablsoeabwgqyvevpcfichyjpuzqdymmvksspdrtqt",
|
|
||||||
]
|
]
|
||||||
|
|
||||||
class APIKeyManager:
|
class APIKeyManager:
|
||||||
|
|||||||
Reference in New Issue
Block a user