更新专业术语索引文件,优化意图识别逻辑,添加后缀项更新功能,调整重排序参数以提高相关性,同时修正文档中的描述信息。

This commit is contained in:
2025-06-16 15:18:04 +08:00
parent f1b3f7e158
commit 503c7ff0bc
6 changed files with 57 additions and 25 deletions
@@ -127,7 +127,9 @@ class ProfessionalNounVectorizer:
# 准备数据
texts, metadatas = self._prepare_terms_for_faiss(deduplicated_terms)
suffix_text,suffix_metadatas = self._updata_suffix_item()
texts.extend(suffix_text)
metadatas.extend(suffix_metadatas)
# 创建索引
faiss_index = self._create_index(texts, metadatas)
@@ -140,6 +142,30 @@ class ProfessionalNounVectorizer:
logging.error(f"多文件向量化处理失败: {e}")
return False
def _updata_suffix_item(self) -> Tuple[List[str], List[Dict]]:
    """Load the suffix keywords and turn them into index-ready entries.

    Reads ``./data/nouns/suffix_keywords.json`` (relative to the current
    working directory). Each element of the loaded JSON is indexed with
    ``["name"]``; that name is upper-cased and paired with a metadata dict
    whose ``synonymous`` list and ``description`` string are empty.

    Despite the method name, nothing is written back to the file; it is
    only read.

    Returns:
        A ``(texts, metadatas)`` tuple of two parallel lists: the
        upper-cased names, and one ``{"name", "synonymous", "description"}``
        dict per name. Both lists are empty when the file is missing,
        unreadable, or does not have the expected structure.
    """
    suffix_keywords_path = os.path.join(".", 'data', 'nouns', 'suffix_keywords.json')

    try:
        with open(suffix_keywords_path, 'r', encoding='utf-8') as f:
            raw_terms = json.load(f)
        # Normalise once; the same upper-cased value is used both as the
        # indexed text and as the metadata "name".
        names = [term["name"].upper() for term in raw_terms]
    except FileNotFoundError:
        # The file is optional: a missing file simply contributes no entries.
        return [], []
    except Exception as e:
        # Best-effort load: a broken file is reported through the log and
        # contributes no entries instead of raising to the caller.
        logging.warning(f"读取{suffix_keywords_path}失败: {e}")
        return [], []

    meta_info = [
        {"name": name, "synonymous": [], "description": ""}
        for name in names
    ]
    logging.info(f"加载{suffix_keywords_path},共{len(names)}")
    return names, meta_info
def _prepare_terms_for_faiss(self, terms: List[Dict[str, Any]]) -> Tuple[List[str], List[Dict]]:
"""
@@ -156,15 +182,9 @@ class ProfessionalNounVectorizer:
for term in terms:
name = term["name"]
texts.append(name.strip())
synonymous = term.get("synonymous", [])
description = term.get("description", "")
# 记录元数据
metadatas.append({
"name": name,
"synonymous": synonymous,
"description": description
})
if len(synonymous) > 0:
for synonyms_str in synonymous:
@@ -175,13 +195,21 @@ class ProfessionalNounVectorizer:
"description": description
})
if len(description) > 0:
texts.append(description.strip())
metadatas.append({
"name": name,
"synonymous": synonymous,
"description": description
})
# texts.append(name.strip())
# metadatas.append({
# "name": name,
# "synonymous": synonymous,
# "description": description
# })
# 不检索描述字段
# if len(description) > 0:
# texts.append(description.strip())
# metadatas.append({
# "name": name,
# "synonymous": synonymous,
# "description": description
# })
return texts, metadatas