更新专业术语索引文件，优化意图识别逻辑，添加后缀项更新功能，调整重排序参数以提高相关性，同时修正文档中的描述信息。

This commit is contained in:
2025-06-16 15:18:04 +08:00
parent f1b3f7e158
commit 503c7ff0bc
6 changed files with 57 additions and 25 deletions
+11 -6
View File
@@ -184,7 +184,7 @@ class IntentRecognizer:
except Exception as e:
raise RuntimeError(f"无法解析LLM关键词提取响应: {e}") from e
def _rerank_matched_terms(self, query_key: str, matched_terms: set, top_k: int = 2) -> List[Term]:
def _rerank_matched_terms(self, query_key: str, matched_terms: set, top_k: int = 2, rerank_score:float = 0.6) -> List[Term]:
"""
对召回的专业术语进行重排序,按与用户查询的相关性排序
@@ -198,10 +198,14 @@ class IntentRecognizer:
"""
if not matched_terms:
return []
if len(matched_terms) <= top_k:
return list(matched_terms)
try:
# 将每个术语转换为可用于重排序的文本表示
term_texts = ["名称:" + term.name + "|" + "同义词:" + ";".join(term.synonymous) + "|" + "描述:" + term.description for term in matched_terms]
# term_texts = ["名称:" + term.name + "|" + "同义词:" + ";".join(term.synonymous) + "|" + "描述:" + term.description for term in matched_terms]
term_texts = ["名称:" + term.name + "|" + "同义词:" + ";".join(term.synonymous) for term in matched_terms]
# 使用重排序模型
xinference_reranker = SiliconFlowReRankerModel()
@@ -211,7 +215,7 @@ class IntentRecognizer:
matched_terms_list = list(matched_terms)
# 根据重排序结果获取排序后的术语列表
reranked_terms = [matched_terms_list[result["index"]] for result in rerank_results if result["score"] >= 0.6]
reranked_terms = [matched_terms_list[result["index"]] for result in rerank_results if result["score"] >= rerank_score]
return reranked_terms
@@ -279,7 +283,8 @@ class IntentRecognizer:
改写结果
"""
# 准备问题改写提示
terms_dict = [term.model_dump(exclude={"description"}) for term in keywords.terms]
# terms_dict = [term.model_dump(exclude={"description"}) for term in keywords.terms]
terms_dict = [term.model_dump() for term in keywords.terms]
keywords_str = json.dumps(terms_dict, ensure_ascii=False)
query_rewrite_parser = PydanticOutputParser(pydantic_object=QueryRewrite)
# formatted_prompt = query_rewrite_prompt.format(query=query,
@@ -369,7 +374,7 @@ class IntentRecognizer:
)
# 步骤3: 进行意图识别和槽位填充
result = self._process_intent_and_slot(query, conversation_context, chat_history, previous_slots)
result = self._process_intent_and_slot(rewrite.rewrite, conversation_context, chat_history, previous_slots)
result.update({"keywords": keywords_terms.model_dump(),
"rewrite": rewrite.model_dump(),
"query_keys": query_keys})