关键词检索增加容错

This commit is contained in:
wanyaokun
2024-09-05 18:16:54 +08:00
parent f171282a0c
commit 9ac53011e0
2 changed files with 6 additions and 3 deletions
+3 -2
View File
@@ -63,8 +63,9 @@ def persist_storage(docstore, vector_store):
def persist_BMRetriever(vector_store):
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
top_k = int(os.getenv("TOP_K", "3"))
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes=vector_store.get_nodes([]))
nodes = vector_store.get_nodes([])
top_k = min(int(os.getenv("TOP_K", "3")),len(nodes))
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes = nodes)
bmRetriver.persist(STORAGE_DIR)
@@ -29,7 +29,9 @@ class HybridRetriever(BaseRetriever):
if os.path.exists(STORAGE_DIR) and len(os.listdir(STORAGE_DIR)) > 0:
self._bm25Retriever = CHBM25Retriever.from_persist_dir(STORAGE_DIR)
else:
self._bm25Retriever = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=self._vector_index.vector_store.get_nodes(None))
nodes = self._vector_index.vector_store.get_nodes(None)
similarity_top_k = min(len(nodes),similarity_top_k)
self._bm25Retriever = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=nodes)
self._bm25Retriever.persist(STORAGE_DIR)
self._alpha = alpha