修改关键词检索溢出问题

This commit is contained in:
wanyaokun
2024-09-05 18:13:39 +08:00
parent 626ff1e632
commit f171282a0c
2 changed files with 10 additions and 8 deletions
+7 -5
View File
@@ -10,7 +10,7 @@ from app.engine.vectordb import get_vector_store
from app.settings import init_settings
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.node_parser import SentenceSplitter,MarkdownNodeParser
from llama_index.core.settings import Settings
from llama_index.core.storage import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
@@ -35,10 +35,11 @@ def get_doc_store(docType:str):
def run_pipeline(docstore, vector_store, documents):
pipeline = IngestionPipeline(
transformations=[
SentenceSplitter(
chunk_size=Settings.chunk_size,
chunk_overlap=Settings.chunk_overlap,
),
#SentenceSplitter(
#chunk_size=Settings.chunk_size,
#chunk_overlap=Settings.chunk_overlap,
#),
MarkdownNodeParser(),
Settings.embed_model,
],
docstore=docstore,
@@ -86,6 +87,7 @@ def generate_datasource():
# Build the index and persist storage
persist_storage(docstore, vector_store)
persist_BMRetriever(vector_store)
logger.info("Finished generating the index")