修改关键词检索溢出问题
This commit is contained in:
@@ -10,7 +10,7 @@ from app.engine.vectordb import get_vector_store
|
||||
from app.settings import init_settings
|
||||
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
||||
from llama_index.core.ingestion import IngestionPipeline
|
||||
-from llama_index.core.node_parser import SentenceSplitter
|
||||
+from llama_index.core.node_parser import SentenceSplitter,MarkdownNodeParser
|
||||
from llama_index.core.settings import Settings
|
||||
from llama_index.core.storage import StorageContext
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
@@ -35,10 +35,11 @@ def get_doc_store(docType:str):
|
||||
def run_pipeline(docstore, vector_store, documents):
|
||||
pipeline = IngestionPipeline(
|
||||
transformations=[
|
||||
-SentenceSplitter(
|
||||
-chunk_size=Settings.chunk_size,
|
||||
-chunk_overlap=Settings.chunk_overlap,
|
||||
-),
|
||||
+#SentenceSplitter(
|
||||
+#chunk_size=Settings.chunk_size,
|
||||
+#chunk_overlap=Settings.chunk_overlap,
|
||||
+#),
|
||||
+MarkdownNodeParser(),
|
||||
Settings.embed_model,
|
||||
],
|
||||
docstore=docstore,
|
||||
@@ -86,6 +87,7 @@ def generate_datasource():
|
||||
|
||||
# Build the index and persist storage
|
||||
persist_storage(docstore, vector_store)
|
||||
+persist_BMRetriever(vector_store)
|
||||
|
||||
logger.info("Finished generating the index")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user