合并Dev分支代码
This commit is contained in:
@@ -10,12 +10,11 @@ from app.engine.index import get_index
|
||||
from app.engine.tools import ToolFactory
|
||||
|
||||
|
||||
def get_chat_engine(filters=None, params=None):
|
||||
def get_chat_engine(filters=None, params=None,**args):
|
||||
system_prompt = os.getenv("SYSTEM_PROMPT")
|
||||
top_k = int(os.getenv("TOP_K", "3"))
|
||||
use_reranker = os.getenv("RERANK_ENABLED")
|
||||
tools = []
|
||||
|
||||
# 创建SQL查询工具
|
||||
# sql_query_engine = create_summary_query_engine(index)
|
||||
# sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
|
||||
@@ -25,9 +24,7 @@ def get_chat_engine(filters=None, params=None):
|
||||
#tools.append(sql_query_tool)
|
||||
|
||||
# Add query tool if index exists
|
||||
indexs = get_index()
|
||||
if len(indexs) > 0:
|
||||
index = list(indexs.values())[0]
|
||||
index = get_index(**args)
|
||||
if index is not None:
|
||||
summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
|
||||
summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
|
||||
|
||||
@@ -5,7 +5,7 @@ load_dotenv()
|
||||
import logging
|
||||
import os
|
||||
|
||||
from app.engine.loaders import get_documents
|
||||
from app.engine.loaders import get_document_Types, get_documents
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.settings import init_settings
|
||||
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
||||
@@ -21,12 +21,13 @@ logger = logging.getLogger()
|
||||
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||
|
||||
|
||||
def get_doc_store(docType: str = ""):
    """Return the document store for one document type.

    If the storage directory is there, the document store is loaded from it.
    If not, an in-memory document store is set up, since it cannot be loaded
    from a directory that doesn't exist.

    Args:
        docType: Name of the sub-directory of ``STORAGE_DIR`` that holds the
            persisted store for this document type. When empty (the
            default), ``STORAGE_DIR`` itself is used, so the earlier
            zero-argument call form keeps working.

    Returns:
        The store loaded via ``SimpleDocumentStore.from_persist_dir`` when
        the directory exists, otherwise a new empty ``SimpleDocumentStore``.
    """
    # Only descend into a per-type directory when a type was actually given.
    store_dir = os.path.join(STORAGE_DIR, docType) if docType else STORAGE_DIR
    if os.path.exists(store_dir):
        return SimpleDocumentStore.from_persist_dir(store_dir)
    return SimpleDocumentStore()
|
||||
|
||||
@@ -71,19 +72,20 @@ def generate_datasource():
|
||||
logger.info("Generate index for the provided data")
|
||||
|
||||
# Get the stores and documents or create new ones
|
||||
documents = get_documents()
|
||||
# Set private=false to mark the document as public (required for filtering)
|
||||
for doc in documents:
|
||||
doc.metadata["private"] = "false"
|
||||
docstore = get_doc_store()
|
||||
vector_store = get_vector_store()
|
||||
docTypes = get_document_Types()
|
||||
for docType in docTypes:
|
||||
documents = get_documents(docType)
|
||||
# Set private=false to mark the document as public (required for filtering)
|
||||
for doc in documents:
|
||||
doc.metadata["private"] = "false"
|
||||
docstore = get_doc_store(docType)
|
||||
vector_store = get_vector_store(docType)
|
||||
|
||||
# Run the ingestion pipeline
|
||||
_ = run_pipeline(docstore, vector_store, documents)
|
||||
# Run the ingestion pipeline
|
||||
_ = run_pipeline(docstore, vector_store, documents)
|
||||
|
||||
# Build the index and persist storage
|
||||
persist_storage(docstore, vector_store)
|
||||
persist_BMRetriever(vector_store)
|
||||
# Build the index and persist storage
|
||||
persist_storage(docstore, vector_store)
|
||||
|
||||
logger.info("Finished generating the index")
|
||||
|
||||
|
||||
+15
-17
@@ -2,22 +2,20 @@ import logging
|
||||
from llama_index.core.indices import VectorStoreIndex
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.engine.loaders import get_document_Types
|
||||
|
||||
from typing import Dict,Any
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
indexs = {}
|
||||
|
||||
def get_index(params=None):
    """Return the indexes for all document types, building them on first use.

    On the first call the module-level ``indexs`` dict is empty, so one
    index is built per entry returned by ``get_document_Types()`` and stored
    under that document type. Later calls return the already-filled dict
    without reconnecting.

    Args:
        params: Not used by this function.

    Returns:
        The module-level ``indexs`` dict, keyed by document type. It stays
        empty when ``get_document_Types()`` returns nothing.
    """
    # The guard must test the module-level cache ``indexs``. Testing the
    # local ``index`` (assigned further down) raises UnboundLocalError on
    # the very first call. The dict is only mutated, never rebound, so no
    # ``global`` declaration is required.
    if not indexs:
        logger.info("Connecting vector store...")
        for docType in get_document_Types():
            store = get_vector_store(docType)
            # Load the index from the vector store
            # If you are using a vector store that doesn't store text,
            # you must load the index from both the vector store and the document store
            index = VectorStoreIndex.from_vector_store(store)
            logger.info("Finished load index from vector store.")
            indexs[docType] = index
    return indexs
|
||||
def get_index(**args):
    """Build and return the index for the requested project flag.

    The available flags come from ``get_document_Types()``. The flag passed
    as the ``prjFlag`` keyword argument is used when it is one of them;
    otherwise the first available flag is used.

    Args:
        **args: Keyword arguments; only ``prjFlag`` is read here.

    Returns:
        The index created by ``VectorStoreIndex.from_vector_store`` for the
        selected flag, or ``None`` when no flags are available.
    """
    logger.info("Connecting vector store...")
    known_flags = get_document_Types()
    if len(known_flags) == 0:
        return None

    requested = args.get('prjFlag', '')
    if requested in known_flags:
        selected = requested
    else:
        # Missing or unknown flag: fall back to the first available one.
        selected = known_flags[0]

    vector_store = get_vector_store(selected)
    # Load the index from the vector store
    # If you are using a vector store that doesn't store text,
    # you must load the index from both the vector store and the document store
    loaded = VectorStoreIndex.from_vector_store(vector_store)
    logger.info("Finished load index from vector store.")
    return loaded
|
||||
|
||||
@@ -1,10 +1,9 @@
|
||||
import os
|
||||
import yaml
|
||||
import json
|
||||
import importlib
|
||||
from cachetools import cached, LRUCache
|
||||
from llama_index.core.tools.tool_spec.base import BaseToolSpec
|
||||
import os
|
||||
|
||||
import yaml
|
||||
from llama_index.core.tools.function_tool import FunctionTool
|
||||
from llama_index.core.tools.tool_spec.base import BaseToolSpec
|
||||
|
||||
|
||||
class ToolType:
|
||||
@@ -46,7 +45,7 @@ class ToolFactory:
|
||||
def from_env() -> list[FunctionTool]:
|
||||
tools = []
|
||||
if os.path.exists("config/tools.yaml"):
|
||||
with open("config/tools.yaml", "r") as f:
|
||||
with open("config/tools.yaml", "r", encoding='UTF-8') as f:
|
||||
tool_configs = yaml.safe_load(f)
|
||||
if tool_configs != None and len(tool_configs.items()) != 0:
|
||||
for tool_type, config_entries in tool_configs.items():
|
||||
|
||||
Reference in New Issue
Block a user