合并Dev分支代码

2024-08-30 10:49:05 +08:00
parent e9ccd7db35
commit 73565b26e4
16 changed files with 486 additions and 409 deletions
@@ -10,12 +10,11 @@ from app.engine.index import get_index
 from app.engine.tools import ToolFactory


-def get_chat_engine(filters=None, params=None):
+def get_chat_engine(filters=None, params=None,**args):
    system_prompt = os.getenv("SYSTEM_PROMPT")
    top_k = int(os.getenv("TOP_K", "3"))
    use_reranker = os.getenv("RERANK_ENABLED")
    tools = []
-
    # 创建SQL查询工具
 #    sql_query_engine = create_summary_query_engine(index)
    # sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
@@ -25,9 +24,7 @@ def get_chat_engine(filters=None, params=None):
    #tools.append(sql_query_tool)

    # Add query tool if index exists
-    indexs = get_index()
-    if len(indexs) > 0:
-        index = list(indexs.values())[0]
+    index = get_index(**args)
    if index is not None:
        summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
        summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
@@ -5,7 +5,7 @@ load_dotenv()
 import logging
 import os

-from app.engine.loaders import get_documents
+from app.engine.loaders import get_document_Types, get_documents
 from app.engine.vectordb import get_vector_store
 from app.settings import init_settings
 from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
@@ -21,12 +21,13 @@ logger = logging.getLogger()
 STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")


-def get_doc_store():
+def get_doc_store(docType:str):

    # If the storage directory is there, load the document store from it.
    # If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
-    if os.path.exists(STORAGE_DIR):
-        return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
+    storeDir = os.path.join(STORAGE_DIR,docType)
+    if os.path.exists(storeDir):
+        return SimpleDocumentStore.from_persist_dir(storeDir)
    else:
        return SimpleDocumentStore()

@@ -71,19 +72,20 @@ def generate_datasource():
    logger.info("Generate index for the provided data")

    # Get the stores and documents or create new ones
-    documents = get_documents()
-    # Set private=false to mark the document as public (required for filtering)
-    for doc in documents:
-        doc.metadata["private"] = "false"
-    docstore = get_doc_store()
-    vector_store = get_vector_store()
+    docTypes = get_document_Types()
+    for docType in docTypes:
+        documents = get_documents(docType)
+        # Set private=false to mark the document as public (required for filtering)
+        for doc in documents:
+            doc.metadata["private"] = "false"
+        docstore = get_doc_store(docType)
+        vector_store = get_vector_store(docType)

-    # Run the ingestion pipeline
-    _ = run_pipeline(docstore, vector_store, documents)
+        # Run the ingestion pipeline
+        _ = run_pipeline(docstore, vector_store, documents)

-    # Build the index and persist storage
-    persist_storage(docstore, vector_store)
-    persist_BMRetriever(vector_store)
+        # Build the index and persist storage
+        persist_storage(docstore, vector_store)

    logger.info("Finished generating the index")

@@ -2,22 +2,20 @@ import logging
 from llama_index.core.indices import VectorStoreIndex
 from app.engine.vectordb import get_vector_store
 from app.engine.loaders import get_document_Types
-
+from typing import Dict,Any
 logger = logging.getLogger("uvicorn")

-indexs = {}
-
-def get_index(params=None):
-    global indexs
-    if len(index) <= 0:
-        logger.info("Connecting vector store...")
-        docTypes = get_document_Types()
-        for docType in docTypes:
-            store = get_vector_store(docType)
-            # Load the index from the vector store
-            # If you are using a vector store that doesn't store text,
-            # you must load the index from both the vector store and the document store
-            index = VectorStoreIndex.from_vector_store(store)
-            logger.info("Finished load index from vector store.")
-            indexs[docType] = index
-    return indexs
+def get_index(**args):
+    logger.info("Connecting vector store...")
+    prjFlags = get_document_Types()
+    if len(prjFlags)<=0:
+        return None
+    prjFlag = args.get('prjFlag','')
+    flag = prjFlags[0] if prjFlag not in prjFlags else prjFlag
+    store = get_vector_store(flag)
+    # Load the index from the vector store
+    # If you are using a vector store that doesn't store text,
+    # you must load the index from both the vector store and the document store
+    index = VectorStoreIndex.from_vector_store(store)
+    logger.info("Finished load index from vector store.")
+    return index
@@ -1,10 +1,9 @@
-import os
-import yaml
-import json
 import importlib
-from cachetools import cached, LRUCache
-from llama_index.core.tools.tool_spec.base import BaseToolSpec
+import os
+
+import yaml
 from llama_index.core.tools.function_tool import FunctionTool
+from llama_index.core.tools.tool_spec.base import BaseToolSpec


 class ToolType:
@@ -46,7 +45,7 @@ class ToolFactory:
    def from_env() -> list[FunctionTool]:
        tools = []
        if os.path.exists("config/tools.yaml"):
-            with open("config/tools.yaml", "r") as f:
+            with open("config/tools.yaml", "r", encoding='UTF-8') as f:
                tool_configs = yaml.safe_load(f)
                if tool_configs != None and len(tool_configs.items()) != 0:
                    for tool_type, config_entries in tool_configs.items():