将属性图流程同步至主流程中

This commit is contained in:
wanyaokun
2024-09-14 16:35:46 +08:00
parent 6de6cc201e
commit 67b1959d80
8 changed files with 208 additions and 24 deletions
+29 -5
View File
@@ -12,6 +12,8 @@ from app.engine.prompt import text_qa_template, refine_template, summary_templat
from app.engine.retriever.HybridRetriever import HybridRetriever
from app.engine.response.treeSummResponse import CustomTreeResponse
from llama_index.core.settings import Settings
from llama_index.core.indices.property_graph import LLMSynonymRetriever,VectorContextRetriever
from llama_index.core import PropertyGraphIndex
ModelPlateCategory = '模型平台'
@@ -111,16 +113,38 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None
return summary_query_engine
# Create a query engine
def create_query_engine(index, top_k=3, use_reranker=False, filters=None, response_mode=None):
def create_query_engine(index, top_k=3, use_reranker=False, filters=None, response_mode=None):
    """Select the retriever that backs the query engine for ``index``.

    The retrieval strategy is chosen by the ``LLM_QUERY_WAY`` environment
    variable (default ``'rag'``):

    * ``'graph'``: ``index`` is treated as a ``PropertyGraphIndex`` and is
      queried through an LLM synonym retriever combined with a vector
      context retriever. ``filters`` is not applied in this branch.
    * anything else: the retriever comes from ``get_Retriever`` with
      ``similarity_top_k=top_k`` and ``filters``.

    Args:
        index: The index to query.
        top_k: Number of similar items to retrieve.
        use_reranker: When true, node post-processors are fetched via
            ``get_node_postprocessors()``.
        filters: Filters forwarded to ``get_Retriever`` (non-graph mode only).
        response_mode: Response mode handed to the query engine.
    """
    # Create the retrieval query tool.
    postprocess = None
    if use_reranker:
        postprocess = get_node_postprocessors()

    llm_query = os.getenv('LLM_QUERY_WAY', 'rag')
    if llm_query == 'graph':
        graph_index: PropertyGraphIndex = index
        graph_store = graph_index.property_graph_store
        synonym_retriever = LLMSynonymRetriever(
            graph_store,
            llm=Settings.llm,
            include_text=False,
        )
        # A separate vector store is only passed when the graph store cannot
        # answer vector queries itself.
        if graph_store.supports_vector_queries:
            vector_store = None
        else:
            vector_store = graph_index.vector_store
        vector_retriever = VectorContextRetriever(
            graph_store,
            vector_store=vector_store,
            embed_model=Settings.embed_model,
            similarity_top_k=top_k,
            include_text=False,
        )
        retriever = graph_index.as_retriever(
            sub_retrievers=[synonym_retriever, vector_retriever]
        )
    else:
        # No trailing comma here: a trailing comma would wrap the retriever
        # in a 1-tuple, which is not a valid retriever for the query engine.
        retriever = get_Retriever(index, similarity_top_k=top_k, filters=filters)
query_engine = RetrieverQueryEngine.from_args(
get_Retriever(index,
similarity_top_k=top_k,
filters=filters),
retriever = retriever,
text_qa_template=text_qa_template,
refine_template=refine_template,
summary_template = summary_template,
@@ -131,4 +155,4 @@ def create_query_engine(index, top_k=3, use_reranker=False, filters=None, respon
response_mode = response_mode
)
return query_engine
return query_engine
+8 -2
View File
@@ -103,10 +103,12 @@ class PropertyGraphChache:
def simplePropertyGraph(self,prjName:str,prjFlag:str,filePath:str):
documents = get_documents(prjFlag)
storeContext = StorageContext.from_defaults(vector_store=get_vector_store(prjFlag))
index = PropertyGraphIndex(
nodes =documents,
kg_extractors = [PrjGraphExtractor(prjName)],
embed_model = Settings.embed_model,
storage_context = storeContext,
show_progress= True
)
os.makedirs(filePath,exist_ok = True)
@@ -126,7 +128,11 @@ class PropertyGraphChache:
# Script entry point: initialise settings, then run index generation inside a
# `using_project(...)` context from phoenix.trace.
if __name__ == "__main__":
init_settings()
# Retrieval-mode switch; falls back to 'rag' when LLM_QUERY_WAY is unset.
llm_query = os.getenv('LLM_QUERY_WAY','rag')
from phoenix.trace import using_project
# NOTE(review): os.getenv returns None when PHOENIX_PROJECT_NAME is unset, so
# this concatenation would raise TypeError - confirm the variable is always set.
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
# NOTE(review): the next two unconditional calls look like the pre-change side
# of this diff hunk; the conditional below appears to supersede them - confirm.
generate_datasource()
PropertyGraphChache().generate()
# 'graph' mode runs PropertyGraphChache().generate(); any other mode runs
# generate_datasource().
if llm_query == 'graph':
PropertyGraphChache().generate()
else:
generate_datasource()
+4 -4
View File
@@ -9,7 +9,7 @@ import os
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import load_index_from_storage
from app.observability import init_observability
from app.engine.vectordb import get_Neo4j_Graph_Store
from app.engine.vectordb import get_Neo4j_Graph_Store,get_vector_store
from llama_index.core.response_synthesizers import ResponseMode
from util.register import *
from llama_index.core.query_engine import RetrieverQueryEngine
@@ -44,7 +44,7 @@ class PropertyGraph:
prjCachePath = GRAPH_STORAGE_DIR + f"/{self._prjFlag}"
if not os.path.exists(prjCachePath):
return None
storeContext = StorageContext.from_defaults(persist_dir = prjCachePath)
storeContext = StorageContext.from_defaults(persist_dir = prjCachePath,vector_store = get_vector_store(self._prjFlag))
index = load_index_from_storage(storeContext)
return index
@@ -77,8 +77,8 @@ class PropertyGraph:
if __name__ == "__main__":
    # Manual smoke run: initialise settings and observability, then send one
    # sample question to a fixed project's property graph.
    init_settings()
    init_observability()

    graph = PropertyGraph(
        'projects_1b20bbf4-3243-4ac3-bcf0-8a91e9157521'
    )
    graph.query(
        '代码为XLBT的金额是'
    )
+28 -5
View File
@@ -1,15 +1,38 @@
import logging
import logging,os
from llama_index.core.indices import VectorStoreIndex
from app.engine.vectordb import get_vector_store
from app.engine.loaders import get_document_Types
from app.engine.vectordb import get_vector_store,get_Neo4j_Graph_Store
from typing import Dict,Any
from llama_index.core import PropertyGraphIndex
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import load_index_from_storage
logger = logging.getLogger("uvicorn")
def get_index(prjFlag: str):
    """Load the index for a project according to the configured query mode.

    The mode is read from the ``LLM_QUERY_WAY`` environment variable
    (default ``'rag'``, matching the other modules that read it):

    * ``'graph'``: the index comes from ``getPropertyGraphIndex``.
    * anything else: a ``VectorStoreIndex`` is built on top of the
      project's vector store.

    Args:
        prjFlag: Project identifier; must be a non-empty string.

    Returns:
        The loaded index, or ``None`` when graph mode is active and
        ``getPropertyGraphIndex`` found no index for the project.

    Raises:
        ValueError: If ``prjFlag`` is ``None`` or empty.
    """
    if prjFlag is None or prjFlag == '':
        raise ValueError('无效的工程标识')

    logger.info("Connecting vector store...")
    llm_query = os.getenv('LLM_QUERY_WAY', 'rag')
    if llm_query == 'graph':
        # Graph mode: do not build the plain vector index first, it would
        # only be thrown away.
        index = getPropertyGraphIndex(prjFlag)
        if index is None:
            # Avoid reporting success when nothing was actually loaded.
            logger.warning("No property graph index found for project %s.", prjFlag)
            return None
    else:
        store = get_vector_store(prjFlag)
        index = VectorStoreIndex.from_vector_store(store)
    logger.info("Finished load index from vector store.")
    return index
def getPropertyGraphIndex(prjFlag: str):
    """Load the property graph index that belongs to ``prjFlag``.

    When ``GRAPH_STORE_TYPE`` is ``'neo4j'`` the index is created from the
    existing Neo4j property graph store. Otherwise it is loaded from the
    directory ``<GRAPH_STORAGE_PATH>/<prjFlag>`` (``GRAPH_STORAGE_PATH``
    defaults to ``storage_graph``), together with the project's vector store.

    Returns:
        The loaded index, or ``None`` if the on-disk directory does not exist.
    """
    if os.getenv("GRAPH_STORE_TYPE", "") == 'neo4j':
        return PropertyGraphIndex.from_existing(
            property_graph_store=get_Neo4j_Graph_Store(prjFlag)
        )

    storage_root = os.getenv("GRAPH_STORAGE_PATH", "storage_graph")
    cache_dir = storage_root + f"/{prjFlag}"
    if not os.path.exists(cache_dir):
        # Nothing has been persisted for this project yet.
        return None

    context = StorageContext.from_defaults(
        persist_dir=cache_dir,
        vector_store=get_vector_store(prjFlag),
    )
    return load_index_from_storage(context)
+6 -4
View File
@@ -12,7 +12,7 @@ class MarkDown:
colComments:list = []
ignores:List[str] = []
for name,fld in flds.items():
if name =='_id' or name =='nodeType' or name =='relTbId':
if self._table.name() == '工程属性' and (name =='_id' or name =='nodeType' or name =='relTbId'):
ignores.append(name)
continue
@@ -26,7 +26,9 @@ class MarkDown:
if col in ignores:
continue
txt:str = record.value(col)
datas.append(txt.replace('\n'," "))
content = txt.replace('\n',"")
content = content.replace('\r',"")
datas.append(content)
rowdatas.append(datas)
content = self.convert(self._table.name(),self._table.comment(),columns,colComments,rowdatas)
@@ -57,8 +59,8 @@ class MarkDown:
if __name__ == "__main__":
intputDir = ''
outputDir = ''
intputDir = 'C:\\Users\\wanyaokun\\Desktop\\markdown\\Project'
outputDir = 'C:\\Users\\wanyaokun\\Desktop\\markdown\\data'
subdirectories = {}
for dp, dn, fn in os.walk(intputDir):
+4 -4
View File
@@ -8,11 +8,12 @@ qclient = None
def get_qdrant_vector_store(docType:str):
collection_name = docType
llm_query = os.getenv('LLM_QUERY_WAY','rag')
vector_store_path = os.getenv("VECTOR_STORE_PATH")
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
vector_store_path =os.path.join(vector_store_path,docType)
vector_store_path =os.path.join(vector_store_path,llm_query,docType)
if not vector_store_path or not host:
raise ValueError(
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
@@ -36,7 +37,8 @@ def get_qdrant_vector_store(docType:str):
def get_chroma_vector_store(docType:str):
collection_name = docType
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),docType)
llm_query = os.getenv('LLM_QUERY_WAY','rag')
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),llm_query,docType)
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
if vector_store_path:
@@ -59,7 +61,6 @@ def get_chroma_vector_store(docType:str):
def get_vector_store(docType:str):
store_type=os.getenv("VECTOR_STORE_TYPE")
store = None
match store_type:
@@ -72,7 +73,6 @@ def get_vector_store(docType:str):
return store
def get_Neo4j_Graph_Store(docType:str):
neo4jStore = Neo4jPropertyGraphStore(
username= os.getenv('NEO4J_USERNAME'),