diff --git a/backend/.env.example b/backend/.env.example index 231ef29..c3d85dd 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -65,6 +65,10 @@ VECTOR_STORE_TYPE=chroma # The name of the collection in your vector database VECTOR_STORE_COLLECTION=default +#模型查询方式:graph、rag +LLM_QUERY_WAY = graph + +#属性图存储类型:本地属性图库(默认),neo4j GRAPH_STORE_TYPE = #---------- neo4j - PropertyGraph ---------------- @@ -120,4 +124,5 @@ CHAT_UPLOAD_FILECACHE = "./output/uploaded" JIEBA_DATA=./nltk_data NLTK_DATA=./nltk_data +#IO流默认的编码格式 PYTHONUTF8=1 \ No newline at end of file diff --git a/backend/app/engine/engine.py b/backend/app/engine/engine.py index c7a9869..e42f977 100644 --- a/backend/app/engine/engine.py +++ b/backend/app/engine/engine.py @@ -12,6 +12,8 @@ from app.engine.prompt import text_qa_template, refine_template, summary_templat from app.engine.retriever.HybridRetriever import HybridRetriever from app.engine.response.treeSummResponse import CustomTreeResponse from llama_index.core.settings import Settings +from llama_index.core.indices.property_graph import LLMSynonymRetriever,VectorContextRetriever +from llama_index.core import PropertyGraphIndex ModelPlateCategory = '模型平台' @@ -111,16 +113,38 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None return summary_query_engine # Create a query engine -def create_query_engine(index, top_k=3, use_reranker=False, filters=None, response_mode=None): +def create_query_engine(index,top_k=3, use_reranker=False, filters=None, response_mode=None): # 创建向量检索查询工具 postprocess = None if use_reranker: postprocess = get_node_postprocessors() + llm_query = os.getenv('LLM_QUERY_WAY','rag') + if llm_query == 'graph': + graphIndex:PropertyGraphIndex = index + synonym_retriver = LLMSynonymRetriever(graphIndex.property_graph_store, + llm=Settings.llm, + include_text=False + ) + if graphIndex.property_graph_store.supports_vector_queries: + vector_store = None + else: + vector_store = graphIndex.vector_store 
+ vector_retriver = VectorContextRetriever(graphIndex.property_graph_store, + vector_store = vector_store, + embed_model=Settings.embed_model, + similarity_top_k=top_k, + include_text=False + ) + + retriever = graphIndex.as_retriever(sub_retrievers=[synonym_retriver,vector_retriver]) + + else: + retriever = get_Retriever(index, + similarity_top_k=top_k, + filters=filters) query_engine = RetrieverQueryEngine.from_args( - get_Retriever(index, - similarity_top_k=top_k, - filters=filters), + retriever = retriever, text_qa_template=text_qa_template, refine_template=refine_template, summary_template = summary_template, @@ -131,4 +155,4 @@ def create_query_engine(index, top_k=3, use_reranker=False, filters=None, respon response_mode = response_mode ) - return query_engine \ No newline at end of file + return query_engine diff --git a/backend/app/engine/generate.py b/backend/app/engine/generate.py index 388f597..b5740c2 100644 --- a/backend/app/engine/generate.py +++ b/backend/app/engine/generate.py @@ -103,10 +103,12 @@ class PropertyGraphChache: def simplePropertyGraph(self,prjName:str,prjFlag:str,filePath:str): documents = get_documents(prjFlag) + storeContext = StorageContext.from_defaults(vector_store=get_vector_store(prjFlag)) index = PropertyGraphIndex( nodes =documents, kg_extractors = [PrjGraphExtractor(prjName)], embed_model = Settings.embed_model, + storage_context = storeContext, show_progress= True ) os.makedirs(filePath,exist_ok = True) @@ -126,7 +128,11 @@ class PropertyGraphChache: if __name__ == "__main__": init_settings() + llm_query = os.getenv('LLM_QUERY_WAY','rag') from phoenix.trace import using_project with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj: - generate_datasource() - PropertyGraphChache().generate() + if llm_query == 'graph': + PropertyGraphChache().generate() + else: + generate_datasource() + diff --git a/backend/app/engine/graph/propertyGraph.py b/backend/app/engine/graph/propertyGraph.py index accad65..cba20c6 
100644 --- a/backend/app/engine/graph/propertyGraph.py +++ b/backend/app/engine/graph/propertyGraph.py @@ -9,7 +9,7 @@ import os from llama_index.core.storage.storage_context import StorageContext from llama_index.core import load_index_from_storage from app.observability import init_observability -from app.engine.vectordb import get_Neo4j_Graph_Store +from app.engine.vectordb import get_Neo4j_Graph_Store,get_vector_store from llama_index.core.response_synthesizers import ResponseMode from util.register import * from llama_index.core.query_engine import RetrieverQueryEngine @@ -44,7 +44,7 @@ class PropertyGraph: prjCachePath = GRAPH_STORAGE_DIR + f"/{self._prjFlag}" if not os.path.exists(prjCachePath): return None - storeContext = StorageContext.from_defaults(persist_dir = prjCachePath) + storeContext = StorageContext.from_defaults(persist_dir = prjCachePath,vector_store = get_vector_store(self._prjFlag)) index = load_index_from_storage(storeContext) return index @@ -77,8 +77,8 @@ class PropertyGraph: if __name__ == "__main__": init_settings() init_observability() - # graph = PropertyGraph('projects_1b20bbf4-3243-4ac3-bcf0-8a91e9157521') - # graph.query('代码为XLBT的金额是') + graph = PropertyGraph('projects_1b20bbf4-3243-4ac3-bcf0-8a91e9157521') + graph.query('代码为XLBT的金额是') diff --git a/backend/app/engine/index.py b/backend/app/engine/index.py index 71e3fd5..176e445 100644 --- a/backend/app/engine/index.py +++ b/backend/app/engine/index.py @@ -1,15 +1,38 @@ -import logging +import logging,os from llama_index.core.indices import VectorStoreIndex -from app.engine.vectordb import get_vector_store -from app.engine.loaders import get_document_Types +from app.engine.vectordb import get_vector_store,get_Neo4j_Graph_Store from typing import Dict,Any +from llama_index.core import PropertyGraphIndex +from llama_index.core.storage.storage_context import StorageContext +from llama_index.core import load_index_from_storage + logger = logging.getLogger("uvicorn") + def 
get_index(prjFlag:str): if prjFlag is None or prjFlag == '': raise ValueError('无效的工程标识') logger.info("Connecting vector store...") - store = get_vector_store(prjFlag) - index = VectorStoreIndex.from_vector_store(store) + index = None + llm_query = os.getenv('LLM_QUERY_WAY','rag') + if llm_query == 'graph': + index = getPropertyGraphIndex(prjFlag) + else: + store = get_vector_store(prjFlag) + index = VectorStoreIndex.from_vector_store(store) logger.info("Finished load index from vector store.") return index + + +def getPropertyGraphIndex(prjFlag:str): + GRAPH_STORE_TYPE = os.getenv("GRAPH_STORE_TYPE", "") + if GRAPH_STORE_TYPE == 'neo4j': + index = PropertyGraphIndex.from_existing(property_graph_store= get_Neo4j_Graph_Store(prjFlag)) + else: + GRAPH_STORAGE_DIR = os.getenv("GRAPH_STORAGE_PATH", "storage_graph") + prjCachePath = GRAPH_STORAGE_DIR + f"/{prjFlag}" + if not os.path.exists(prjCachePath): + return None + storeContext = StorageContext.from_defaults(persist_dir = prjCachePath,vector_store = get_vector_store(prjFlag)) + index = load_index_from_storage(storeContext) + return index \ No newline at end of file diff --git a/backend/app/engine/loaders/markDown.py b/backend/app/engine/loaders/markDown.py index 080f52f..ceff05b 100644 --- a/backend/app/engine/loaders/markDown.py +++ b/backend/app/engine/loaders/markDown.py @@ -12,7 +12,7 @@ class MarkDown: colComments:list = [] ignores:List[str] = [] for name,fld in flds.items(): - if name =='_id' or name =='nodeType' or name =='relTbId': + if self._table.name() == '工程属性' and (name =='_id' or name =='nodeType' or name =='relTbId'): ignores.append(name) continue @@ -26,7 +26,9 @@ class MarkDown: if col in ignores: continue txt:str = record.value(col) - datas.append(txt.replace('\n'," ")) + content = txt.replace('\n',"") + content = content.replace('\r',"") + datas.append(content) rowdatas.append(datas) content = self.convert(self._table.name(),self._table.comment(),columns,colComments,rowdatas) @@ -57,8 +59,8 @@ class 
MarkDown: if __name__ == "__main__": - intputDir = '' - outputDir = '' + intputDir = 'C:\\Users\\wanyaokun\\Desktop\\markdown\\Project' + outputDir = 'C:\\Users\\wanyaokun\\Desktop\\markdown\\data' subdirectories = {} for dp, dn, fn in os.walk(intputDir): diff --git a/backend/app/engine/vectordb.py b/backend/app/engine/vectordb.py index 0992883..e610baa 100644 --- a/backend/app/engine/vectordb.py +++ b/backend/app/engine/vectordb.py @@ -8,11 +8,12 @@ qclient = None def get_qdrant_vector_store(docType:str): collection_name = docType + llm_query = os.getenv('LLM_QUERY_WAY','rag') vector_store_path = os.getenv("VECTOR_STORE_PATH") host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"), port=int(os.getenv("VECTOR_STORE_PORT", "6333")), - vector_store_path =os.path.join(vector_store_path,docType) + vector_store_path =os.path.join(vector_store_path,llm_query,docType) if not vector_store_path or not host: raise ValueError( "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT" @@ -36,7 +37,8 @@ def get_qdrant_vector_store(docType:str): def get_chroma_vector_store(docType:str): collection_name = docType - vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),docType) + llm_query = os.getenv('LLM_QUERY_WAY','rag') + vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),llm_query,docType) # if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path # otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet) if vector_store_path: @@ -59,7 +61,6 @@ def get_chroma_vector_store(docType:str): def get_vector_store(docType:str): store_type=os.getenv("VECTOR_STORE_TYPE") - store = None match store_type: @@ -72,7 +73,6 @@ def get_vector_store(docType:str): return store - def get_Neo4j_Graph_Store(docType:str): neo4jStore = Neo4jPropertyGraphStore( username= os.getenv('NEO4J_USERNAME'), diff --git a/backend/unit_test/Quetions/projects_0ffaf7fb-8a61-46e2-97a2-8f924e9560a7.json 
b/backend/unit_test/Quetions/projects_0ffaf7fb-8a61-46e2-97a2-8f924e9560a7.json new file mode 100644 index 0000000..1e3fb4e --- /dev/null +++ b/backend/unit_test/Quetions/projects_0ffaf7fb-8a61-46e2-97a2-8f924e9560a7.json @@ -0,0 +1,124 @@ +{ + "test:线路工程查询": [ + { + "question": "基础工程项目划分合价", + "answer": "5099350万元 或者 49051649643元" + }, + { + "question": "此工程名称", + "answer": "架线南网" + }, + { + "question": "此工程电压等级", + "answer": "35kV" + }, + { + "question": "基础工程项目划分合价", + "answer": "合价为 49051649642.9667 元" + }, + { + "question": "基础工程项目划分合价,输出以万元为单位", + "answer": "合价总计为4905164.96429667万元" + }, + { + "question": "建设场地征用及清理费是多少", + "answer": "建设场地征用及清理费的金额为16831284.228711元" + }, + { + "question": "建设场地征用及清理费金额多少", + "answer": "建设场地征用及清理费的金额为16831284.228711元" + }, + { + "question": "线路亘长", + "answer": "此工程的线路亘长为5.0公里。" + }, + { + "question": "工程运距", + "answer": "工程运距包括:人力运距20km,汽车运距30km,拖拉机运距40km,船舶运距40km,海缆船舶运距50km,索道运距7km。" + }, + { + "question": "基坑土石方单公里用量", + "answer": "81081630.354" + }, + { + "question": "基本预备费费率", + "answer": "基本预备费的费率为2%。" + }, + { + "question": "调差文件", + "answer": "调差文件指的是调差系数文件〔2020年14号文〕" + }, + { + "question": "工程税率", + "answer": "工程税率是9.0%" + }, + { + "question": "新建时间", + "answer": "2020/10/9" + }, + { + "question": "工程静态投资,以万元为单位", + "answer": "71503585.3336万元" + }, + { + "question": "耐张塔比例", + "answer": "耐张塔的比例为38.462%" + }, + { + "question": "耐张杆比例", + "answer": "耐张杆的比例是72.222%。" + }, + { + "question": "耐张杆的比例是如何计算出来的?", + "answer": "耐张杆的比例计算方式是将耐张杆基数除以耐张杆和直线杆的总基数,然后将结果转换为百分比。" + }, + { + "question": "勘察设计费", + "answer": "勘察设计费的总金额为16164210209.43,其中包含勘察费12122154260.0和设计费4042055949.43。" + }, + { + "question": "岩石比例", + "answer": "99" + }, + { + "question": "人工挖孔", + "answer": "148" + }, + { + "question": "余土运距", + "answer": "余土运距为187.0公里" + }, + { + "question": "高压线(含10kV)", + "answer": "3处" + }, + { + "question": "基坑普通土", + "answer": "313873965.334m³" + }, + { + "question": "尖峰及施工基面普通土", + "answer": "尖峰及施工基面普通土的量为6534.528 m³" + }, + 
{ + "question": "节能评估费用", + "answer": "节能评估费用在电力工程造价中被标识为C1A,其费率设置为100.0%,但需要注意的是,在当前工程中此费用的金额为0.0。" + }, + { + "question": "工程监理费", + "answer": "工程监理费的代码为B3,其费率是100.0,金额为131009.92。" + }, + { + "question": "可行性研究文件评审费", + "answer": "可行性研究文件评审费的代码是C41,其金额为13340.0。" + }, + { + "question": "接地工程合价", + "answer": "合价为 121964.914965元" + }, + { + "question": "接地工程项目划分合价", + "answer": "合价为 121964.914965元" + } + ] +} \ No newline at end of file