将属性图流程同步至主流程中

This commit is contained in:
wanyaokun
2024-09-14 16:35:46 +08:00
parent 6de6cc201e
commit 67b1959d80
8 changed files with 208 additions and 24 deletions
+5
View File
@@ -65,6 +65,10 @@ VECTOR_STORE_TYPE=chroma
# The name of the collection in your vector database # The name of the collection in your vector database
VECTOR_STORE_COLLECTION=default VECTOR_STORE_COLLECTION=default
#模型查询方式:graph、rag
LLM_QUERY_WAY = graph
#属性图存储类型:本地属性图库(默认),neo4j
GRAPH_STORE_TYPE = GRAPH_STORE_TYPE =
#---------- neo4j - PropertyGraph ---------------- #---------- neo4j - PropertyGraph ----------------
@@ -120,4 +124,5 @@ CHAT_UPLOAD_FILECACHE = "./output/uploaded"
JIEBA_DATA=./nltk_data JIEBA_DATA=./nltk_data
NLTK_DATA=./nltk_data NLTK_DATA=./nltk_data
#IO流默认的编码格式
PYTHONUTF8=1 PYTHONUTF8=1
+28 -4
View File
@@ -12,6 +12,8 @@ from app.engine.prompt import text_qa_template, refine_template, summary_templat
from app.engine.retriever.HybridRetriever import HybridRetriever from app.engine.retriever.HybridRetriever import HybridRetriever
from app.engine.response.treeSummResponse import CustomTreeResponse from app.engine.response.treeSummResponse import CustomTreeResponse
from llama_index.core.settings import Settings from llama_index.core.settings import Settings
from llama_index.core.indices.property_graph import LLMSynonymRetriever,VectorContextRetriever
from llama_index.core import PropertyGraphIndex
ModelPlateCategory = '模型平台' ModelPlateCategory = '模型平台'
@@ -111,16 +113,38 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None
return summary_query_engine return summary_query_engine
# Create a query engine # Create a query engine
def create_query_engine(index, top_k=3, use_reranker=False, filters=None, response_mode=None): def create_query_engine(index,top_k=3, use_reranker=False, filters=None, response_mode=None):
# 创建向量检索查询工具 # 创建向量检索查询工具
postprocess = None postprocess = None
if use_reranker: if use_reranker:
postprocess = get_node_postprocessors() postprocess = get_node_postprocessors()
llm_query = os.getenv('LLM_QUERY_WAY','rag')
if llm_query == 'graph':
graphIndex:PropertyGraphIndex = index
synonym_retriver = LLMSynonymRetriever(graphIndex.property_graph_store,
llm=Settings.llm,
include_text=False
)
if graphIndex.property_graph_store.supports_vector_queries:
vector_store = None
else:
vector_store = graphIndex.vector_store
vector_retriver = VectorContextRetriever(graphIndex.property_graph_store,
vector_store = vector_store,
embed_model=Settings.embed_model,
similarity_top_k=top_k,
include_text=False
)
retriever = graphIndex.as_retriever(sub_retrievers=[synonym_retriver,vector_retriver])
else:
retriever = get_Retriever(index,
similarity_top_k=top_k,
filters=filters),
query_engine = RetrieverQueryEngine.from_args( query_engine = RetrieverQueryEngine.from_args(
get_Retriever(index, retriever = retriever,
similarity_top_k=top_k,
filters=filters),
text_qa_template=text_qa_template, text_qa_template=text_qa_template,
refine_template=refine_template, refine_template=refine_template,
summary_template = summary_template, summary_template = summary_template,
+8 -2
View File
@@ -103,10 +103,12 @@ class PropertyGraphChache:
def simplePropertyGraph(self,prjName:str,prjFlag:str,filePath:str): def simplePropertyGraph(self,prjName:str,prjFlag:str,filePath:str):
documents = get_documents(prjFlag) documents = get_documents(prjFlag)
storeContext = StorageContext.from_defaults(vector_store=get_vector_store(prjFlag))
index = PropertyGraphIndex( index = PropertyGraphIndex(
nodes =documents, nodes =documents,
kg_extractors = [PrjGraphExtractor(prjName)], kg_extractors = [PrjGraphExtractor(prjName)],
embed_model = Settings.embed_model, embed_model = Settings.embed_model,
storage_context = storeContext,
show_progress= True show_progress= True
) )
os.makedirs(filePath,exist_ok = True) os.makedirs(filePath,exist_ok = True)
@@ -126,7 +128,11 @@ class PropertyGraphChache:
if __name__ == "__main__": if __name__ == "__main__":
init_settings() init_settings()
llm_query = os.getenv('LLM_QUERY_WAY','rag')
from phoenix.trace import using_project from phoenix.trace import using_project
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj: with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
generate_datasource() if llm_query == 'graph':
PropertyGraphChache().generate() PropertyGraphChache().generate()
else:
generate_datasource()
+4 -4
View File
@@ -9,7 +9,7 @@ import os
from llama_index.core.storage.storage_context import StorageContext from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import load_index_from_storage from llama_index.core import load_index_from_storage
from app.observability import init_observability from app.observability import init_observability
from app.engine.vectordb import get_Neo4j_Graph_Store from app.engine.vectordb import get_Neo4j_Graph_Store,get_vector_store
from llama_index.core.response_synthesizers import ResponseMode from llama_index.core.response_synthesizers import ResponseMode
from util.register import * from util.register import *
from llama_index.core.query_engine import RetrieverQueryEngine from llama_index.core.query_engine import RetrieverQueryEngine
@@ -44,7 +44,7 @@ class PropertyGraph:
prjCachePath = GRAPH_STORAGE_DIR + f"/{self._prjFlag}" prjCachePath = GRAPH_STORAGE_DIR + f"/{self._prjFlag}"
if not os.path.exists(prjCachePath): if not os.path.exists(prjCachePath):
return None return None
storeContext = StorageContext.from_defaults(persist_dir = prjCachePath) storeContext = StorageContext.from_defaults(persist_dir = prjCachePath,vector_store = get_vector_store(self._prjFlag))
index = load_index_from_storage(storeContext) index = load_index_from_storage(storeContext)
return index return index
@@ -77,8 +77,8 @@ class PropertyGraph:
if __name__ == "__main__": if __name__ == "__main__":
init_settings() init_settings()
init_observability() init_observability()
# graph = PropertyGraph('projects_1b20bbf4-3243-4ac3-bcf0-8a91e9157521') graph = PropertyGraph('projects_1b20bbf4-3243-4ac3-bcf0-8a91e9157521')
# graph.query('代码为XLBT的金额是') graph.query('代码为XLBT的金额是')
+28 -5
View File
@@ -1,15 +1,38 @@
import logging import logging,os
from llama_index.core.indices import VectorStoreIndex from llama_index.core.indices import VectorStoreIndex
from app.engine.vectordb import get_vector_store from app.engine.vectordb import get_vector_store,get_Neo4j_Graph_Store
from app.engine.loaders import get_document_Types
from typing import Dict,Any from typing import Dict,Any
from llama_index.core import PropertyGraphIndex
from llama_index.core.storage.storage_context import StorageContext
from llama_index.core import load_index_from_storage
logger = logging.getLogger("uvicorn") logger = logging.getLogger("uvicorn")
def get_index(prjFlag:str): def get_index(prjFlag:str):
if prjFlag is None or prjFlag == '': if prjFlag is None or prjFlag == '':
raise ValueError('无效的工程标识') raise ValueError('无效的工程标识')
logger.info("Connecting vector store...") logger.info("Connecting vector store...")
store = get_vector_store(prjFlag) index = None
index = VectorStoreIndex.from_vector_store(store) llm_query = os.getenv('LLM_QUERY_WAY')
if llm_query == 'graph':
index = getPropertyGraphIndex(prjFlag)
else:
store = get_vector_store(prjFlag)
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished load index from vector store.") logger.info("Finished load index from vector store.")
return index return index
def getPropertyGraphIndex(prjFlag:str):
    """Load the property-graph index that belongs to one project.

    The backend is selected by the ``GRAPH_STORE_TYPE`` environment variable:

    * ``'neo4j'`` - build the index from the property graph store returned
      by ``get_Neo4j_Graph_Store(prjFlag)``.
    * anything else (including unset) - load the index persisted under
      ``<GRAPH_STORAGE_PATH>/<prjFlag>`` (root defaults to
      ``"storage_graph"``), attaching the vector store returned by
      ``get_vector_store(prjFlag)``.

    Args:
        prjFlag: Project identifier; used as the sub-directory name of the
            local persist directory and passed to the store factories.

    Returns:
        The loaded index, or ``None`` when no persist directory exists for
        the project in local-storage mode.
    """
    if os.getenv("GRAPH_STORE_TYPE", "") == 'neo4j':
        return PropertyGraphIndex.from_existing(
            property_graph_store=get_Neo4j_Graph_Store(prjFlag)
        )

    storage_root = os.getenv("GRAPH_STORAGE_PATH", "storage_graph")
    project_dir = storage_root + f"/{prjFlag}"
    # Require a real directory: a stray regular file at this path cannot be
    # loaded as a persisted index, so treat it the same as "nothing stored".
    if not os.path.isdir(project_dir):
        return None

    storage_context = StorageContext.from_defaults(
        persist_dir=project_dir,
        vector_store=get_vector_store(prjFlag),
    )
    return load_index_from_storage(storage_context)
+6 -4
View File
@@ -12,7 +12,7 @@ class MarkDown:
colComments:list = [] colComments:list = []
ignores:List[str] = [] ignores:List[str] = []
for name,fld in flds.items(): for name,fld in flds.items():
if name =='_id' or name =='nodeType' or name =='relTbId': if self._table.name() == '工程属性' and (name =='_id' or name =='nodeType' or name =='relTbId'):
ignores.append(name) ignores.append(name)
continue continue
@@ -26,7 +26,9 @@ class MarkDown:
if col in ignores: if col in ignores:
continue continue
txt:str = record.value(col) txt:str = record.value(col)
datas.append(txt.replace('\n'," ")) content = txt.replace('\n',"")
content = content.replace('\r',"")
datas.append(content)
rowdatas.append(datas) rowdatas.append(datas)
content = self.convert(self._table.name(),self._table.comment(),columns,colComments,rowdatas) content = self.convert(self._table.name(),self._table.comment(),columns,colComments,rowdatas)
@@ -57,8 +59,8 @@ class MarkDown:
if __name__ == "__main__": if __name__ == "__main__":
intputDir = '' intputDir = 'C:\\Users\\wanyaokun\\Desktop\\markdown\\Project'
outputDir = '' outputDir = 'C:\\Users\\wanyaokun\\Desktop\\markdown\\data'
subdirectories = {} subdirectories = {}
for dp, dn, fn in os.walk(intputDir): for dp, dn, fn in os.walk(intputDir):
+4 -4
View File
@@ -8,11 +8,12 @@ qclient = None
def get_qdrant_vector_store(docType:str): def get_qdrant_vector_store(docType:str):
collection_name = docType collection_name = docType
llm_query = os.getenv('LLM_QUERY_WAY','rag')
vector_store_path = os.getenv("VECTOR_STORE_PATH") vector_store_path = os.getenv("VECTOR_STORE_PATH")
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"), host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
port=int(os.getenv("VECTOR_STORE_PORT", "6333")), port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
vector_store_path =os.path.join(vector_store_path,docType) vector_store_path =os.path.join(vector_store_path,llm_query,docType)
if not vector_store_path or not host: if not vector_store_path or not host:
raise ValueError( raise ValueError(
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT" "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
@@ -36,7 +37,8 @@ def get_qdrant_vector_store(docType:str):
def get_chroma_vector_store(docType:str): def get_chroma_vector_store(docType:str):
collection_name = docType collection_name = docType
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),docType) llm_query = os.getenv('LLM_QUERY_WAY','rag')
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),llm_query,docType)
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path # if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet) # otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
if vector_store_path: if vector_store_path:
@@ -59,7 +61,6 @@ def get_chroma_vector_store(docType:str):
def get_vector_store(docType:str): def get_vector_store(docType:str):
store_type=os.getenv("VECTOR_STORE_TYPE") store_type=os.getenv("VECTOR_STORE_TYPE")
store = None store = None
match store_type: match store_type:
@@ -72,7 +73,6 @@ def get_vector_store(docType:str):
return store return store
def get_Neo4j_Graph_Store(docType:str): def get_Neo4j_Graph_Store(docType:str):
neo4jStore = Neo4jPropertyGraphStore( neo4jStore = Neo4jPropertyGraphStore(
username= os.getenv('NEO4J_USERNAME'), username= os.getenv('NEO4J_USERNAME'),
@@ -0,0 +1,124 @@
{
"test:线路工程查询": [
{
"question": "基础工程项目划分合价",
"answer": "5099350万元 或者 49051649643元"
},
{
"question": "此工程名称",
"answer": "架线南网"
},
{
"question": "此工程电压等级",
"answer": "35kV"
},
{
"question": "基础工程项目划分合价",
"answer": "合价为 49051649642.9667 元"
},
{
"question": "基础工程项目划分合价,输出以万元为单位",
"answer": "合价总计为4905164.96429667万元"
},
{
"question": "建设场地征用及清理费是多少",
"answer": "建设场地征用及清理费的金额为16831284.228711元"
},
{
"question": "建设场地征用及清理费金额多少",
"answer": "建设场地征用及清理费的金额为16831284.228711元"
},
{
"question": "线路亘长",
"answer": "此工程的线路亘长为5.0公里。"
},
{
"question": "工程运距",
"answer": "工程运距包括:人力运距20km,汽车运距30km,拖拉机运距40km,船舶运距40km,海缆船舶运距50km,索道运距7km。"
},
{
"question": "基坑土石方单公里用量",
"answer": "81081630.354"
},
{
"question": "基本预备费费率",
"answer": "基本预备费的费率为2%。"
},
{
"question": "调差文件",
"answer": "调差文件指的是调差系数文件〔2020年14号文〕"
},
{
"question": "工程税率",
"answer": "工程税率是9.0%"
},
{
"question": "新建时间",
"answer": "2020/10/9"
},
{
"question": "工程静态投资,以万元为单位",
"answer": "71503585.3336万元"
},
{
"question": "耐张塔比例",
"answer": "耐张塔的比例为38.462%"
},
{
"question": "耐张杆比例",
"answer": "耐张杆的比例是72.222%。"
},
{
"question": "耐张杆的比例是如何计算出来的?",
"answer": "耐张杆的比例计算方式是将耐张杆基数除以耐张杆和直线杆的总基数,然后将结果转换为百分比。"
},
{
"question": "勘察设计费",
"answer": "勘察设计费的总金额为16164210209.43,其中包含勘察费12122154260.0和设计费4042055949.43。"
},
{
"question": "岩石比例",
"answer": "99"
},
{
"question": "人工挖孔",
"answer": "148"
},
{
"question": "余土运距",
"answer": "余土运距为187.0公里"
},
{
"question": "高压线(含10kV",
"answer": "3处"
},
{
"question": "基坑普通土",
"answer": "313873965.334m³"
},
{
"question": "尖峰及施工基面普通土",
"answer": "尖峰及施工基面普通土的量为6534.528 m³"
},
{
"question": "节能评估费用",
"answer": "节能评估费用在电力工程造价中被标识为C1A,其费率设置为100.0%,但需要注意的是,在当前工程中此费用的金额为0.0。"
},
{
"question": "工程监理费",
"answer": "工程监理费的代码为B3,其费率是100.0,金额为131009.92。"
},
{
"question": "可行性研究文件评审费",
"answer": "可行性研究文件评审费的代码是C41,其金额为13340.0。"
},
{
"question": "接地工程合价",
"answer": "合价为 121964.914965元"
},
{
"question": "接地工程项目划分合价",
"answer": "合价为 121964.914965元"
}
]
}