新增属性图谱
This commit is contained in:
@@ -5,8 +5,8 @@ load_dotenv()
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from app.engine.loaders import get_document_Types, get_documents
|
from app.engine.loaders import get_document_Types, get_documents,getProjectInfos
|
||||||
from app.engine.vectordb import get_vector_store
|
from app.engine.vectordb import get_vector_store,get_Neo4j_Graph_Store
|
||||||
from app.settings import init_settings
|
from app.settings import init_settings
|
||||||
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
||||||
from llama_index.core.ingestion import IngestionPipeline
|
from llama_index.core.ingestion import IngestionPipeline
|
||||||
@@ -14,15 +14,15 @@ from llama_index.core.node_parser import SentenceSplitter,MarkdownNodeParser
|
|||||||
from llama_index.core.settings import Settings
|
from llama_index.core.settings import Settings
|
||||||
from llama_index.core.storage import StorageContext
|
from llama_index.core.storage import StorageContext
|
||||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||||
|
from llama_index.core import PropertyGraphIndex
|
||||||
|
from app.engine.graph.extractor import PrjGraphExtractor
|
||||||
|
|
||||||
logging.basicConfig(level=logging.INFO)
|
logging.basicConfig(level=logging.INFO)
|
||||||
logger = logging.getLogger()
|
logger = logging.getLogger()
|
||||||
|
|
||||||
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||||
|
|
||||||
|
|
||||||
def get_doc_store(docType:str):
|
def get_doc_store(docType:str):
|
||||||
|
|
||||||
# If the storage directory is there, load the document store from it.
|
# If the storage directory is there, load the document store from it.
|
||||||
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
|
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
|
||||||
storeDir = os.path.join(STORAGE_DIR,docType)
|
storeDir = os.path.join(STORAGE_DIR,docType)
|
||||||
@@ -31,7 +31,6 @@ def get_doc_store(docType:str):
|
|||||||
else:
|
else:
|
||||||
return SimpleDocumentStore()
|
return SimpleDocumentStore()
|
||||||
|
|
||||||
|
|
||||||
def run_pipeline(docstore, vector_store, documents):
|
def run_pipeline(docstore, vector_store, documents):
|
||||||
pipeline = IngestionPipeline(
|
pipeline = IngestionPipeline(
|
||||||
transformations=[
|
transformations=[
|
||||||
@@ -49,10 +48,8 @@ def run_pipeline(docstore, vector_store, documents):
|
|||||||
|
|
||||||
# Run the ingestion pipeline and store the results
|
# Run the ingestion pipeline and store the results
|
||||||
nodes = pipeline.run(show_progress=True, documents=documents)
|
nodes = pipeline.run(show_progress=True, documents=documents)
|
||||||
|
|
||||||
return nodes
|
return nodes
|
||||||
|
|
||||||
|
|
||||||
def persist_storage(docstore, vector_store):
|
def persist_storage(docstore, vector_store):
|
||||||
storage_context = StorageContext.from_defaults(
|
storage_context = StorageContext.from_defaults(
|
||||||
docstore=docstore,
|
docstore=docstore,
|
||||||
@@ -60,7 +57,6 @@ def persist_storage(docstore, vector_store):
|
|||||||
)
|
)
|
||||||
storage_context.persist(STORAGE_DIR)
|
storage_context.persist(STORAGE_DIR)
|
||||||
|
|
||||||
|
|
||||||
def persist_BMRetriever(vector_store):
|
def persist_BMRetriever(vector_store):
|
||||||
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
||||||
nodes = vector_store.get_nodes([])
|
nodes = vector_store.get_nodes([])
|
||||||
@@ -68,9 +64,7 @@ def persist_BMRetriever(vector_store):
|
|||||||
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes = nodes)
|
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes = nodes)
|
||||||
bmRetriver.persist(STORAGE_DIR)
|
bmRetriver.persist(STORAGE_DIR)
|
||||||
|
|
||||||
|
|
||||||
def generate_datasource():
|
def generate_datasource():
|
||||||
init_settings()
|
|
||||||
logger.info("Generate index for the provided data")
|
logger.info("Generate index for the provided data")
|
||||||
|
|
||||||
# Get the stores and documents or create new ones
|
# Get the stores and documents or create new ones
|
||||||
@@ -92,8 +86,47 @@ def generate_datasource():
|
|||||||
|
|
||||||
logger.info("Finished generating the index")
|
logger.info("Finished generating the index")
|
||||||
|
|
||||||
|
class PropertyGraphChache:
|
||||||
|
def generate(self):
|
||||||
|
GRAPH_STORE_TYPE = os.getenv("GRAPH_STORE_TYPE", "")
|
||||||
|
GRAPH_STORAGE_DIR = os.getenv("GRAPH_STORAGE_DIR", "storage_graph")
|
||||||
|
prjInfos = getProjectInfos()
|
||||||
|
for prjInfo in prjInfos:
|
||||||
|
prjFlag = prjInfo['flag']
|
||||||
|
prjName = prjInfo['name']
|
||||||
|
chche_Path = GRAPH_STORAGE_DIR + f'/{prjFlag}'
|
||||||
|
|
||||||
|
if GRAPH_STORE_TYPE == 'neo4j':
|
||||||
|
self.neo4jProertyGraph()
|
||||||
|
else:
|
||||||
|
self.simplePropertyGraph(prjName,prjFlag,chche_Path)
|
||||||
|
|
||||||
|
def simplePropertyGraph(self,prjName:str,prjFlag:str,filePath:str):
|
||||||
|
documents = get_documents(prjFlag)
|
||||||
|
index = PropertyGraphIndex(
|
||||||
|
nodes =documents,
|
||||||
|
kg_extractors = [PrjGraphExtractor(prjName)],
|
||||||
|
embed_model = Settings.embed_model,
|
||||||
|
show_progress= True
|
||||||
|
)
|
||||||
|
os.makedirs(filePath,exist_ok = True)
|
||||||
|
index.storage_context.persist(persist_dir = filePath)
|
||||||
|
|
||||||
|
def neo4jProertyGraph(self,prjName:str,prjFlag:str,filePath:str):
|
||||||
|
neo4jStore =get_Neo4j_Graph_Store(prjFlag)
|
||||||
|
documents = get_documents(prjFlag)
|
||||||
|
PropertyGraphIndex(
|
||||||
|
nodes =documents,
|
||||||
|
property_graph_store = neo4jStore,
|
||||||
|
kg_extractors = [PrjGraphExtractor(prjName)],
|
||||||
|
embed_model = Settings.embed_model,
|
||||||
|
show_progress= True
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
init_settings()
|
||||||
from phoenix.trace import using_project
|
from phoenix.trace import using_project
|
||||||
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
|
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
|
||||||
generate_datasource()
|
generate_datasource()
|
||||||
|
PropertyGraphChache().generate()
|
||||||
|
|||||||
@@ -0,0 +1,128 @@
|
|||||||
|
import os
|
||||||
|
from llama_index.core.schema import TransformComponent, BaseNode
|
||||||
|
from llama_index.core.graph_stores.types import (
|
||||||
|
EntityNode,
|
||||||
|
Relation,
|
||||||
|
Triplet,
|
||||||
|
KG_NODES_KEY,
|
||||||
|
KG_RELATIONS_KEY,
|
||||||
|
)
|
||||||
|
from app.engine.loaders.projectJson import ProjectJson
|
||||||
|
from app.engine.loaders.markdownReader import ChunkMarkdownReader
|
||||||
|
|
||||||
|
class PrjGraphExtractor(TransformComponent):
|
||||||
|
ProjectName:str
|
||||||
|
def __init__(self,PrjName:str):
|
||||||
|
super().__init__(ProjectName = PrjName)
|
||||||
|
|
||||||
|
|
||||||
|
def __call__(
|
||||||
|
self, llama_nodes: list[BaseNode], **kwargs
|
||||||
|
) -> list[BaseNode]:
|
||||||
|
if len(llama_nodes) > 0:
|
||||||
|
self._addPrjNode(llama_nodes[0])
|
||||||
|
|
||||||
|
for llama_node in llama_nodes:
|
||||||
|
fileName = self._getFileName(llama_node)
|
||||||
|
if fileName == '工程属性':
|
||||||
|
self._dealAttributeNode(llama_node)
|
||||||
|
else:
|
||||||
|
self._dealCommonNode(llama_node)
|
||||||
|
return llama_nodes
|
||||||
|
|
||||||
|
def _dealCommonNode(self,llama_node:BaseNode):
|
||||||
|
fileName = self._getFileName(llama_node)
|
||||||
|
existing_nodes:list = llama_node.metadata.pop(KG_NODES_KEY, [])
|
||||||
|
existing_relations:list = llama_node.metadata.pop(KG_RELATIONS_KEY, [])
|
||||||
|
|
||||||
|
records:dict[str,list] = self._getRecordNode(llama_node)
|
||||||
|
fInfos = fileName.split('_')
|
||||||
|
if len(fInfos) == 1:
|
||||||
|
existing_nodes.append(EntityNode(name=fInfos[0], label=fInfos[0]))
|
||||||
|
elif len(fInfos) == 2:
|
||||||
|
existing_nodes.append(EntityNode(name=fileName, label=fInfos[1]))
|
||||||
|
else:
|
||||||
|
raise ValueError("文件名存在多个下划线")
|
||||||
|
|
||||||
|
index = 0
|
||||||
|
for record in records:
|
||||||
|
index = index + 1
|
||||||
|
rcdName = self._getRecordName(fileName,record)
|
||||||
|
existing_nodes.append(EntityNode(name=rcdName, label=rcdName,properties = record))
|
||||||
|
existing_relations.append(
|
||||||
|
Relation(
|
||||||
|
label="包含",
|
||||||
|
source_id= fileName,
|
||||||
|
target_id= rcdName,
|
||||||
|
properties={},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
existing_relations.append(
|
||||||
|
Relation(
|
||||||
|
label="包含",
|
||||||
|
source_id= self.ProjectName,
|
||||||
|
target_id= fileName,
|
||||||
|
properties={},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
llama_node.metadata[KG_NODES_KEY] = existing_nodes
|
||||||
|
llama_node.metadata[KG_RELATIONS_KEY] = existing_relations
|
||||||
|
|
||||||
|
def _dealAttributeNode(self,llama_node:BaseNode):
|
||||||
|
fileName = self._getFileName(llama_node)
|
||||||
|
existing_nodes:list = llama_node.metadata.pop(KG_NODES_KEY, [])
|
||||||
|
existing_relations:list = llama_node.metadata.pop(KG_RELATIONS_KEY, [])
|
||||||
|
records:dict[str,list] = self._getRecordNode(llama_node)
|
||||||
|
existing_nodes.append(EntityNode(name=fileName, label=fileName))
|
||||||
|
|
||||||
|
index = 0
|
||||||
|
for record in records:
|
||||||
|
index = index + 1
|
||||||
|
attName = self._getRecordName(fileName,record)
|
||||||
|
existing_nodes.append(EntityNode(name=attName, label='属性',properties = record))
|
||||||
|
existing_relations.append(
|
||||||
|
Relation(
|
||||||
|
label="聚合",
|
||||||
|
source_id= fileName,
|
||||||
|
target_id= attName,
|
||||||
|
properties={},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
existing_relations.append(
|
||||||
|
Relation(
|
||||||
|
label="包含",
|
||||||
|
source_id= self.ProjectName,
|
||||||
|
target_id= fileName,
|
||||||
|
properties={},
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
llama_node.metadata[KG_NODES_KEY] = existing_nodes
|
||||||
|
llama_node.metadata[KG_RELATIONS_KEY] = existing_relations
|
||||||
|
|
||||||
|
def _getRecordNode(self,llama_node:BaseNode):
|
||||||
|
content = llama_node.get_content()
|
||||||
|
rd = ChunkMarkdownReader()
|
||||||
|
rd.markdown_to_tups(content)
|
||||||
|
records = rd.records()
|
||||||
|
return records
|
||||||
|
|
||||||
|
def _getFileName(self,llama_node:BaseNode):
|
||||||
|
meta = llama_node.metadata
|
||||||
|
fileName:str = os.path.splitext(meta['file_name'])[0]
|
||||||
|
return fileName
|
||||||
|
|
||||||
|
def _addPrjNode(self,llama_node:BaseNode):
|
||||||
|
existing_nodes:list = llama_node.metadata.pop(KG_NODES_KEY, [])
|
||||||
|
existing_nodes.append(EntityNode(name=self.ProjectName, label=self.ProjectName))
|
||||||
|
llama_node.metadata[KG_NODES_KEY] = existing_nodes
|
||||||
|
|
||||||
|
def _getRecordName(self,fileName:str,record:dict):
|
||||||
|
for name,value in record.items():
|
||||||
|
if '名称' in name:
|
||||||
|
return value
|
||||||
|
raise ValueError('记录名称为空')
|
||||||
@@ -0,0 +1,87 @@
|
|||||||
|
from llama_index.core.indices.property_graph import LLMSynonymRetriever,VectorContextRetriever,PGRetriever
|
||||||
|
from llama_index.core.indices.property_graph.transformations.schema_llm import *
|
||||||
|
from llama_index.core import SimpleDirectoryReader
|
||||||
|
from llama_index.core import settings
|
||||||
|
from llama_index.core import PropertyGraphIndex
|
||||||
|
from typing import List,Tuple,Literal
|
||||||
|
from app.settings import init_settings
|
||||||
|
import os
|
||||||
|
from llama_index.core.storage.storage_context import StorageContext
|
||||||
|
from llama_index.core import load_index_from_storage
|
||||||
|
from app.observability import init_observability
|
||||||
|
from app.engine.vectordb import get_Neo4j_Graph_Store
|
||||||
|
from llama_index.core.response_synthesizers import ResponseMode
|
||||||
|
from util.register import *
|
||||||
|
from llama_index.core.query_engine import RetrieverQueryEngine
|
||||||
|
from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
|
||||||
|
from app.engine.engine import get_node_postprocessors
|
||||||
|
|
||||||
|
class PropertyGraph:
|
||||||
|
def __init__(self,prjFlag:str) -> None:
|
||||||
|
self._prjFlag = prjFlag
|
||||||
|
|
||||||
|
def create_query_engine(self,retriever):
|
||||||
|
postprocess = get_node_postprocessors()
|
||||||
|
query_engine = RetrieverQueryEngine.from_args(
|
||||||
|
retriever = retriever,
|
||||||
|
text_qa_template=text_qa_template,
|
||||||
|
refine_template=refine_template,
|
||||||
|
summary_template = summary_template,
|
||||||
|
simple_template = simple_template,
|
||||||
|
node_postprocessors=postprocess,
|
||||||
|
use_async=True,
|
||||||
|
streaming=False,
|
||||||
|
response_mode = ResponseMode.TREE_SUMMARIZE
|
||||||
|
)
|
||||||
|
return query_engine
|
||||||
|
|
||||||
|
def getPropertyGraphIndex(self):
|
||||||
|
GRAPH_STORE_TYPE = os.getenv("GRAPH_STORE_TYPE", "")
|
||||||
|
if GRAPH_STORE_TYPE == 'neo4j':
|
||||||
|
index = PropertyGraphIndex.from_existing(property_graph_store= get_Neo4j_Graph_Store(self._prjFlag))
|
||||||
|
else:
|
||||||
|
GRAPH_STORAGE_DIR = os.getenv("GRAPH_STORAGE_DIR", "storage_graph")
|
||||||
|
prjCachePath = GRAPH_STORAGE_DIR + f"/{self._prjFlag}"
|
||||||
|
if not os.path.exists(prjCachePath):
|
||||||
|
return None
|
||||||
|
storeContext = StorageContext.from_defaults(persist_dir = prjCachePath)
|
||||||
|
index = load_index_from_storage(storeContext)
|
||||||
|
return index
|
||||||
|
|
||||||
|
def query(self,query_str:str):
|
||||||
|
index = self.getPropertyGraphIndex()
|
||||||
|
synonym_retriver = LLMSynonymRetriever(index.property_graph_store,
|
||||||
|
llm=settings.Settings.llm,
|
||||||
|
max_keywords=10,
|
||||||
|
include_text=False
|
||||||
|
)
|
||||||
|
if index.property_graph_store.supports_vector_queries:
|
||||||
|
vector_store = None
|
||||||
|
else:
|
||||||
|
vector_store = index.vector_store
|
||||||
|
vector_retriver = VectorContextRetriever(index.property_graph_store,
|
||||||
|
vector_store = vector_store,
|
||||||
|
embed_model=settings.Settings.embed_model,
|
||||||
|
similarity_top_k=5,
|
||||||
|
include_text=False
|
||||||
|
)
|
||||||
|
|
||||||
|
retriever = index.as_retriever(sub_retrievers=[synonym_retriver,vector_retriver])
|
||||||
|
query_engine = self.create_query_engine(retriever)
|
||||||
|
|
||||||
|
response = query_engine.query(query_str)
|
||||||
|
print(response)
|
||||||
|
return str(response)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
init_settings()
|
||||||
|
init_observability()
|
||||||
|
# graph = PropertyGraph('projects_1b20bbf4-3243-4ac3-bcf0-8a91e9157521')
|
||||||
|
# graph.query('代码为XLBT的金额是')
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -100,6 +100,7 @@ class CustomFileMetadataFunc:
|
|||||||
def _is_default_fs(self,fs: fsspec.AbstractFileSystem) -> bool:
|
def _is_default_fs(self,fs: fsspec.AbstractFileSystem) -> bool:
|
||||||
return isinstance(fs, LocalFileSystem) and not fs.auto_mkdir
|
return isinstance(fs, LocalFileSystem) and not fs.auto_mkdir
|
||||||
|
|
||||||
|
|
||||||
def llama_parse_parser():
|
def llama_parse_parser():
|
||||||
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
|
|||||||
@@ -56,9 +56,9 @@ class MarkDown:
|
|||||||
return strTitle + "\n" + markdown_table
|
return strTitle + "\n" + markdown_table
|
||||||
|
|
||||||
|
|
||||||
prjSon = ProjectJson('')
|
# prjSon = ProjectJson('C:\\Users\\wanyaokun\\Desktop\\markdown\\Project\\110千伏思科变电站工程')
|
||||||
prjSon.parse()
|
# prjSon.parse()
|
||||||
tables = prjSon.tables()
|
# tables = prjSon.tables()
|
||||||
for name,table in tables.items():
|
# for name,table in tables.items():
|
||||||
mdObj = MarkDown(table,f'')
|
# mdObj = MarkDown(table,f'C:\\Users\\wanyaokun\\Desktop\\markdown\\data\\110千伏思科变电站工程\\{table.name()}.md')
|
||||||
mdObj.build()
|
# mdObj.build()
|
||||||
@@ -19,32 +19,38 @@ class ChunkMarkdownReader(MarkdownReader):
|
|||||||
|
|
||||||
def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
|
def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
|
||||||
markdown_tups: List[Tuple[Optional[str], str]] = []
|
markdown_tups: List[Tuple[Optional[str], str]] = []
|
||||||
lines = markdown_text.split("\n")
|
lines = self._multi_char_split(markdown_text,'\r\n')
|
||||||
|
lines = [line for line in lines if line!='']
|
||||||
|
|
||||||
strTitle = ''
|
strTitle = ''
|
||||||
tokensNum:int = 0
|
tokensNum:int = 0
|
||||||
current_lines = []
|
current_lines = []
|
||||||
strheader:str = ''
|
strheader:str = ''
|
||||||
headerSize:int = 0
|
headerSize:int = 0
|
||||||
|
bAreadyJudgeTitle = False
|
||||||
for line in lines:
|
for line in lines:
|
||||||
tokensNum += self._token_size(line)
|
tokensNum += self._token_size(line)
|
||||||
if tokensNum > self._chunkSize and len(current_lines) > 0:
|
if tokensNum > self._chunkSize and len(current_lines) > 0:
|
||||||
if len(markdown_tups) == 0:
|
if len(markdown_tups) == 0:
|
||||||
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
titleHead = strTitle + '\n' + strheader if strTitle!= '' else strheader
|
||||||
|
markdown_tups.append((titleHead, "\n".join(current_lines)))
|
||||||
else:
|
else:
|
||||||
markdown_tups.append((strheader , "\n".join(current_lines)))
|
markdown_tups.append((strheader , "\n".join(current_lines)))
|
||||||
tokensNum = headerSize
|
tokensNum = headerSize
|
||||||
current_lines.clear()
|
current_lines.clear()
|
||||||
current_lines.append(line)
|
|
||||||
if strTitle!='' and strheader!='':
|
if strheader!='':
|
||||||
self._rows.append(line)
|
self._rows.append(line)
|
||||||
|
|
||||||
if line == '\n' or line == '\r':
|
if line.startswith('|') and strTitle == '' and not bAreadyJudgeTitle:
|
||||||
|
if len(current_lines) > 0:
|
||||||
if tokensNum > self._chunkSize:
|
if tokensNum > self._chunkSize:
|
||||||
raise ValueError('标题Token数大于chunkSize大小')
|
raise ValueError('标题Token数大于chunkSize大小')
|
||||||
strTitle = "\n".join(current_lines)
|
strTitle = "\n".join(current_lines)
|
||||||
#headerSize = headerSize + self._token_size(strTitle)
|
|
||||||
current_lines.clear()
|
current_lines.clear()
|
||||||
|
bAreadyJudgeTitle = True
|
||||||
|
|
||||||
|
current_lines.append(line)
|
||||||
|
|
||||||
if line.startswith("|---"):
|
if line.startswith("|---"):
|
||||||
self._colheader = current_lines[0]
|
self._colheader = current_lines[0]
|
||||||
@@ -55,7 +61,8 @@ class ChunkMarkdownReader(MarkdownReader):
|
|||||||
|
|
||||||
if len(current_lines) > 0:
|
if len(current_lines) > 0:
|
||||||
if len(markdown_tups) == 0:
|
if len(markdown_tups) == 0:
|
||||||
markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
|
titleHead = strTitle + '\n' + strheader if strTitle!= '' else strheader
|
||||||
|
markdown_tups.append((titleHead, "\n".join(current_lines)))
|
||||||
else:
|
else:
|
||||||
markdown_tups.append((strheader , "\n".join(current_lines)))
|
markdown_tups.append((strheader , "\n".join(current_lines)))
|
||||||
|
|
||||||
@@ -86,4 +93,24 @@ class ChunkMarkdownReader(MarkdownReader):
|
|||||||
return gData[Field]
|
return gData[Field]
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
def records(self):
|
||||||
|
cols = self._colheader.split('|')
|
||||||
|
cols = cols[1:-1]
|
||||||
|
records = []
|
||||||
|
for row in self._rows:
|
||||||
|
rowtrs = row.split('|')
|
||||||
|
rowdatas = [item for item in rowtrs if (item!='\r' or item!='\n')]
|
||||||
|
rowdatas = rowdatas[1:-1]
|
||||||
|
if len(rowdatas) == 0:
|
||||||
|
continue
|
||||||
|
record = {}
|
||||||
|
for cName,rValue in zip(cols,rowdatas):
|
||||||
|
record[cName] = rValue
|
||||||
|
records.append(record)
|
||||||
|
return records
|
||||||
|
|
||||||
|
def _multi_char_split(self,string, separators):
|
||||||
|
# 将多个分隔符连成一个正则表达式
|
||||||
|
pattern = '[' + re.escape(separators) + ']'
|
||||||
|
# 使用正则表达式进行分割
|
||||||
|
return re.split(pattern, string)
|
||||||
@@ -2,6 +2,7 @@ import os
|
|||||||
from llama_index.vector_stores.chroma import ChromaVectorStore
|
from llama_index.vector_stores.chroma import ChromaVectorStore
|
||||||
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
from llama_index.vector_stores.qdrant import QdrantVectorStore
|
||||||
from qdrant_client import qdrant_client
|
from qdrant_client import qdrant_client
|
||||||
|
from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore
|
||||||
|
|
||||||
qclient = None
|
qclient = None
|
||||||
|
|
||||||
@@ -70,3 +71,13 @@ def get_vector_store(docType:str):
|
|||||||
raise ValueError(f"Invalid vector store type: {store_type}")
|
raise ValueError(f"Invalid vector store type: {store_type}")
|
||||||
|
|
||||||
return store
|
return store
|
||||||
|
|
||||||
|
|
||||||
|
def get_Neo4j_Graph_Store(docType:str):
|
||||||
|
neo4jStore = Neo4jPropertyGraphStore(
|
||||||
|
username= os.getenv('NEO4J_USERNAME'),
|
||||||
|
password= os.getenv('NEO4J_PASSWORD'),
|
||||||
|
url=os.getenv('NEO4J_URL'),
|
||||||
|
database= docType
|
||||||
|
)
|
||||||
|
return neo4jStore
|
||||||
Reference in New Issue
Block a user