Files
zjdataai-app/backend/app/engine/loaders/__init__.py
T
2024-09-05 10:50:31 +08:00

111 lines
3.5 KiB
Python

import logging
import yaml
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
from app.engine.loaders.projectJson import getProjectName
import os
logger = logging.getLogger(__name__)
def load_configs():
    """Parse ``config/loaders.yaml`` and return its content.

    The path is relative, so it is resolved against the current working
    directory of the process.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.
    """
    with open("config/loaders.yaml", encoding='utf-8') as stream:
        return yaml.safe_load(stream)
def path_difference(path1: str, path2: str) -> str:
    """Return the part of *path2* that lies beyond its common prefix with *path1*.

    Both paths are made absolute, split on ``os.path.sep`` and compared
    component by component. The components of *path2* that follow the shared
    leading run are joined with ``'_'``.

    Args:
        path1: Reference path (the root the result is made relative to).
        path2: Path whose trailing, non-shared components are wanted.

    Returns:
        The remaining components of *path2* joined by underscores, or an
        empty string when *path2* has nothing beyond the shared prefix
        (this includes *path2* being equal to, or an ancestor of, *path1*).
    """
    base_parts = os.path.abspath(path1).split(os.path.sep)
    target_parts = os.path.abspath(path2).split(os.path.sep)

    # zip() stops at the shorter path, so a path2 with fewer components than
    # path1 no longer indexes past the end of its component list.
    shared = 0
    for base_part, target_part in zip(base_parts, target_parts):
        if base_part != target_part:
            break
        shared += 1

    return '_'.join(target_parts[shared:])
def getFileCacahePath():
    """Return the data directory configured for the ``"file"`` loader.

    Returns:
        ``data_dir`` of the ``FileLoaderConfig`` built from the ``"file"``
        entry of the loader configuration, or ``'data'`` when the
        configuration is missing, empty, or has no ``"file"`` entry.
    """
    fallback = 'data'
    loaders = load_configs()
    if loaders is None:
        return fallback
    for kind, settings in loaders.items():
        if kind == "file":
            return FileLoaderConfig(**settings).data_dir
    return fallback
def get_document_Types():
    """List a key for every leaf directory below the file-loader data dir.

    The tree rooted at ``getFileCacahePath()`` is walked with an explicit
    stack. A directory without sub-directories counts as a leaf, and the root
    itself is a leaf when it has no sub-directories. Each leaf is converted
    to a key with ``path_difference(root, leaf)``.

    Returns:
        The keys of all leaf directories, in traversal order.
    """
    root = getFileCacahePath()
    leaf_keys = []
    pending = [root]
    while pending:
        current = pending.pop()
        children = []
        for entry in os.listdir(current):
            candidate = os.path.join(current, entry)
            if os.path.isdir(candidate):
                children.append(candidate)
        if children:
            # Not a leaf: descend into every sub-directory later.
            pending.extend(children)
        else:
            leaf_keys.append(path_difference(root, current))
    return leaf_keys
def getProjectInfos():
    """Describe every project directory below the file loader's data dir.

    Returns:
        ``None`` when there is no loader configuration, or when the
        ``"file"`` loader is absent, disabled (``enable: false``) or has no
        ``data_dir``. Otherwise a list of dicts, one per key returned by
        ``get_document_Types()``, with:

        * ``'flag'`` - the key itself;
        * ``'name'`` - the result of ``getProjectName`` for the directory
          the key maps to.
    """
    loaders = load_configs()
    if not loaders:
        return None

    # Only the "file" loader describes a data directory. Building a
    # FileLoaderConfig from whichever loader happens to be listed first (or
    # calling .get on the list-valued "db" entry) is not meaningful.
    if "file" not in loaders:
        return None
    file_settings = loaders["file"] or {}
    if not file_settings.get('enable', True):
        return None

    prjDir = FileLoaderConfig(**file_settings).data_dir
    if prjDir is None:
        return None

    prjInfos = []
    for prjFlag in get_document_Types():
        # Flags are path components joined with '_'; rebuild the directory
        # with the platform separator instead of a hard-coded backslash.
        # NOTE(review): directory names that contain '_' themselves cannot
        # be recovered from the flag - confirm such names are not used.
        fileDir = os.path.join(prjDir, prjFlag.replace('_', os.sep))
        prjInfos.append({
            'flag': prjFlag,
            'name': getProjectName(fileDir),
        })
    return prjInfos
def get_documents(docType:str):
documents = []
config = load_configs()
if config is None or len(config.items()) == 0:
return documents
for loader_type, loader_config in config.items():
if loader_config.get('enable', True): # 检查 enable 字段
logger.info(
f"Loading documents from loader: {loader_type}, config: {loader_config}"
)
loader_config = loader_config or []
match loader_type:
case "file":
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
case "web":
document = get_web_documents(WebLoaderConfig(**loader_config))
case "db":
document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config])
case _:
raise ValueError(f"Invalid loader type: {loader_type}")
documents.extend(document)
return documents