import logging import yaml from app.engine.loaders.db import DBLoaderConfig, get_db_documents from app.engine.loaders.file import FileLoaderConfig, get_file_documents from app.engine.loaders.web import WebLoaderConfig, get_web_documents from app.engine.loaders.projectJson import getProjectName import os logger = logging.getLogger(__name__) def load_configs(): with open("config/loaders.yaml",encoding='utf-8') as f: configs = yaml.safe_load(f) return configs def path_difference(path1:str, path2:str): import os path1 = os.path.abspath(path1) path2 = os.path.abspath(path2) path1_parts = path1.split(os.path.sep) path2_parts = path2.split(os.path.sep) for i, part in enumerate(path1_parts): if part != path2_parts[i]: break else: i += 1 pathKey = '' for j in range(i,len(path2_parts)): pathKey+=path2_parts[j] + '_' return pathKey[0:-1] def getFileCacahePath(): rootPath = 'data' configs = load_configs() if configs is not None and len(configs.items()) > 0: for loader_type, loader_config in configs.items(): if loader_type == "file": rootPath = FileLoaderConfig(**loader_config).data_dir break return rootPath def get_document_Types(): rootPath = getFileCacahePath() types = [] dirStack = [rootPath] while len(dirStack) > 0: curDir = dirStack.pop() dirs = [os.path.join(curDir, d) for d in os.listdir(curDir) if os.path.isdir(os.path.join(curDir, d))] if len(dirs) > 0: for dir in dirs: dirStack.append(dir) else: types.append(path_difference(rootPath,curDir)) return types def getProjectInfos(): config = load_configs() if config is None or len(config.items()) == 0: return None prjDir = None for loader_type, loader_config in config.items(): if loader_config.get('enable', True): loader_config = loader_config or [] config = FileLoaderConfig(**loader_config) prjDir = config.data_dir break if prjDir is None: return None prjInfos = [] prjFlags = get_document_Types() for prjFlag in prjFlags: fileDir = os.path.join(config.data_dir,prjFlag.replace('_','\\')) prjInfo = {} prjInfo['flag'] = prjFlag prjInfo['name'] = getProjectName(fileDir) prjInfos.append(prjInfo) return prjInfos def get_documents(docType:str): documents = [] config = load_configs() if config is None or len(config.items()) == 0: return documents for loader_type, loader_config in config.items(): if loader_config.get('enable', True): # 检查 enable 字段 logger.info( f"Loading documents from loader: {loader_type}, config: {loader_config}" ) loader_config = loader_config or [] match loader_type: case "file": document = get_file_documents(FileLoaderConfig(**loader_config),docType) case "web": document = get_web_documents(WebLoaderConfig(**loader_config)) case "db": document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config]) case _: raise ValueError(f"Invalid loader type: {loader_type}") documents.extend(document)