实现多工程数据存储支持
This commit is contained in:
@@ -13,8 +13,48 @@ def load_configs():
|
||||
configs = yaml.safe_load(f)
|
||||
return configs
|
||||
|
||||
def path_difference(path1:str, path2:str) -> str:
    """Return the part of ``path2`` that follows its common prefix with ``path1``.

    Both paths are made absolute, split on ``os.path.sep`` and compared
    component by component from the left. The components of ``path2`` that
    come after the shared leading components are joined with ``'_'``.

    Args:
        path1: Base path.
        path2: Path whose trailing, non-shared components are returned.

    Returns:
        The remaining components of ``path2`` joined by ``'_'``. An empty
        string is returned when nothing remains, i.e. when both paths are
        equal or when ``path2`` is an ancestor of ``path1``.

    Note:
        Because ``'_'`` is the joiner, the result is ambiguous when a
        directory name itself contains an underscore.
    """
    import os

    base_parts = os.path.abspath(path1).split(os.path.sep)
    target_parts = os.path.abspath(path2).split(os.path.sep)

    # zip() stops at the shorter list, so a path2 with fewer components than
    # path1 can no longer index past the end of its own component list.
    shared = 0
    for base_part, target_part in zip(base_parts, target_parts):
        if base_part != target_part:
            break
        shared += 1

    return '_'.join(target_parts[shared:])
|
||||
|
||||
def get_document_Types():
    """List the keys of every leaf directory below the data root.

    The root is ``'data'`` unless the loaded configs contain a ``"file"``
    entry, in which case ``FileLoaderConfig(**entry).data_dir`` is used.
    The tree is walked with an explicit stack; a directory that contains no
    sub-directories is a leaf, and its key is
    ``path_difference(root, leaf)``. When the root itself has no
    sub-directories, the root is the only leaf.

    Returns:
        A list of leaf keys, in the order the stack-based walk visits them.
    """
    import os

    root = 'data'
    configs = load_configs()
    if configs is not None:
        for kind, settings in configs.items():
            if kind != "file":
                continue
            root = FileLoaderConfig(**settings).data_dir
            break

    leaf_keys = []
    pending = [root]
    while pending:
        current = pending.pop()
        children = []
        for entry in os.listdir(current):
            candidate = os.path.join(current, entry)
            if os.path.isdir(candidate):
                children.append(candidate)
        if children:
            # Not a leaf: keep descending, children pushed in listing order.
            pending.extend(children)
        else:
            leaf_keys.append(path_difference(root, current))
    return leaf_keys
|
||||
|
||||
def get_documents(docType:str):
|
||||
documents = []
|
||||
config = load_configs()
|
||||
if config is None or len(config.items()) == 0:
|
||||
@@ -28,7 +68,7 @@ def get_documents():
|
||||
loader_config = loader_config or []
|
||||
match loader_type:
|
||||
case "file":
|
||||
document = get_file_documents(FileLoaderConfig(**loader_config))
|
||||
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
|
||||
case "web":
|
||||
document = get_web_documents(WebLoaderConfig(**loader_config))
|
||||
case "db":
|
||||
|
||||
@@ -20,7 +20,6 @@ class FileLoaderConfig(BaseModel):
|
||||
raise ValueError(f"Directory '{v}' does not exist")
|
||||
return v
|
||||
|
||||
|
||||
def llama_parse_parser():
|
||||
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
||||
raise ValueError(
|
||||
@@ -35,7 +34,6 @@ def llama_parse_parser():
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||
from llama_parse.utils import SUPPORTED_FILE_TYPES
|
||||
|
||||
@@ -45,8 +43,7 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||
def llama_local_extractor() -> Dict[str, BaseReader]:
    """Return the extractor mapping for local parsing: ``"json"`` -> ``JSONReader``."""
    # NOTE(review): the value is JSONReader itself (not called here) and the
    # key has no leading dot; confirm both match what the consumer expects.
    extractors = {"json": JSONReader}
    return extractors
|
||||
|
||||
|
||||
def get_file_documents(config: FileLoaderConfig):
|
||||
def get_file_documents(config: FileLoaderConfig, childPath: str):
|
||||
from llama_index.core.readers import SimpleDirectoryReader
|
||||
|
||||
try:
|
||||
@@ -63,7 +60,7 @@ def get_file_documents(config: FileLoaderConfig):
|
||||
file_extractor = llama_local_extractor()
|
||||
|
||||
reader = SimpleDirectoryReader(
|
||||
config.data_dir,
|
||||
os.path.join(config.data_dir,childPath.replace('_','\\')),
|
||||
recursive=True,
|
||||
filename_as_id=True,
|
||||
raise_on_error=True,
|
||||
|
||||
Reference in New Issue
Block a user