初始化提交

This commit is contained in:
2024-08-13 09:37:23 +08:00
parent 4923337038
commit e112fa4e44
50 changed files with 1649 additions and 259 deletions
+9
View File
@@ -1,6 +1,9 @@
import os
import logging
from typing import Dict
from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.json import JSONReader
from llama_parse import LlamaParse
from pydantic import BaseModel, validator
@@ -39,6 +42,9 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
parser = llama_parse_parser()
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
def llama_local_extractor() -> Dict[str, BaseReader]:
    """Build the file-extractor mapping backed by local (non-LlamaParse) readers.

    Returns:
        A mapping from file extension (including the leading dot) to the
        reader instance that handles files with that extension. Only JSON
        files are mapped here.
    """
    # SimpleDirectoryReader looks extractors up by the file's suffix, which
    # includes the leading dot, and calls ``load_data`` on the mapped value.
    # The key therefore has to be ".json" and the value a reader instance,
    # not the JSONReader class itself.
    return {".json": JSONReader()}
def get_file_documents(config: FileLoaderConfig):
from llama_index.core.readers import SimpleDirectoryReader
@@ -53,6 +59,9 @@ def get_file_documents(config: FileLoaderConfig):
nest_asyncio.apply()
file_extractor = llama_parse_extractor()
else:
file_extractor = llama_local_extractor()
reader = SimpleDirectoryReader(
config.data_dir,
recursive=True,