工程名称下拉项获取兼容.md文件,同时新增自定义答案合成类

This commit is contained in:
wanyaokun
2024-09-10 09:59:00 +08:00
parent 54f19a20fc
commit cb34fde995
6 changed files with 308 additions and 17 deletions
+37 -4
View File
@@ -6,6 +6,9 @@ from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.json import JSONReader
from llama_parse import LlamaParse
from pydantic import BaseModel, validator
from app.engine.loaders.markdownReader import ChunkMarkdownReader
from app.engine.loaders.projectJson import ProjectJson
logger = logging.getLogger(__name__)
@@ -20,7 +23,6 @@ class FileLoaderConfig(BaseModel):
raise ValueError(f"Directory '{v}' does not exist")
return v
def llama_parse_parser():
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
raise ValueError(
@@ -35,7 +37,6 @@ def llama_parse_parser():
)
return parser
def llama_parse_extractor() -> Dict[str, LlamaParse]:
from llama_parse.utils import SUPPORTED_FILE_TYPES
@@ -43,8 +44,11 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
def llama_local_extractor() -> Dict[str, BaseReader]:
    """Build the mapping of file extension to local reader.

    Returns:
        A dict keyed by file extension (including the leading dot) whose
        values are the reader instances used for that extension: a
        ``JSONReader`` for ".json" and a ``ChunkMarkdownReader`` for ".md".
    """
    # Both extensions live in one returned mapping; returning a ".json"-only
    # dict first would leave the ".md" reader unreachable.
    return {
        ".json": JSONReader(clean_json=False, levels_back=0),
        ".md": ChunkMarkdownReader(),
    }
def get_file_documents(config: FileLoaderConfig,childPath: str):
from llama_index.core.readers import SimpleDirectoryReader
@@ -86,3 +90,32 @@ def get_file_documents(config: FileLoaderConfig,childPath: str):
else:
# Raise the error if it is not the case of empty data dir
raise e
def prjFileSuffix(dir: str):
    """Return the extension of the first regular file directly inside ``dir``.

    ``os.listdir`` yields entries in arbitrary order, so the names are sorted
    before the first regular file is picked; this keeps the result stable
    across runs and platforms when the directory holds several file types.

    Args:
        dir: Path of the directory to inspect. Only its direct entries are
            examined; sub-directories are ignored.

    Returns:
        The extension including its leading dot (for example ".json"), or
        '' when the directory contains no regular file or the chosen file
        has no extension.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``dir`` cannot be listed.
    """
    for entry in sorted(os.listdir(dir)):
        # Skip sub-directories and anything else that is not a regular file.
        if os.path.isfile(os.path.join(dir, entry)):
            return os.path.splitext(entry)[1]
    return ''
def getProjectName(dir: str):
    """Look up the project name stored in the project directory ``dir``.

    The file type reported by ``prjFileSuffix`` selects the lookup path:

    * ".json": the directory is parsed with ``ProjectJson`` and the records
      of the '工程属性' table are scanned for the one whose '名称' value is
      '工程名称'.
    * ".md": the file named '工程属性.md' is loaded with
      ``ChunkMarkdownReader`` and queried with ``findValue``.

    Args:
        dir: Path of the project directory.

    Returns:
        The value found for the project name, or '' when the file type is
        not handled or no matching record/file exists.
    """
    kind = prjFileSuffix(dir)

    if kind == '.json':
        project = ProjectJson(dir)
        project.parse()
        for row in project.table('工程属性').records():
            if row.value('名称') == '工程名称':
                # NOTE(review): the empty key is taken verbatim from the
                # original code; confirm it is the intended column name.
                return row.value('')
    elif kind == '.md':
        markdown_names = [
            file_name for file_name in os.listdir(dir)
            if file_name.endswith('.md')
        ]
        for file_name in markdown_names:
            stem, _ext = os.path.splitext(file_name)
            if stem != '工程属性':
                continue
            reader = ChunkMarkdownReader()
            reader.load_data(os.path.join(dir, file_name))
            return reader.findValue("名称=='工程名称'",'')

    return ''