"""Import a converted project archive and index its documents."""
import base64
import os
import tempfile
import zipfile
from typing import List
from uuid import uuid4

import requests

from app.settings import init_settings
from app.engine.loaders import get_document_Types, get_documents, getFileCacahePath
from app.engine.vectordb import get_vector_store
from app.engine.generate import get_doc_store, run_pipeline, persist_storage

STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")

# General-purpose flag bit 11 of a ZIP entry: when set, the entry name is
# stored as UTF-8 and ``zipfile`` has already decoded it correctly.
_ZIP_UTF8_FLAG = 0x800


class FileLoadService:
    """Fetch a converted project archive, unpack it and index its documents."""

    @staticmethod
    def _decode_member_name(zip_info):
        """Return the archive member name with GBK-encoded names repaired.

        ``zipfile`` decodes names as cp437 unless the UTF-8 flag is set, so
        names that were written as GBK bytes come out garbled. Re-encoding to
        cp437 recovers the raw bytes, which are then decoded as GBK.

        Names flagged as UTF-8 are returned untouched, and names whose bytes
        are not valid GBK fall back to the name ``zipfile`` produced.
        """
        if zip_info.flag_bits & _ZIP_UTF8_FLAG:
            return zip_info.filename
        try:
            return zip_info.filename.encode("cp437").decode("gbk")
        except (UnicodeEncodeError, UnicodeDecodeError):
            return zip_info.filename

    @staticmethod
    def store_and_parse_file(file_data):
        """Convert ``file_data`` remotely and unpack the resulting archive.

        The data is posted as the multipart field ``file`` to
        ``$PRJTOJSON_URL/prj_convert_clt2json``. The text body of that
        response is then posted to ``$PRJTOJSON_URL/file_download``, and the
        body of the second response is treated as a ZIP archive. The archive
        is extracted into ``<file cache path>/Projects/<uuid>``.

        Args:
            file_data: Value forwarded unchanged to ``requests.post`` as the
                uploaded file.

        Returns:
            The project flag, ``"Projects_<uuid>"``, naming the new directory.

        Raises:
            RuntimeError: If ``PRJTOJSON_URL`` is not set.
            requests.HTTPError: If either service call returns an error status.
            zipfile.BadZipFile: If the downloaded payload is not a ZIP archive.
        """
        base_url = os.getenv("PRJTOJSON_URL")
        if not base_url:
            raise RuntimeError("Environment variable PRJTOJSON_URL is not set")

        convert_response = requests.post(
            url=base_url + "/prj_convert_clt2json",
            files={"file": file_data},
        )
        # Fail here rather than feeding an error page to the next step.
        convert_response.raise_for_status()

        download_response = requests.post(
            url=base_url + "/file_download",
            data=convert_response.text,
        )
        download_response.raise_for_status()

        prj_id = str(uuid4())
        target_dir = getFileCacahePath() + f"/Projects/{prj_id}"

        # An anonymous temporary file is platform independent and is removed
        # automatically, even when extraction raises.
        with tempfile.TemporaryFile(suffix=".zip") as archive_file:
            archive_file.write(download_response.content)
            archive_file.seek(0)
            with zipfile.ZipFile(archive_file, "r") as archive:
                # Create the project directory only once the payload is known
                # to be a readable archive, so bad downloads leave no orphan.
                os.makedirs(target_dir)
                for zip_info in archive.infolist():
                    zip_info.filename = FileLoadService._decode_member_name(zip_info)
                    archive.extract(zip_info, target_dir)

        return f"Projects_{prj_id}"

    @staticmethod
    def process_file(base64_content: str) -> str:
        """Import a project file, then build and persist its index.

        Args:
            base64_content: Forwarded unchanged to ``store_and_parse_file``.
                NOTE(review): despite the name and annotation, nothing here
                base64-decodes the value - confirm what callers pass.

        Returns:
            The project flag returned by ``store_and_parse_file``.
        """
        prj_flag = FileLoadService.store_and_parse_file(base64_content)

        # Generate the vectors and persist them locally.
        documents = get_documents(prj_flag)
        for doc in documents:
            doc.metadata["private"] = "false"

        docstore = get_doc_store(prj_flag)
        vector_store = get_vector_store(prj_flag)
        run_pipeline(docstore, vector_store, documents)
        persist_storage(docstore, vector_store)
        return prj_flag