上传文件至 /

上传文件
This commit is contained in:
2025-04-03 17:23:53 +08:00
parent d0261c5997
commit c152fb8714
5 changed files with 1188 additions and 424 deletions
+55 -51
View File
@@ -1,51 +1,55 @@
"""
===================================
@Author: WenZ
@Company: BooWay
@Project: booway_dm
===================================
"""
# Retrieval entry points from the project-local vector_load module.
from vector_load import Mixed_retrieval, interface_search
# Per-domain keyword files. Each Mixed_retrieval call below runs at import time
# and returns three values, which are bound as that domain's retrievers.
# NOTE(review): these keyword paths are relative while the CSV paths further
# down are absolute — confirm which working directory the process starts from.
# Domain "xizang": Z1 keyword file -> retrievers 1-3.
xizang_input_path = '../data/temp/Z1_keywords_software_usage.txt'
retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3 = Mixed_retrieval(xizang_input_path)
# Domain "cuceng": C1 keyword file -> retrievers 4-6.
cuceng_input_path = '../data/temp/C1_keywords_software_usage.txt'
retriever_txt_faiss4, retriever_txt_faiss5, retriever_txt_faiss6 = Mixed_retrieval(cuceng_input_path)
# Domain "jigai": T1 keyword file -> retrievers 7-9.
jigai_input_path = '../data/temp/T1_keywords_software_usage.txt'
retriever_txt_faiss7, retriever_txt_faiss8, retriever_txt_faiss9 = Mixed_retrieval(jigai_input_path)
# Lookup helpers used by process_domain_category.
from vector_load import Building_search_dictionary, Official_website_kg_search
# Shared index CSV, passed as index_csv_path to Building_search_dictionary.
# NOTE(review): all CSV paths below are hard-coded absolute paths, so they only
# resolve on a host with this exact directory layout.
input_index_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/data_index.csv"
# Per-domain CSV files (one each for xizang, cuceng and jigai).
xizang_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/西藏造价FAQ数据集.csv"
cuceng_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/新型储能电站建设计价通C1.csv"
jigai_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/技改检修计价通T1.csv"
def process_domain_category(nlu_info, retrievers, input_csv_path, index_csv_path):
    """
    Run the retrieval pipeline for one domain category.

    The keywords in ``nlu_info.retrieve_keywords`` are handed to
    ``interface_search`` together with the unpacked ``retrievers``. Every
    keyword it returns is then resolved with ``Building_search_dictionary``.

    :param nlu_info: object exposing a ``retrieve_keywords`` attribute
    :param retrievers: iterable of retrievers, unpacked as positional
        arguments to ``interface_search``
    :param input_csv_path: domain-specific CSV path
    :param index_csv_path: index CSV path
    :return: flat list holding, for each resolved keyword, a formatted string
        carrying ``output_path`` followed by the result of
        ``Official_website_kg_search(output_id)``
    """
    index_keywords = interface_search(nlu_info.retrieve_keywords, *retrievers)
    qa_rag = []
    for keyword in index_keywords:
        output_path, output_id = Building_search_dictionary(input_csv_path, index_csv_path, keyword)
        # Both values are required. The previous `or` check let a None
        # output_id reach Official_website_kg_search and let a None
        # output_path be formatted into the result text.
        if output_path is None or output_id is None:
            continue
        qa_rag.append(f"检索知识:{output_path}")
        qa_rag.append(Official_website_kg_search(output_id))
    return qa_rag
"""
===================================
@Author: WenZ
@Company: BooWay
@Project: booway_dm
===================================
"""
# Retrieval entry points from the project-local vector_load module.
from vector_load import Mixed_retrieval, interface_search
# Per-domain keyword files. Each Mixed_retrieval call below runs at import time
# and returns three values, which are bound as that domain's retrievers.
# NOTE(review): these keyword paths are relative while the CSV paths further
# down are absolute — confirm which working directory the process starts from.
# Domain "xizang": Z1 keyword file -> retrievers 1-3.
xizang_input_path = '../data/temp/Z1_keywords_software_usage.txt'
retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3 = Mixed_retrieval(xizang_input_path)
# Domain "cuceng": C1 keyword file -> retrievers 4-6.
cuceng_input_path = '../data/temp/C1_keywords_software_usage.txt'
retriever_txt_faiss4, retriever_txt_faiss5, retriever_txt_faiss6 = Mixed_retrieval(cuceng_input_path)
# Domain "jigai": T1 keyword file -> retrievers 7-9.
jigai_input_path = '../data/temp/T1_keywords_software_usage.txt'
retriever_txt_faiss7, retriever_txt_faiss8, retriever_txt_faiss9 = Mixed_retrieval(jigai_input_path)
# Domain "peiwang": D3 keyword file -> retrievers 10-12.
peiwang_input_path = '../data/temp/D3_keywords_software_usage.txt'
retriever_txt_faiss10, retriever_txt_faiss11, retriever_txt_faiss12 = Mixed_retrieval(peiwang_input_path)
# Lookup helpers used by process_domain_category.
from vector_load import Building_search_dictionary, Official_website_kg_search
# Shared index CSV, passed as index_csv_path to Building_search_dictionary.
# NOTE(review): all CSV paths below are hard-coded absolute paths, so they only
# resolve on a host with this exact directory layout.
input_index_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/data_index.csv"
# Per-domain CSV files (one each for xizang, cuceng, jigai and peiwang).
xizang_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/西藏造价FAQ数据集.csv"
cuceng_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/新型储能电站建设计价通C1.csv"
jigai_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/技改检修计价通T1.csv"
peiwang_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/博微配网工程计价通D3软件.csv"
def process_domain_category(nlu_info, retrievers, input_csv_path, index_csv_path):
    """
    Run the retrieval pipeline for one domain category.

    The keywords in ``nlu_info.retrieve_keywords`` are handed to
    ``interface_search`` together with the unpacked ``retrievers``. Every
    keyword it returns is then resolved with ``Building_search_dictionary``.

    :param nlu_info: object exposing a ``retrieve_keywords`` attribute
    :param retrievers: iterable of retrievers, unpacked as positional
        arguments to ``interface_search``
    :param input_csv_path: domain-specific CSV path
    :param index_csv_path: index CSV path
    :return: flat list holding, for each resolved keyword, a formatted string
        carrying ``output_path`` followed by the result of
        ``Official_website_kg_search(output_id)``
    """
    index_keywords = interface_search(nlu_info.retrieve_keywords, *retrievers)
    qa_rag = []
    for keyword in index_keywords:
        output_path, output_id = Building_search_dictionary(input_csv_path, index_csv_path, keyword)
        # Both values are required. The previous `or` check let a None
        # output_id reach Official_website_kg_search and let a None
        # output_path be formatted into the result text.
        if output_path is None or output_id is None:
            continue
        qa_rag.append(f"检索知识:{output_path}")
        qa_rag.append(Official_website_kg_search(output_id))
    return qa_rag