Files
DM_rewrite_3.31/kg_management.py
2025-04-03 17:23:53 +08:00

56 lines
2.4 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
===================================
@Author: WenZ
@Company: BooWay
@Project: booway_dm
===================================
"""
from vector_load import Mixed_retrieval, interface_search
# Module-level retriever setup: for each keyword file below, Mixed_retrieval is
# called once at import time and its result is unpacked into three retrievers.
# The resulting twelve names (retriever_txt_faiss1 .. retriever_txt_faiss12) are
# module globals; keep the names and the order stable for any importer.
# NOTE(review): the Z1 / C1 / T1 / D3 file prefixes presumably identify the
# product line each keyword file belongs to -- confirm with the data owners.
# NOTE(review): the keyword-file paths are relative ('../data/temp/...'), so
# they resolve against the process working directory -- confirm that is intended.
xizang_input_path = '../data/temp/Z1_keywords_software_usage.txt'
retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3 = Mixed_retrieval(xizang_input_path)
cuceng_input_path = '../data/temp/C1_keywords_software_usage.txt'
retriever_txt_faiss4, retriever_txt_faiss5, retriever_txt_faiss6 = Mixed_retrieval(cuceng_input_path)
jigai_input_path = '../data/temp/T1_keywords_software_usage.txt'
retriever_txt_faiss7, retriever_txt_faiss8, retriever_txt_faiss9 = Mixed_retrieval(jigai_input_path)
peiwang_input_path = '../data/temp/D3_keywords_software_usage.txt'
retriever_txt_faiss10, retriever_txt_faiss11, retriever_txt_faiss12 = Mixed_retrieval(peiwang_input_path)
from vector_load import Building_search_dictionary, Official_website_kg_search
# CSV locations: one shared index file followed by one dataset file per
# product line (xizang / cuceng / jigai / peiwang).
# NOTE(review): presumably these are the values callers pass to
# process_domain_category as index_csv_path / input_csv_path -- verify at the
# call sites, which are not in this part of the file.
# NOTE(review): these are hard-coded absolute paths tied to one machine's
# directory layout -- consider moving them to configuration.
input_index_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/data_index.csv"
xizang_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/西藏造价FAQ数据集.csv"
cuceng_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/新型储能电站建设计价通C1.csv"
jigai_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/技改检修计价通T1.csv"
peiwang_input_csv_path = "/data/Z_project/rahulnyk/DM_lab/data/temp/博微配网工程计价通D3软件.csv"
def process_domain_category(nlu_info, retrievers, input_csv_path, index_csv_path):
    """
    Run the retrieval pipeline for one domain category.

    The keywords in ``nlu_info.retrieve_keywords`` are first resolved to index
    keywords via ``interface_search``. Each index keyword is then looked up
    with ``Building_search_dictionary``; for every hit, the knowledge path and
    the result of ``Official_website_kg_search`` are appended to the output.

    :param nlu_info: object exposing ``retrieve_keywords`` (the search keywords)
    :param retrievers: sequence of retrievers, unpacked positionally into
        ``interface_search`` (the original docstring describes them as FAISS
        indexes)
    :param input_csv_path: domain-specific CSV path, forwarded to
        ``Building_search_dictionary``
    :param index_csv_path: index CSV path, forwarded to
        ``Building_search_dictionary``
    :return: flat list of QA RAG results; each matched keyword contributes two
        consecutive entries (the formatted knowledge path, then the
        ``Official_website_kg_search`` result). Empty if nothing matched.
    """
    index_keywords = interface_search(nlu_info.retrieve_keywords, *retrievers)
    qa_rag = []
    for keyword in index_keywords:
        output_path, output_id = Building_search_dictionary(input_csv_path, index_csv_path, keyword)
        # Both values are required. The previous check used `or`, which let a
        # keyword through when only one of them was set, so a None output_id
        # could reach Official_website_kg_search (or a literal "None" path
        # could be emitted). Skip the keyword unless both are present.
        if output_path is None or output_id is None:
            continue
        qa_rag.append(f"检索知识:{output_path}")
        qa_rag.append(Official_website_kg_search(output_id))
    return qa_rag