更新环境变量配置,调整模型名称获取方式,新增Dify API相关配置,删除无用的脚本文件,优化意图识别逻辑,添加LLM提取词条逻辑
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
import os
|
||||
import json
|
||||
import re
|
||||
import sys
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
sys.path.append(os.getcwd())
|
||||
|
||||
from rag2_0.dify.dify_client import DifyApi
|
||||
|
||||
# Maps a Dify dataset name (key) to the software name (value) used as the
# key of ``soft_wiki_file_name``. Datasets not listed here are skipped.
soft_name_map = {
    "配网造价软件知识(new)": "配网计价通D3软件",
    "西藏造价软件知识(new)": "西藏计价通Z1软件",
    "储能C1计价通软件知识(new)": "储能计价通C1软件",
    "技改检修工程计价通T1软件知识(new)": "技改检修工程计价通T1软件",
    "技改检修清单计价通T1软件知识(new)": "技改检修清单计价通T1软件",
    "电力建设计价通(2018)软件知识(new)": "电力建设计价通软件",
    "下载安装注册(new)": "下载安装注册",
}
|
||||
|
||||
# Maps a software name to ``[txt_file_name, wiki_titles]``. The output file
# is always "<software name>.txt"; each entry starts with its own empty
# title list, which is filled in later.
soft_wiki_file_name = {
    _soft_name: [_soft_name + ".txt", []]
    for _soft_name in (
        "配网计价通D3软件",
        "西藏计价通Z1软件",
        "储能计价通C1软件",
        "技改检修工程计价通T1软件",
        "技改检修清单计价通T1软件",
        "电力建设计价通软件",
        "下载安装注册",
    )
}
|
||||
|
||||
def get_soft_wiki_titles(dify_api, soft_name_map, soft_wiki_file_name):
    """Collect the de-duplicated wiki titles of every mapped dataset.

    Args:
        dify_api: Client object providing ``get_all_dataset_list()``, which
            yields dicts with ``"name"`` and ``"id"`` keys, and
            ``get_documents(dataset_id=...)``, which returns a dict whose
            values are dicts with a ``"name"`` key.
        soft_name_map: Maps dataset name -> software name. Datasets whose
            name is not a key of this mapping are ignored.
        soft_wiki_file_name: Maps software name -> ``[txt_file_name, titles]``.
            The ``titles`` lists are extended in place.

    Returns:
        The same ``soft_wiki_file_name`` object that was passed in.

    Raises:
        KeyError: If a software name from ``soft_name_map`` has no entry in
            ``soft_wiki_file_name``.
    """
    # Strip one leading parenthesised prefix, full-width or ASCII.
    # The previous pattern began with the capture group ``^(.*?)``, which
    # always matches the empty string, so no prefix was ever removed.
    prefix_pattern = re.compile(r'^(?:（.*?）|\(.*?\))')

    for dataset in dify_api.get_all_dataset_list():
        dataset_name = dataset["name"]
        if dataset_name not in soft_name_map:
            continue

        documents = dify_api.get_documents(dataset_id=dataset["id"])
        titles = soft_wiki_file_name[soft_name_map[dataset_name]][1]
        # Set mirror of ``titles`` for O(1) duplicate checks; the list keeps
        # first-seen order for the output file.
        seen = set(titles)

        for doc_info in documents.values():
            # Only the last path component of the document name is the title.
            wiki_name = doc_info["name"].split("/")[-1]
            wiki_title = prefix_pattern.sub('', wiki_name)
            if wiki_title not in seen:
                seen.add(wiki_title)
                titles.append(wiki_title)

    return soft_wiki_file_name
|
||||
|
||||
def save_wiki_titles(soft_wiki_file_name, output_dir="data/wiki_data"):
    """Write each software's wiki titles to its own UTF-8 text file.

    Args:
        soft_wiki_file_name: Maps software name -> ``[txt_file_name, titles]``.
        output_dir: Directory that receives the files; created if missing.

    Each file is overwritten and holds one title per line. A summary line is
    printed for every file written.
    """
    os.makedirs(output_dir, exist_ok=True)

    for soft_name, entry in soft_wiki_file_name.items():
        txt_file_name, wiki_titles = entry
        output_path = os.path.join(output_dir, txt_file_name)

        with open(output_path, "w", encoding="utf-8") as f:
            f.writelines(title + "\n" for title in wiki_titles)

        print(f"已保存 {soft_name} 的wiki标题列表到 {output_path},共 {len(wiki_titles)} 条")
|
||||
|
||||
def main():
    """Fetch wiki titles through the Dify API and save them to text files."""
    collected = get_soft_wiki_titles(
        DifyApi(),
        soft_name_map,
        soft_wiki_file_name,
    )
    save_wiki_titles(collected)
|
||||
|
||||
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user