65 lines
2.8 KiB
Python
65 lines
2.8 KiB
Python
import os
|
||
import json
|
||
import re
|
||
import sys
|
||
from dotenv import load_dotenv
|
||
|
||
load_dotenv()
|
||
|
||
from rag2_0.dify.dify_client import DifyApi
|
||
|
||
# Maps the ``name`` of a dataset returned by the Dify API to the software
# name that is used as the key of ``soft_wiki_file_name`` below.
soft_name_map = {
    "配网造价软件知识(new)": "配网计价通D3软件",
    "西藏造价软件知识(new)": "西藏计价通Z1软件",
    "储能C1计价通软件知识(new)": "储能计价通C1软件",
    "技改检修工程计价通T1软件知识(new)": "技改检修工程计价通T1软件",
    "技改检修清单计价通T1软件知识(new)": "技改检修清单计价通T1软件",
    "电力建设计价通(2018)软件知识(new)": "电力建设计价通软件",
    "下载安装注册(new)": "下载安装注册",
}

# Software name -> [output txt file name, list of collected wiki titles].
# The file name is always the software name plus ".txt", and every entry
# starts with its own (unshared) empty title list.
soft_wiki_file_name = {
    software: [software + ".txt", []]
    for software in soft_name_map.values()
}
|
||
|
||
def get_soft_wiki_titles(dify_api, soft_name_map, soft_wiki_file_name):
|
||
"""获取每个软件的wiki标题列表"""
|
||
dataset_list = dify_api.get_all_dataset_list()
|
||
soft_name_map_keys = list(soft_name_map.keys())
|
||
for dataset in dataset_list:
|
||
if dataset["name"] not in soft_name_map_keys:
|
||
continue
|
||
dataset_name = dataset["name"]
|
||
dataset_id = dataset["id"]
|
||
documents = dify_api.get_documents(dataset_id=dataset_id)
|
||
for document_id, doc_info in documents.items():
|
||
document_name = doc_info["name"]
|
||
wiki_name = document_name.split("/")[-1]
|
||
wiki_title = re.sub(r'^(.*?)|^\(.*?\)', '', wiki_name)
|
||
if wiki_title not in soft_wiki_file_name[soft_name_map[dataset_name]][1]:
|
||
soft_wiki_file_name[soft_name_map[dataset_name]][1].append(wiki_title)
|
||
return soft_wiki_file_name
|
||
|
||
def save_wiki_titles(soft_wiki_file_name, output_dir="data/wiki_data"):
    """Write each software's wiki titles to its own text file.

    Args:
        soft_wiki_file_name: Mapping of software name ->
            ``(txt file name, iterable of title strings)``.
        output_dir: Directory that receives the files; it is created when
            it does not exist yet.

    Each file is overwritten, UTF-8 encoded, and holds one title per line.
    A summary line is printed for every file written.
    """
    os.makedirs(output_dir, exist_ok=True)

    for software, entry in soft_wiki_file_name.items():
        file_name, titles = entry
        target = os.path.join(output_dir, file_name)

        with open(target, "w", encoding="utf-8") as out_file:
            out_file.writelines(title + "\n" for title in titles)

        print(f"已保存 {software} 的wiki标题列表到 {target},共 {len(titles)} 条")
|
||
|
||
def main():
    """Fetch the wiki titles through the Dify API and save them as txt files.

    Uses the module-level ``soft_name_map`` and ``soft_wiki_file_name``
    tables and the default output directory of ``save_wiki_titles``.
    """
    client = DifyApi()
    titles_by_software = get_soft_wiki_titles(
        client,
        soft_name_map,
        soft_wiki_file_name,
    )
    save_wiki_titles(titles_by_software)
|
||
|
||
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()