更新环境变量配置,调整模型名称获取方式,新增Dify API相关配置,删除无用的脚本文件,优化意图识别逻辑,添加LLM提取词条逻辑

This commit is contained in:
2025-07-16 14:24:50 +08:00
parent 5e164882a1
commit a934f2c398
28 changed files with 1834 additions and 1235 deletions
@@ -0,0 +1,66 @@
import os
import json
import re
import sys
from dotenv import load_dotenv
load_dotenv()
sys.path.append(os.getcwd())
from rag2_0.dify.dify_client import DifyApi
# Maps a Dify dataset name (compared against dataset["name"] when datasets are
# listed) to the software name used as the key of soft_wiki_file_name.
# Datasets whose name is not a key here are skipped.
soft_name_map = {
    "配网造价软件知识(new)": "配网计价通D3软件",
    "西藏造价软件知识(new)": "西藏计价通Z1软件",
    "储能C1计价通软件知识(new)": "储能计价通C1软件",
    "技改检修工程计价通T1软件知识(new)": "技改检修工程计价通T1软件",
    "技改检修清单计价通T1软件知识(new)": "技改检修清单计价通T1软件",
    "电力建设计价通(2018)软件知识(new)": "电力建设计价通软件",
    "下载安装注册(new)": "下载安装注册",
}
# Software names that get their own wiki-title file, in output order.
_WIKI_SOFTWARE_NAMES = (
    "配网计价通D3软件",
    "西藏计价通Z1软件",
    "储能计价通C1软件",
    "技改检修工程计价通T1软件",
    "技改检修清单计价通T1软件",
    "电力建设计价通软件",
    "下载安装注册",
)

# software name -> [output txt file name, list of collected wiki titles].
# Each entry starts with its own empty list, which is filled in place later.
soft_wiki_file_name = {
    software: [f"{software}.txt", []] for software in _WIKI_SOFTWARE_NAMES
}
# One leading parenthesised tag, written with ASCII "(...)" or with the
# full-width parentheses U+FF08 / U+FF09. The previous pattern began with a
# lazy "^.*?" alternative that can match the empty string, so the
# parenthesised alternative was never reached and the tag was never stripped.
_WIKI_PREFIX_RE = re.compile(r"^[(\uff08].*?[)\uff09]")


def get_soft_wiki_titles(dify_api, soft_name_map, soft_wiki_file_name):
    """Collect the wiki titles of every mapped dataset.

    Args:
        dify_api: Object providing ``get_all_dataset_list()`` (an iterable of
            dicts with ``"name"`` and ``"id"``) and
            ``get_documents(dataset_id=...)`` (a dict whose values are dicts
            with a ``"name"`` entry).
        soft_name_map: Maps dataset name to software name; datasets whose name
            is not a key are skipped.
        soft_wiki_file_name: Maps software name to ``[file_name, titles]``.
            The ``titles`` lists are extended in place.

    Returns:
        The same ``soft_wiki_file_name`` object, with new titles appended in
        first-seen order and without duplicates.

    Raises:
        KeyError: If a mapped software name is missing from
            ``soft_wiki_file_name`` and the dataset has at least one document.
    """
    for dataset in dify_api.get_all_dataset_list():
        dataset_name = dataset["name"]
        if dataset_name not in soft_name_map:
            continue
        soft_name = soft_name_map[dataset_name]
        documents = dify_api.get_documents(dataset_id=dataset["id"])
        for doc_info in documents.values():
            # Keep only the last path segment of the document name.
            wiki_name = doc_info["name"].split("/")[-1]
            wiki_title = _WIKI_PREFIX_RE.sub("", wiki_name, count=1)
            titles = soft_wiki_file_name[soft_name][1]
            if wiki_title not in titles:
                titles.append(wiki_title)
    return soft_wiki_file_name
def save_wiki_titles(soft_wiki_file_name, output_dir="data/wiki_data"):
    """Write each software's wiki titles to its own text file.

    Args:
        soft_wiki_file_name: Maps software name to a two-item sequence
            ``(file_name, titles)``.
        output_dir: Directory that receives the files; created if missing.

    Each file is overwritten and holds one title per line. A summary line is
    printed for every file written.
    """
    os.makedirs(output_dir, exist_ok=True)
    for soft_name, entry in soft_wiki_file_name.items():
        txt_file_name, wiki_titles = entry
        output_path = os.path.join(output_dir, txt_file_name)
        with open(output_path, "w", encoding="utf-8") as out:
            out.writelines(title + "\n" for title in wiki_titles)
        print(f"已保存 {soft_name} 的wiki标题列表到 {output_path},共 {len(wiki_titles)}")
def main():
    """Collect wiki titles from Dify and write them to the per-software files."""
    collected = get_soft_wiki_titles(DifyApi(), soft_name_map, soft_wiki_file_name)
    save_wiki_titles(collected)


# Run the export only when this file is executed as a script.
if __name__ == "__main__":
    main()