3.31 上传 dm rewrite

This commit is contained in:
Zdao032
2025-03-31 15:17:47 +08:00
commit b444310280
430 changed files with 39039 additions and 0 deletions
Binary file not shown.
+87
View File
@@ -0,0 +1,87 @@
# coding:utf-8
# @Time : 2024/9/5 上午11:15
# @Author : ouyangyouzhang
# @FileName : Dialog.py
# @Describe :
import sys
from PyQt5.QtWidgets import QApplication, QDialog, QVBoxLayout, QLineEdit, QPushButton, QLabel, QHBoxLayout, QDateEdit, \
QMessageBox
from PyQt5.QtCore import QDate
from main import export_by_conversation_id, export_by_data
class ExportDialog(QDialog):
    """Dialog that exports conversations by conversation ID or by date.

    A single AppID field is shared by both export actions. Each action calls
    the matching export function imported from ``main`` and then reports
    completion in a message box.
    """

    def __init__(self):
        super().__init__()
        self.setWindowTitle("导出对话框")
        self.setFixedSize(400, 150)
        layout = QVBoxLayout()

        # AppID row (label + line edit), used by both export actions.
        self.appid_layout = QHBoxLayout()
        self.appid_label = QLabel("AppID:")
        self.appid_layout.addWidget(self.appid_label)
        self.appid_input = QLineEdit(self)
        self.appid_layout.addWidget(self.appid_input)
        layout.addLayout(self.appid_layout)

        # Export-by-conversation-ID section: text field + button.
        self.id_label = QLabel("通过会话ID导出")
        self.id_input = QLineEdit(self)
        self.id_export_button = QPushButton("导出", self)
        self.id_export_button.clicked.connect(self.export_by_id)
        id_layout = QHBoxLayout()
        id_layout.addWidget(self.id_input)
        id_layout.addWidget(self.id_export_button)
        layout.addWidget(self.id_label)
        layout.addLayout(id_layout)

        # Export-by-date section: calendar date picker + button.
        self.date_label = QLabel("通过日期导出")
        self.date_input = QDateEdit(self)
        self.date_input.setCalendarPopup(True)
        self.date_input.setDate(QDate.currentDate())  # default to today
        self.date_export_button = QPushButton("导出", self)
        self.date_export_button.clicked.connect(self.export_by_date)
        date_layout = QHBoxLayout()
        date_layout.addWidget(self.date_input)
        date_layout.addWidget(self.date_export_button)
        layout.addWidget(self.date_label)
        layout.addLayout(date_layout)

        self.setLayout(layout)

    def export_by_id(self):
        """Export the conversation whose ID is typed in the ID field."""
        session_id = self.id_input.text().strip()
        if not session_id:
            # stdout is invisible to a GUI user, so report the missing
            # input in a dialog instead of printing it.
            QMessageBox.warning(self, "提示", "请输入会话ID")
            return
        export_by_conversation_id(session_id, self.appid_input.text().strip())
        QMessageBox.information(self, "完成", "导出成功!")

    def export_by_date(self):
        """Export the conversations of the date selected in the date picker."""
        selected_date = self.date_input.date().toString("yyyy-MM-dd")
        export_by_data(selected_date, self.appid_input.text().strip())
        QMessageBox.information(self, "完成", "导出成功!")
if __name__ == "__main__":
    # Start the Qt event loop with the export dialog as the only window and
    # hand the loop's return code back to the shell.
    qt_app = QApplication(sys.argv)
    export_dialog = ExportDialog()
    export_dialog.show()
    exit_code = qt_app.exec_()
    sys.exit(exit_code)
+142
View File
@@ -0,0 +1,142 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 5,
"id": "e8f39ebb-71ab-4389-8bc3-29577470f948",
"metadata": {},
"outputs": [],
"source": [
"from WikijsTool import WikijsTool"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "0a6bde6e-5507-48d9-8e64-d804f6085723",
"metadata": {},
"outputs": [],
"source": [
"info = WikijsTool.get_all_documents()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "009c3e8d-6ff6-4e0b-83b8-740ed195b5c5",
"metadata": {},
"outputs": [],
"source": [
"html_text = WikijsTool.query_doc_info(8663)['content']"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "621bb76a-aa5c-4f57-8574-c852f24b64e3",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"cleaned_img_text = re.sub(r'<img\\s+[^>]*>', '', html_text)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "941e1e3b-7b8e-47f1-96ff-fa89898a1dd7",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'<h1>使用场景</h1>\\n<p>组合件或组合件下的消耗量想要修改所属项目划分</p>\\n<h1>功能入口</h1>\\n<p>【组合件】界面-——“组合件列表”页签-——选择已录入消耗量的组合件或组合件下的消耗量,鼠标右键-——选择”设置所属项目“。</p>\\n<figure class=\"image\">\\n <figcaption>设置所属项目划分</figcaption>\\n</figure>\\n<h1>操作步骤</h1>\\n<p>1.设置所属项目划分</p>\\n<p>方法一:【组合件】界面——“组合件列表”页签-——选择已录入消耗量的组合件或组合件下的消耗量,鼠标右键——选择”设置所属项目划分“,在弹窗中选择项目划分,点击确定;</p>\\n<figure class=\"image\">\\n <figcaption>批量设置</figcaption>\\n</figure>\\n<p>方法二:对于组合件下单条工程量,在此工程量的所属项目划分列双击,在弹窗中选择项目划分,点击确定;</p>\\n<figure class=\"image\">\\n <figcaption>单条设置</figcaption>\\n</figure>\\n<h1>内部补充</h1>\\n<p>1.如工程量下方还有子级消耗量,需选择到父级消耗量进行操作,子级工程量的“设置所属项目”为灰色不可选。</p>\\n<figure class=\"image\">\\n <figcaption>子级不可设置</figcaption>\\n</figure>\\n<p>&nbsp;</p>\\n<p>&nbsp;</p>\\n'"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"cleaned_img_text"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "13973a4a-1f19-4b4d-b3c8-441d69e0091b",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"使用场景\n",
"组合件或组合件下的消耗量想要修改所属项目划分\n",
"功能入口\n",
"【组合件】界面-——“组合件列表”页签-——选择已录入消耗量的组合件或组合件下的消耗量,鼠标右键-——选择”设置所属项目“。\n",
"\n",
"设置所属项目划分\n",
"\n",
"操作步骤\n",
"1.设置所属项目划分\n",
"方法一:【组合件】界面——“组合件列表”页签-——选择已录入消耗量的组合件或组合件下的消耗量,鼠标右键——选择”设置所属项目划分“,在弹窗中选择项目划分,点击确定;\n",
"\n",
"批量设置\n",
"\n",
"方法二:对于组合件下单条工程量,在此工程量的所属项目划分列双击,在弹窗中选择项目划分,点击确定;\n",
"\n",
"单条设置\n",
"\n",
"内部补充\n",
"1.如工程量下方还有子级消耗量,需选择到父级消耗量进行操作,子级工程量的“设置所属项目”为灰色不可选。\n",
"\n",
"子级不可设置\n",
"\n",
" \n",
" \n",
"\n"
]
}
],
"source": [
"from bs4 import BeautifulSoup\n",
"\n",
"soup = BeautifulSoup(cleaned_img_text, \"html.parser\")\n",
"plain_text = soup.get_text()\n",
"print(plain_text)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2802c4bd-34a7-4c61-bb15-4dd9739ecc1d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dify_lab",
"language": "python",
"name": "dify_lab"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+183
View File
@@ -0,0 +1,183 @@
# import pandas as pd
#
# # 读取 CSV 文件
# csv_filename = "info_dify.csv"
# md_filename = "output.md"
#
# # 读取数据
# df = pd.read_csv(csv_filename)
#
# # 处理 path 列,去除前后 /
# df['path'] = df['path'].str.strip('/')
#
# # 构建树结构
# tree = {}
#
# def insert_path(tree, levels):
# """ 递归插入路径到树形结构,确保相同层级合并 """
# if not levels:
# return
# key = levels[0]
# if key not in tree:
# tree[key] = {}
# insert_path(tree[key], levels[1:])
#
# # 遍历 DataFrame 的 path 列
# for path in df['path']:
# levels = path.split('/') # 拆分层级
# insert_path(tree, levels) # 插入到树结构
#
# def generate_md(tree, level=1):
# """ 递归生成 Markdown 文本,合并相同路径 """
# md_text = []
# for key in sorted(tree.keys()): # 确保有序输出
# md_text.append(f"{'#' * level} {key}") # 根据层级添加 `#`
# md_text.extend(generate_md(tree[key], level + 1)) # 递归生成子项
# return md_text
#
# # 生成 Markdown 内容
# md_content = generate_md(tree)
#
# # 保存到 Markdown 文件
# with open(md_filename, mode='w', encoding='utf-8') as md_file:
# md_file.write("\n".join(md_content))
#
# print(f"Markdown 文件已保存为 {md_filename}")
###################################################################################################################
# import re
# import pandas as pd
#
# # 读取 CSV 文件
# input_file = 'info_data.csv' # 原始 CSV 文件路径
# output_file = 'info_data_cleaned.csv' # 处理后保存的 CSV 文件路径
#
# # 加载 CSV 到 DataFrame
# df = pd.read_csv(input_file, encoding='utf-8')
#
# # 检查 'title' 列是否存在
# if 'title' not in df.columns:
# raise ValueError("CSV 文件中没有找到 'title' 列,请检查文件内容")
#
# # 定义正则表达式:匹配括号及其中的内容
# pattern = re.compile(r'[()()].*?[()()]')
#
# # 遍历每一行,处理 'title' 列
# def clean_title(title):
# # 转换为 str 并删除括号内容
# cleaned_title = re.sub(pattern, '', str(title))
# # 去除多余空格
# return cleaned_title.strip()
#
# # 更新 'title' 列
# df['title'] = df['title'].apply(clean_title)
#
# # 保存到新的 CSV 文件
# df.to_csv(output_file, index=False, encoding='utf-8')
#
# print(f"处理完成!已保存到:{output_file}")
##################################################################################################################
# import pandas as pd
#
# # 读取 CSV 文件
# file_path = 'info_data_cleaned.csv' # 请替换为你的 CSV 文件路径
# df = pd.read_csv(file_path, encoding='utf-8')
#
# # 检查是否包含 'path' 列
# if 'path' not in df.columns:
# raise ValueError("CSV 文件中未找到 'path' 列,请检查文件内容。")
#
# # 将 'path' 列按 '/' 分割,并展开为多列
# split_columns = df['path'].str.split('/', expand=True)
#
# # 重命名列名为 title1, title2, ..., titlen
# split_columns.columns = [f'title{i+1}' for i in range(split_columns.shape[1])]
#
# # 合并原 DataFrame 和新拆分的列
# df = pd.concat([df, split_columns], axis=1)
#
# # 保存结果到新 CSV 文件
# output_file = 'info_data_cleaned_split.csv'
# df.to_csv(output_file, index=False, encoding='utf-8')
#
# print(f"处理完成,结果已保存到 {output_file}")
##################################################################################################################
# import re
# import pandas as pd
#
# # 读取 CSV 文件
# input_file = 'info_data_cleaned_split.csv' # 原始 CSV 文件路径
# output_file = 'info_data_cleaned_split2.csv' # 处理后保存的 CSV 文件路径
#
# # 加载 CSV 到 DataFrame
# df = pd.read_csv(input_file, encoding='utf-8')
#
# # 定义正则表达式:匹配括号及其中的内容
# pattern = re.compile(r'[()()].*?[()()]')
#
# # 清洗函数:删除括号及其中内容,并去除多余空格
# def clean_text(text):
# # 转换为 str 并删除括号内容
# cleaned_text = re.sub(pattern, '', str(text))
# # 去除多余空格
# return cleaned_text.strip()
#
# # 从第2列开始遍历并清洗
# for col in df.columns[1:]:
# df[col] = df[col].apply(clean_text)
#
# # 保存到新的 CSV 文件
# df.to_csv(output_file, index=False, encoding='utf-8')
#
# print(f"处理完成!已保存到:{output_file}")
#####################################################################################################################
# import pandas as pd
#
# # 加载CSV文件
# file_path = 'info_data_cleaned_split2.csv' # 请修改为你的文件路径
# df = pd.read_csv(file_path, encoding='utf-8')
#
# # 定义新列名
# new_column = 'Previous_Row'
#
# # 初始化新列
# df[new_column] = None
#
# # 遍历每一行
# for i in range(1, len(df)):
# # 检查每一列是否为 NaN
# if df.iloc[i].isna().any():
# # 将前一行的内容放入新列中
# df.at[i, new_column] = df.iloc[i-1].to_dict()
#
# # 输出处理后的 DataFrame
# print(df)
#
# # 如果需要保存结果到新CSV
# df.to_csv('info_data_cleaned_split3.csv', index=False, encoding='utf-8')
import pandas as pd

# Source CSV and the destination file for the filtered rows.
input_file = "info_dify_mini.csv"
output_file = "info_dify_mini_2.csv"

df = pd.read_csv(input_file, encoding='utf-8')

# Keep only the rows whose '描述类' column contains the character '费'.
# The pattern must not be empty: an empty pattern matches every string, which
# turns the filter into a no-op. regex=False makes this a plain substring test.
filtered_df = df[df['描述类'].astype(str).str.contains('费', na=False, regex=False)]

filtered_df.to_csv(output_file, index=False, encoding='utf-8')
print(f"筛选完成,共找到 {len(filtered_df)} 行数据,并已保存到 {output_file}")
+194
View File
@@ -0,0 +1,194 @@
import os.path
import requests
import json
import time
from pathlib import Path
class WikijsTool:
    """Static helpers for querying a Wiki.js instance through its GraphQL API.

    All methods are static and share the class-level ``BASE_URL`` and
    ``HEADERS``; ``init_url`` can override both from ``wikiconfig.json``.
    """

    BASE_URL = "http://10.1.16.39:8090/graphql"
    # NOTE(review): a bearer token is hard-coded in source control. init_url()
    # can already load 'Authorization' from wikiconfig.json — consider keeping
    # the token only there.
    HEADERS = {
        "Authorization": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcGkiOjcsImdycCI6MSwiaWF"
                         "0IjoxNzIzMDIwNzg4LCJleHAiOjE4MTc2OTM1ODgsImF1ZCI6InVybjp3aWtpLmpzIiwiaX"
                         "NzIjoidXJuOndpa2kuanMifQ.NSfE4tB7tkN8yapAs0CgkR-Yll6wc3gO3QGKMAv-TlGxx6A-9fJRmkwhRDTVMj_yPVG6"
                         "NXVy_AZpJtLapRXFGn0cvscsRJxq3fY1KgEyt8wO99jvd8DpNHpHhAIgrtyDelmHsBD2Wb5Ib3WJFsWC6d8Yhm9dkpx6tZ"
                         "vMAlFIKOg6UodMoMIry3YWiPGLaqJPQ0gcKmcnB2tC7sPXIIZnvfb5912GVM0n-4wvWobQnb_tXQuYZf99wH_leXjC_7BK8"
                         "8JSaAmB980i3rBxfejmaJ8E6D48zRxwwPFa0veVjjzRkVqHPwAjl1CXb2HE29pGtNmSEE1kLQVqOZD_ibOwKQ"
    }
    # Characters rejected by judge_path_is_ill; the last entry is a space.
    ill_char = ['+', '.', '?', "%", '#', '&', '=', '<', '>', '"', '{', '}', '|', '^', '~', '[', ']', ' ']

    def __init__(self):
        pass

    @staticmethod
    def init_url():
        """Override BASE_URL / Authorization from wikiconfig.json beside this file.

        Returns:
            False when the config file does not exist, True otherwise.
        """
        module_path = Path(__file__).resolve()
        config_path = os.path.join(module_path.parent, 'wikiconfig.json')
        if not os.path.exists(config_path):
            return False
        with open(config_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if 'url' in data:
            WikijsTool.BASE_URL = data['url']
        if 'Authorization' in data:
            WikijsTool.HEADERS['Authorization'] = data['Authorization']
        return True

    @staticmethod
    def _post_graphql(query, variables=None):
        """POST a GraphQL document (plus optional variables) to BASE_URL."""
        payload = {'query': query}
        if variables is not None:
            payload['variables'] = variables
        return requests.post(WikijsTool.BASE_URL, headers=WikijsTool.HEADERS, json=payload)

    @staticmethod
    def get_all_documents() -> list[dict]:
        """Return the page list (path, locale, title, contentType, id, isPublished).

        Raises:
            ValueError: if the server does not answer with HTTP 200.
        """
        query = """
        query Pages {
          pages {
            list {
              path
              locale
              title
              contentType
              id
              isPublished
            }
          }
        }
        """
        response = WikijsTool._post_graphql(query)
        if response.status_code != 200:
            raise ValueError(f"获取文档列表失败,原因:“{response.text}")
        return list(json.loads(response.content)['data']['pages']['list'])

    @staticmethod
    def get_all_doc_by_path(path: str, add_path_end: bool = True) -> list[dict]:
        """Return every page whose path starts with *path*.

        Args:
            path: path prefix to match.
            add_path_end: when True a '/' is appended to *path* first, so only
                pages strictly below that directory match.
        """
        prefix = path + '/' if add_path_end else path
        return [
            document_info
            for document_info in WikijsTool.get_all_documents()
            if str(document_info["path"]).startswith(prefix)
        ]

    @staticmethod
    def judge_path_is_ill(path) -> tuple[bool, str]:
        """Check *path* for characters listed in ``ill_char``.

        Returns:
            ``(True, "")`` when no illegal character is present, otherwise
            ``(False, message)`` where the message lists the illegal characters.
        """
        if set(WikijsTool.ill_char) & set(path):
            # The trailing space entry is spelled out in words so that it is
            # visible in the message.
            error = ', '.join(WikijsTool.ill_char[:-1]) + ", 空格。"
            return False, f"路径中包含非法字符,非法字符包括:{error}"
        return True, ""

    @staticmethod
    def search_document(query_str: str) -> list[dict]:
        """Run a page search and return the result entries (id, path, locale, title).

        The search text is sent as a GraphQL variable rather than spliced into
        the query document, so quotes or backslashes in *query_str* cannot
        break or alter the query.

        Raises:
            ValueError: if the server does not answer with HTTP 200.
        """
        graphql_query = """
        query Pages($query: String!) {
          pages {
            search(query: $query) {
              results {
                id
                path
                locale
                title
              }
            }
          }
        }
        """
        response = WikijsTool._post_graphql(graphql_query, {'query': query_str})
        if response.status_code != 200:
            raise ValueError(f"查询文档失败,原因:“{response.text}")
        return json.loads(response.content)['data']['pages']['search']['results']

    @staticmethod
    def query_doc_info(doc_id: int) -> dict:
        """Fetch a single page (metadata and content) by its numeric id.

        Returns:
            The ``pages.single`` object of the response, or ``{}`` when the
            response carries GraphQL errors.
        """
        query = """
        query singlePages($doc_id: Int!) {
          pages {
            single(id: $doc_id) {
              id
              path
              title
              isPublished
              content
              contentType
              isPrivate
              updatedAt
              createdAt
            }
          }
        }
        """
        response = WikijsTool._post_graphql(query, {'doc_id': doc_id})
        payload = json.loads(response.content)
        # Inspect the parsed top-level "errors" key. A substring test on the
        # raw text misfires on any page whose content contains that word.
        if payload.get('errors'):
            return {}
        return payload['data']['pages']['single']
# Apply optional overrides from wikiconfig.json as soon as the module loads.
WikijsTool.init_url()

if __name__ == "__main__":
    # Dump path / title / id of every wiki page into a CSV file.
    info = WikijsTool.get_all_documents()
    import csv

    csv_filename = "info_data.csv"
    with open(csv_filename, mode='w', newline='', encoding='utf-8') as csv_file:
        csv_writer = csv.writer(csv_file)
        csv_writer.writerow(['path', 'title', 'id'])  # header row
        csv_writer.writerows([page['path'], page['title'], page['id']] for page in info)
    print(f"数据已保存到 {csv_filename}")
    # print(WikijsTool.query_doc_info(2064)['content'])
    # print(WikijsTool.rename_directory("配网知识库/配网造价软件", "配网知识库/配网造价软件1"))
+308
View File
@@ -0,0 +1,308 @@
import os.path
import requests
import json
import time
from pathlib import Path
class WikijsTool:
    """Static helpers for querying a Wiki.js instance through its GraphQL API.

    All methods are static and share the class-level ``BASE_URL`` and
    ``HEADERS``; ``init_url`` can override both from ``wikiconfig.json``.
    """

    BASE_URL = "http://10.1.16.39:8090/graphql"
    # NOTE(review): a bearer token is hard-coded in source control. init_url()
    # can already load 'Authorization' from wikiconfig.json — consider keeping
    # the token only there.
    HEADERS = {
        "Authorization": "Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcGkiOjcsImdycCI6MSwiaWF"
                         "0IjoxNzIzMDIwNzg4LCJleHAiOjE4MTc2OTM1ODgsImF1ZCI6InVybjp3aWtpLmpzIiwiaX"
                         "NzIjoidXJuOndpa2kuanMifQ.NSfE4tB7tkN8yapAs0CgkR-Yll6wc3gO3QGKMAv-TlGxx6A-9fJRmkwhRDTVMj_yPVG6"
                         "NXVy_AZpJtLapRXFGn0cvscsRJxq3fY1KgEyt8wO99jvd8DpNHpHhAIgrtyDelmHsBD2Wb5Ib3WJFsWC6d8Yhm9dkpx6tZ"
                         "vMAlFIKOg6UodMoMIry3YWiPGLaqJPQ0gcKmcnB2tC7sPXIIZnvfb5912GVM0n-4wvWobQnb_tXQuYZf99wH_leXjC_7BK8"
                         "8JSaAmB980i3rBxfejmaJ8E6D48zRxwwPFa0veVjjzRkVqHPwAjl1CXb2HE29pGtNmSEE1kLQVqOZD_ibOwKQ"
    }
    # Characters rejected by judge_path_is_ill; the last entry is a space.
    ill_char = ['+', '.', '?', "%", '#', '&', '=', '<', '>', '"', '{', '}', '|', '^', '~', '[', ']', ' ']

    def __init__(self):
        pass

    @staticmethod
    def init_url():
        """Override BASE_URL / Authorization from wikiconfig.json beside this file.

        Returns:
            False when the config file does not exist, True otherwise.
        """
        module_path = Path(__file__).resolve()
        config_path = os.path.join(module_path.parent, 'wikiconfig.json')
        if not os.path.exists(config_path):
            return False
        with open(config_path, 'r', encoding='utf-8') as file:
            data = json.load(file)
        if 'url' in data:
            WikijsTool.BASE_URL = data['url']
        if 'Authorization' in data:
            WikijsTool.HEADERS['Authorization'] = data['Authorization']
        return True

    @staticmethod
    def _post_graphql(query, variables=None):
        """POST a GraphQL document (plus optional variables) to BASE_URL."""
        payload = {'query': query}
        if variables is not None:
            payload['variables'] = variables
        return requests.post(WikijsTool.BASE_URL, headers=WikijsTool.HEADERS, json=payload)

    @staticmethod
    def get_all_documents() -> list[dict]:
        """Return the page list (path, locale, title, contentType, id, isPublished).

        Raises:
            ValueError: if the server does not answer with HTTP 200.
        """
        query = """
        query Pages {
          pages {
            list {
              path
              locale
              title
              contentType
              id
              isPublished
            }
          }
        }
        """
        response = WikijsTool._post_graphql(query)
        if response.status_code != 200:
            raise ValueError(f"获取文档列表失败,原因:“{response.text}")
        return list(json.loads(response.content)['data']['pages']['list'])

    @staticmethod
    def get_all_doc_by_path(path: str, add_path_end: bool = True) -> list[dict]:
        """Return every page whose path starts with *path*.

        Args:
            path: path prefix to match.
            add_path_end: when True a '/' is appended to *path* first, so only
                pages strictly below that directory match.
        """
        prefix = path + '/' if add_path_end else path
        return [
            document_info
            for document_info in WikijsTool.get_all_documents()
            if str(document_info["path"]).startswith(prefix)
        ]

    @staticmethod
    def judge_path_is_ill(path) -> tuple[bool, str]:
        """Check *path* for characters listed in ``ill_char``.

        Returns:
            ``(True, "")`` when no illegal character is present, otherwise
            ``(False, message)`` where the message lists the illegal characters.
        """
        if set(WikijsTool.ill_char) & set(path):
            # The trailing space entry is spelled out in words so that it is
            # visible in the message.
            error = ', '.join(WikijsTool.ill_char[:-1]) + ", 空格。"
            return False, f"路径中包含非法字符,非法字符包括:{error}"
        return True, ""

    @staticmethod
    def search_document(query_str: str) -> list[dict]:
        """Run a page search and return the result entries (id, path, locale, title).

        The search text is sent as a GraphQL variable rather than spliced into
        the query document, so quotes or backslashes in *query_str* cannot
        break or alter the query.

        Raises:
            ValueError: if the server does not answer with HTTP 200.
        """
        graphql_query = """
        query Pages($query: String!) {
          pages {
            search(query: $query) {
              results {
                id
                path
                locale
                title
              }
            }
          }
        }
        """
        response = WikijsTool._post_graphql(graphql_query, {'query': query_str})
        if response.status_code != 200:
            raise ValueError(f"查询文档失败,原因:“{response.text}")
        return json.loads(response.content)['data']['pages']['search']['results']

    @staticmethod
    def query_doc_info(doc_id: int) -> dict:
        """Fetch a single page (metadata and content) by its numeric id.

        Returns:
            The ``pages.single`` object of the response, or ``{}`` when the
            response carries GraphQL errors.
        """
        query = """
        query singlePages($doc_id: Int!) {
          pages {
            single(id: $doc_id) {
              id
              path
              title
              isPublished
              content
              contentType
              isPrivate
              updatedAt
              createdAt
            }
          }
        }
        """
        response = WikijsTool._post_graphql(query, {'doc_id': doc_id})
        payload = json.loads(response.content)
        # Inspect the parsed top-level "errors" key. A substring test on the
        # raw text misfires on any page whose content contains that word.
        if payload.get('errors'):
            return {}
        return payload['data']['pages']['single']
import html2text
import re
def to_markdown(input1):
    """Convert an HTML string to Markdown and strip image references.

    Args:
        input1: HTML text to convert.

    Returns:
        The Markdown produced by html2text (links are kept), with every
        ``![alt](url)`` image reference removed together with the text that
        follows it up to the end of a line.
    """
    markdown_converter = html2text.HTML2Text()
    markdown_converter.ignore_links = False  # keep hyperlinks in the output
    markdown_content = markdown_converter.handle(input1)
    # NOTE(review): the whitespace run after the image may span line breaks,
    # in which case the following line is removed as well — confirm that this
    # (e.g. dropping a caption line) is intended.
    return re.sub(r'!\[.*?\]\(.*?\)\s*[^\n]*', '', markdown_content)
# Apply optional URL / Authorization overrides from wikiconfig.json before any
# request is sent.
WikijsTool.init_url()
# Fetch the complete page list when the module is executed.
# NOTE(review): in the code visible here `info` is only referenced by the
# commented-out CSV export below — confirm this network call is still needed.
info = WikijsTool.get_all_documents()
# import csv
#
# # 定义 CSV 文件名
# csv_filename = "info_data2.26.csv"
#
# # 写入 CSV 文件
# with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
# writer = csv.writer(file)
#
# # 写入表头
# writer.writerow(['path', 'title', 'id'])
#
# # 写入数据
# for i in info:
# writer.writerow([i['path'], i['title'], i['id']])
#
# print(f"数据已保存到 {csv_filename}")
# print(info)
xizang_index = [8461, 8401, 8191, 8705, 8399, 8202, 8356, 8269, 8268, 8266, 8671, 8267, 8476, 8472, 8094, 8851, 8271, 8479,
8418, 8093, 8417, 8473, 8419, 8462, 8482, 8856, 8669, 8484, 8422, 8264, 8265, 8432, 7847, 8204, 8405, 8707,
8704, 8682, 8352, 8273, 8720, 8474, 8272, 8801, 8677, 8235, 8243, 8490, 8155, 8460, 8165, 8732, 8246, 8493,
8354, 8733, 8513, 8481, 8492, 8598, 8231, 8240, 8742, 8820, 8431, 8200, 8357, 8350, 8223, 8198, 8369, 8827,
7931, 8753, 8136, 8877, 8741, 8852, 8039, 8463, 8863, 8731, 8435, 8815, 7495, 8286, 8129, 7930, 7752, 8128,
7929, 8728, 8816, 8694, 8693, 8698, 8689, 8690, 7740, 7739, 8058, 8812, 8744, 8455, 7738, 7845, 7846, 8063,
8069, 8133, 7833, 8066, 7834, 8287, 8679, 8087, 7956, 8850, 8743, 8060, 8748, 8746, 8458, 8067, 8754, 8404,
8055, 8065, 8723, 8400, 8403, 8064, 8755, 8363, 8503, 7949, 7948, 8500, 8355, 8391, 8402, 8757, 8756, 8379,
8174, 8459, 8367, 8450, 8192, 8120, 8193, 8791, 8413, 8747, 8270, 8424, 8796, 8249, 8250, 8478, 8475, 8483,
8499, 8098, 8079, 8103, 8251, 8374, 8252, 8049, 8253, 8506, 8057, 7989, 8056, 7945, 8842, 8410, 8409, 8480,
8497, 8862, 8514, 8507, 8512, 8502, 7762, 8515, 8501, 8373, 8360, 8217, 8713, 8092, 8095, 8100, 8438, 8745,
8068, 8072, 8070, 8408, 8075, 8806, 8071, 8434, 8074, 8433, 8421, 8511, 8496, 8495]
down_index = [5362, 5409, 2500, 3926, 2389, 3611, 5430, 2166, 4296, 6545, 5396, 4080, 4171, 8836, 6785, 6448, 3745, 6029,
6846, 4718, 5250, 5898, 6924, 5341, 5286, 7387, 7133, 7437, 7395, 5484, 7273, 5279, 5254, 5271, 6419, 5143,
5779, 5454, 5139, 2608, 6587, 7277, 6835, 2148, 6308, 7564, 5234, 2382, 5752, 6169, 2503, 6551, 5104, 7230,
6537, 3849, 5771, 5190, 6503, 6534, 6122, 7178, 2641, 6301, 2427, 5410, 5494, 5493, 5422, 7034, 5121, 6257,
4114, 3764, 7446, 6601, 5385, 6041, 5423, 6961, 5151, 6592, 5394, 5303, 3757, 6925, 3747, 5474, 5027, 3759,
8831, 4784, 2604, 3573, 5119, 8861, 7016, 7013, 7014, 7565, 4990, 5926, 5380, 5135, 5345, 3570, 3854, 7566]
# a = WikijsTool.query_doc_info(8401)
# print(to_markdown(a['title']))
# print(to_markdown(a['content']))
# 读取 txt 文件
# with open("ceshi.txt", "r", encoding="utf-8") as file:
#
# lines = file.read().splitlines() # 按行读取并去除换行符
#
# # 转换为数字列表(int 或 float)
# numbers = [int(line) if line.isdigit() else float(line) for line in lines if line.strip()]
#
# # 输出列表
# print(numbers)
# 目标文件夹
# output_dir = "DM_data/xizang_kg"
# os.makedirs(output_dir, exist_ok=True) # 确保目录存在
#
# for doc_id in xizang_index:
# a = WikijsTool.query_doc_info(doc_id) # 获取文档信息
# title = to_markdown(a['title']).strip() # 获取标题
# content = to_markdown(a['content']).strip() # 获取内容
#
# if content: # 只有内容非空时才保存
# filename = f"{title}.txt"
# filepath = os.path.join(output_dir, filename)
#
# # 确保文件名合法(避免非法字符)
# filename = "".join(c if c.isalnum() or c in " _-" else "_" for c in filename)
# filepath = os.path.join(output_dir, filename)
#
# with open(filepath, "w", encoding="utf-8") as f:
# f.write(content)
#
# print(f"保存成功: {filepath}")
# else:
# print(f"跳过 ID {doc_id}: 内容为空")
import os
import re
# Target directory that receives one .txt file per exported document
output_dir = "DM_data/down_kg"
os.makedirs(output_dir, exist_ok=True)  # make sure the directory exists
def sanitize_filename(filename):
    """Return *filename* with illegal file-name characters replaced by '_'.

    Each of ``< > : " / \\ | ? *`` is replaced by an underscore, then leading
    and trailing whitespace is stripped from the result.
    """
    illegal_chars = re.compile(r'[<>:"/\\|?*]')
    replaced = illegal_chars.sub('_', filename)
    return replaced.strip()
# Export every document listed in down_index as "<title>.txt" in output_dir.
for doc_id in down_index:
    doc_info = WikijsTool.query_doc_info(doc_id)
    if not doc_info:
        # query_doc_info returns {} when the server reports errors, and the
        # page object may be null for an unknown id; skip it instead of
        # crashing the whole run on doc_info['title'].
        print(f"跳过 ID {doc_id}: 未获取到文档信息")
        continue

    # A missing or null title/content is treated as empty text.
    title = to_markdown(doc_info.get('title') or '').strip()
    content = to_markdown(doc_info.get('content') or '').strip()

    if not content:  # only documents with non-empty content are saved
        print(f"跳过 ID {doc_id}: 内容为空")
        continue

    # Fall back to the document id when the title sanitizes to nothing, so the
    # file is never written as just ".txt".
    safe_title = sanitize_filename(title) or str(doc_id)
    filepath = os.path.join(output_dir, f"{safe_title}.txt")
    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)
    print(f"保存成功: {filepath}")
View File
Binary file not shown.
Binary file not shown.
+229
View File
@@ -0,0 +1,229 @@
# 行业相关知识
## 变电基本知识
### 变电主要设备
### 变电站分类
### 变电站电气系统
### 变电站的建筑物和构建物
### 变电的概念
### 建筑物和构建物
### 换流站的建筑物和构筑物
## 电力系统简介
### 发电厂
### 变电站
### 工程建筑全过程
### 电力系统构成
### 电力系统构成概念
### 电力负荷
### 电源系统
### 输电系统
## 电力行业背景
### 南方电网公司
### 县级供电公司
### 发电公司
### 国家电网公司
### 市级供电局
### 电力设计院
### 电厂
### 省电力公司
### 省电建公司
### 省送变电工程公司
## 送电线路基本知识
### 分裂数
### 同塔架设双回路
### 导线
### 已有一回,新建一回
### 接地钢材
### 架空地线(避雷线)和拉线
### 架空送电线路金具
### 架线工程内容
### 混凝土预制品
### 绝缘子
### 金具、绝缘子、零星钢材
### 铁塔工程量统计
### 附件工程内容
### 工地运输计算(造价2008线路)
## 造价基础知识
### 《火电发电工程建设预算编制与计算标准》的适用范围
### 《电网工程建设预算编制与计算标准》定义
### 《电网工程建设预算编制与计算标准》的适用范围
### 其他费用的构成
### 勘察费/附加调整系数“35KV及以下送电线路”是相乘/累乘
### 勘察费方法二/工程勘察设计收费标准(2002年修订本)
### 勘察费方法二(2002修订本)附加调整系数/气温系数需累加,同时该项与测土壤电阻率是相乘关系
### 工程造价的定义
### 工程造价的职能
### 工程造价计价顺序
### 工程造价计算6步走
### 工程造价费用构成
### 建设期贷款利息——专业知识篇
### 拦标价
### 材料费、消耗性材料和装置性材料费(计价材料和未计价材料)
### 电力行业定额定义
### 直接工程费
### 编制年价差
### 设备购置费的构成/设备运杂费
### 间接费
### (06预规)人工费
### (06预规)施工机械使用费
### (设计费)设计费方法二/工程勘察设计收费标准(2002年修订本)
## 配网设计基础知识
### (配网设计)最大允许应力
### 架空线路的比载
### 架空输电线路建设原则
### 架空输电线路的结构
### 架空配电网线路设计
### 电缆井
### 电缆头
### (配网设计)各种比载的计算
### (配网设计)回路/单回路/双回路
### (配网设计)图元、图例、图框
### (配网设计)垂直比载
### (配网设计)孤立档计算
### (配网设计)年平均运行应力
### (配网设计)新线系数
### (配网设计)最大使用应力
### (配网设计)杆塔
### (配网设计)杆塔型号中对应的字母含义
### (配网设计)杆塔按材料的分类
### (配网设计)正常安全系数
### (配网设计)水平比载
### (配网设计)电缆的敷设方式
### (配网设计)直线杆塔
### (配网设计)线路路径的选择原则
### (配网设计)终端杆塔、跨越杆塔、换位杆塔
### (配网设计)综合比载
### (配网设计)耐张杆塔
### (配网设计)耐张段、孤立档
### (配网设计)设计气象条件三要素
### (配网设计)设计的基本流程
### (配网设计)路径方案的选定
### (配网设计)转角杆塔
### (配网设计)输电线路
### (配网设计)金具的分类及作用
## 砌砖、砌块套用定额
# 配网编制D3
## 南网专版
### (广西清单)自动统计出土质、地形、运输的项目特征值
### (配网计价通D3南网版)“典造分析”高级功能
### (配网计价通D3南网版)“控制线指标”高级功能
### (配网计价通D3南网版)典造分析
### (配网计价通D3南网版)典造分析常见问题
### (配网计价通D3南网版)导入/导出特征信息
### (配网计价通D3南网版)工程转换:清单预算转清单计价/清单计价转清单预算
### (配网计价通D3南网版)广西配网扩大工程量清单组合件编码规则
### (配网计价通D3南网版)批量设置设计类型
### (配网计价通D3南网版)控制线指标
### (配网计价通D3南网版)设置分项特征
### (配网计价通D3)导出南网规约接口文件
## 河南专版
### (配网计价通D3)河南专版与通用版本区别/项目管理
## 贵州专版
### (配网D3软件)锁内许可证名称/锁专业及后缀
## 辽宁专版
### (辽宁配网D3)新建清单批次/框架清单
### (辽宁配网计价通D3)显示工程内容
### (辽宁配网计价通D3)显示消耗量名称
### (辽宁配网计价通D3)清单批次:一键调价
### (配网D3辽宁清单批次)批量导出工程量清单报表
### (配网计价通D3)辽宁专版与通用版本区别
## 配网工程计价通D3
## 主页
### (辽宁配网计价通D3)工程转换:预算转清单/清单转预算
### (配网计价通D3)设置选项:主材设备的供货方默认设置/批次工程计算不自动统计技经指标
### (配网计价通D3)一般计税转简易计税
### (配网计价通D3)升级16版定额组合件库
### (配网计价通D3)备份管理
### (配网计价通D3)导入/导出物料库
### (配网计价通D3)导入/导出组合件库
### (配网计价通D3)导入云物料
### (配网计价通D3)工程模板管理
### (配网计价通D3)工程量合并
### (配网计价通D3)新建批次工程
### (配网计价通D3)新建自定义物料库
### (配网计价通D3)新建自定义组合件库
### (配网计价通D3)湖北费用统计切块表
### (配网计价通D3)统计分析-物料统计
### (配网计价通D3)统计分析-费用统计
### (配网计价通D3)统计分析设置报表单位为万元
### (配网计价通D3)设置选项:自动保存工程/报表生成方式
### (配网计价通D3)费用统计-添加报表/导入导出报表模板
### (配网计价通D3)费用统计中是按工程文件名称的数值排序
### (配网计价通D3)费用统计批量设置小数位数
## 配网工程计价通D3
## 典型问题
### (配网计价通D3)甲供设备不计入造价
### (配网计价通D3)编制说明工程投资按元显示
### (配网计价通D3)老版定额升级/配网2017升级为2022工程
### (配网计价通D3)自由表显示元金额
## 配网工程计价通D3
## 多工程批量设置
### (配网计价通D3)多工程报表导出
### (配网计价通D3)批量导出多个工程报表,不显示总算表(表一)
## 配网工程计价通D3
## 工程信息
### (配网计价通D3)2022定额调差系数中,通信站、通信线路调差系数为0
### (配网计价通D3)占地类型
### (配网计价通D3)只开展竣工结算审核/施工过程造价咨询及竣工结算审核费
### (配网计价通D3)地区类型
### (配网计价通D3)工程审核
### (配网计价通D3)建筑电缆工程采用电缆沟、电缆隧道
### (配网计价通D3)建设预算性质/基本预备费
### (配网计价通D3)技经参数
### (配网计价通D3)技经参数统计自动规则
### (配网计价通D3)材料配送费
### (配网计价通D3)特殊地区
### (配网计价通D3)综合地形增加费/批次工程计算综合地形增加费
### (配网计价通D3)设备配送费
### (配网计价通D3)设置土质比例
### (配网计价通D3)设置密码/工程加密
### (配网计价通D3)运输设置
## 配网工程计价通D3
## 工程费用
### (配网计价通D3)建贷利息
### (配网计价通D3)一键清空费率
### (配网计价通D3)一键调价
### (配网计价通D3)价差预备费
### (配网计价通D3)其他费用界面勾选输出
### (配网计价通D3)勘察费/专项勘察费/其中勘察费
### (配网计价通D3)基本设计费/设计费
### (配网计价通D3)工程勘察只进行一般性定位测量作业
### (配网计价通D3)工程监理费
### (配网计价通D3)建设场地征用及清理费
### (配网计价通D3)总算表/表一以元为单位显示
### (配网计价通D3)总算表中显示施工费
###
## 配网清单D3
### (配网清单D3)清单结算工程:查看中标清单组价
### (配网清单计价通D3)专业暂估价
### (配网清单计价通D3)显示价格为0的清单
### (配网清单计价通D3)显示清单计算式
### (配网清单计价通D3)显示物料编号
### (配网清单计价通D3)暂列金额
### (配网清单计价通D3)材料设备暂估价
### (配网清单计价通D3)结算如何解锁
### (配网清单计价通D3)设置报表参数/显示项目划分及清单
### (配网计价通D3)清单折扣比例
### (配网计价通D3)清空所有项目特征
### (配网计价通D3)调整清单码
### (配网计价通D3)锁定清单属性
+164
View File
@@ -0,0 +1,164 @@
# import pandas as pd
# import chardet
#
# # 检测文件编码的函数
# def detect_encoding(file_path):
# with open(file_path, 'rb') as f:
# result = chardet.detect(f.read())
# return result['encoding']
#
# # Step 1: 读取 A.csv 并提取 input 列
# A_encoding = detect_encoding('D:/工作簿3.csv')
# A_df = pd.read_csv('D:/工作簿3.csv', encoding=A_encoding)
# list1 = A_df['input'].tolist()
#
# # Step 2: 读取 B.csv,基于 list1 中的内容查找匹配项
# B_encoding = detect_encoding('D:/工作簿2.csv')
# B_df = pd.read_csv('D:/工作簿2.csv', encoding=B_encoding)
#
# # 创建 list2 存储匹配到的 answer 列内容
# list2 = []
#
# # 遍历 list1,查找 B.csv 中的匹配项
# for item in list1:
# match = B_df[B_df['query'] == item]
# if not match.empty:
# # 如果有匹配项,取第一个匹配的 answer 值
# list2.append(match['answer'].values[0])
# else:
# # 如果没有匹配项,填充为 NaN 或其他默认值
# list2.append(None)
#
# # Step 3: 将 list2 添加到 A.csv 的 output 列
# A_df['output'] = list2
#
# # 保存修改后的 A.csv 文件
# A_df.to_csv('D:/A_updated2.csv', index=False, encoding='utf-8') # 保存时也指定编码,确保兼容性
#
# print("A.csv 已成功更新为 A_updated.csv")
import pandas as pd
import re
# 读取CSV文件
# df = pd.read_csv('D:/2.10.15.11.csv', encoding='utf-8')
#
# # 提取'点彩原因'列内容到list
# dian_cai_list = df['点彩原因'].tolist()
#
# # 初始化'点彩原因2'列
# df['点彩原因2'] = ''
#
# # 遍历列表,查找并提取"回答错误"或"回答正确"之后的内容,并修改原列
# for idx, content in enumerate(dian_cai_list):
# if isinstance(content, str): # 确保内容是字符串
# match = re.match(r'(回答错误|回答正确|回答不出)[,]?\s*(.*)', content)
# if match:
# # 提取关键词(回答错误或回答正确)
# keyword = match.group(1)
# # 提取关键词后的内容
# extracted_content = match.group(2).strip()
#
# # 更新'点彩原因2'列
# df.at[idx, '点彩原因2'] = extracted_content
# # 修改'点彩原因'列,只保留关键词
# df.at[idx, '点彩原因'] = keyword
#
# # 保存更新后的CSV文件
# df.to_csv('D:/updated_file3.csv', index=False, encoding='utf-8')
# import requests
#
# url = "http://10.1.16.39:2333/intent_recognition"
# data = {
# "query": "BDY3是什么软件做的工程"
# }
#
# response = requests.post(url, json=data)
# print(response.json())
# import wikipedia
#
#
# def get_wikipedia_summary(term, lang="zh", sentences=2):
# """
# 使用 Wikipedia API 获取输入名词的解释或描述。
#
# 参数:
# - term: 需要查询的名词(字符串)
# - lang: 语言(默认 "zh" 为中文)
# - sentences: 返回的句子数量(默认 2 句)
#
# 返回:
# - Wikipedia 解释(字符串)
# """
# try:
# wikipedia.set_lang(lang) # 设置语言
# summary = wikipedia.summary(term, sentences=sentences)
# return summary
# except wikipedia.exceptions.DisambiguationError as e:
# return f"查询词 '{term}' 可能指多个内容:{', '.join(e.options[:5])}..."
# except wikipedia.exceptions.PageError:
# return f"未找到 '{term}' 的相关 Wikipedia 页面。"
# except Exception as e:
# return f"发生错误:{e}"
#
#
# # 测试示例
# print(get_wikipedia_summary("人工智能")) # 获取 "人工智能" 的解释
# import wikipedia
#
# def get_wikipedia_summary(term, lang="zh"):
# """
# 查询 Wikipedia API 获取名词的摘要信息。
#
# :param term: 要查询的名词
# :param lang: 语言(默认中文 'zh'
# :return: 该名词的 Wikipedia 摘要信息
# """
# try:
# wikipedia.set_lang(lang) # 设置语言
# summary = wikipedia.summary(term, sentences=3) # 获取前3句话摘要
# return summary
# except wikipedia.exceptions.DisambiguationError as e:
# return f"查询 '{term}' 有多个可能的结果,请更具体:\n{e.options[:5]}"
# except wikipedia.exceptions.PageError:
# return f"未找到 '{term}' 的 Wikipedia 词条。"
# except Exception as e:
# return f"查询失败,错误信息: {e}"
#
# import zhconv
#
# # 示例调用
# term = "历史版本"
# result = get_wikipedia_summary(term)
# traditional_text = zhconv.convert(result, 'zh-cn')
# print(traditional_text)
# # print(result)
import pandas as pd

# Load the test-set CSV (replace the path with your own file).
file_path = "D:/测试集3.3.csv"
df = pd.read_csv(file_path, encoding='utf-8')

if 'query' not in df.columns:
    # Nothing can be filtered without the 'query' column.
    print("CSV 文件中没有 'query' 列,请检查文件格式!")
else:
    # Keep the rows whose 'query' text contains '西藏'; missing values count
    # as non-matching.
    mentions_xizang = df['query'].str.contains('西藏', na=False)
    filtered_df = df[mentions_xizang]

    # Write the matching rows to a new CSV file.
    output_path = "D:/测试集_西藏.csv"
    filtered_df.to_csv(output_path, index=False, encoding='utf-8')
    print(f"筛选后的数据已保存到 {output_path}")
+108
View File
@@ -0,0 +1,108 @@
5362
5409
2500
3926
2389
3611
5430
2166
4296
6545
5396
4080
4171
8836
6785
6448
3745
6029
6846
4718
5250
5898
6924
5341
5286
7387
7133
7437
7395
5484
7273
5279
5254
5271
6419
5143
5779
5454
5139
2608
6587
7277
6835
2148
6308
7564
5234
2382
5752
6169
2503
6551
5104
7230
6537
3849
5771
5190
6503
6534
6122
7178
2641
6301
2427
5410
5494
5493
5422
7034
5121
6257
4114
3764
7446
6601
5385
6041
5423
6961
5151
6592
5394
5303
3757
6925
3747
5474
5027
3759
8831
4784
2604
3573
5119
8861
7016
7013
7014
7565
4990
5926
5380
5135
5345
3570
3854
7566
+38
View File
@@ -0,0 +1,38 @@
"""
===================================
@Author: WenZ
@Company: BooWay
@project: dify_lab
===================================
"""
import pandas as pd
def read_title_column(csv_file: str) -> list:
    """
    Return the non-missing values of the 'title' column of a UTF-8 CSV file.

    :param csv_file: path of the CSV file
    :return: list of the 'title' values; an empty list when the file cannot be
             read or has no 'title' column (the error is printed)
    """
    try:
        frame = pd.read_csv(csv_file, encoding='utf-8')  # read as UTF-8
        if 'title' not in frame.columns:
            raise ValueError("CSV文件中未找到'title'")
        present_titles = frame['title'].dropna()  # drop missing values
        return present_titles.tolist()
    except Exception as e:
        print(f"读取文件时发生错误: {e}")
        return []
# Titles to index; read_title_column returns an empty list when info_data.csv
# cannot be read or has no 'title' column.
titles = read_title_column("info_data.csv")
from langchain_huggingface import HuggingFaceEmbeddings
# Local path passed to HuggingFaceEmbeddings as the model name.
embedding_path = "D:/迅雷下载/模型权重/bge-m3"
embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
from langchain_community.vectorstores import Chroma
# Directory handed to Chroma as persist_directory.
chroma_archived = "chroma_titles"
# Build the Chroma vector store from the titles using the embeddings above.
# NOTE(review): `titles` may be empty here — confirm how Chroma.from_texts
# behaves for an empty list.
vectorstore_txt_chroma = Chroma.from_texts(titles, embeddings, persist_directory=chroma_archived)
Binary file not shown.
Binary file not shown.
+88
View File
@@ -0,0 +1,88 @@
# coding:utf-8
# @Time : 2024/8/27 下午3:08
# @Author : ouyangyouzhang
# @FileName : export_like_msg.py
# @Describe : 导出点赞的消息
import psycopg2
from psycopg2 import sql
import os
import pandas as pd
class ExportMsg:
    """Export liked messages from the Dify PostgreSQL database to Excel."""

    def __init__(self):
        # Stays None when the connection attempt fails, so that later calls
        # can report the real problem instead of an AttributeError.
        self.connection = None
        try:
            # NOTE(review): database credentials are hard-coded here; consider
            # loading them from configuration or the environment.
            self.connection = psycopg2.connect(
                user="postgres",
                password="difyai123456",
                host="172.20.0.145",
                port=5432,
                database="dify"
            )
        except (Exception, psycopg2.Error) as error:
            print("Error while connecting to PostgreSQL", error)

    def _require_connection(self):
        """Return the open connection or raise ConnectionError if there is none."""
        connection = getattr(self, "connection", None)
        if connection is None:
            raise ConnectionError("PostgreSQL connection is not available")
        return connection

    def export_like_msg_id(self, user_id: str) -> list:
        """Return the message ids that the given end user rated "like".

        Raises:
            ConnectionError: if the database connection was never established.
        """
        cursor = self._require_connection().cursor()
        try:
            query = sql.SQL("""
                SELECT message_id, from_end_user_id, rating
                FROM message_feedbacks
                WHERE from_end_user_id = %s AND rating = %s
            """)
            cursor.execute(query, (user_id, "like"))
            records = cursor.fetchall()
        finally:
            # Close the cursor even when the query fails.
            cursor.close()
        return [item[0] for item in records]

    def export_QA_to_excel(self, msg_id_list, save_file) -> bool:
        """Write the query/answer pair of each message id to an Excel file.

        Ids that have no row in ``messages`` are reported and skipped.

        Args:
            msg_id_list: iterable of message ids to look up.
            save_file: path of the Excel file to write.

        Returns:
            True once the file has been written.

        Raises:
            ConnectionError: if the database connection was never established.
        """
        q_list = []
        a_list = []
        cursor = self._require_connection().cursor()
        try:
            # The statement is identical for every id, so build it once.
            query = sql.SQL("""
                SELECT id, query, answer
                FROM messages
                WHERE id = %s
            """)
            for item in msg_id_list:
                cursor.execute(query, (item,))
                record = cursor.fetchone()
                if record is None:
                    print(f"Message {item} not found, skipped")
                    continue
                q_list.append(record[1])
                a_list.append(record[2])
        finally:
            cursor.close()

        data = {"query": q_list, "answer": a_list}
        df = pd.DataFrame(data)
        df.to_excel(save_file, index=False)
        return True
def main(arg1: list) -> dict:
    """Reduce each item of ``arg1`` to its "title" and "content" fields.

    :param arg1: list of mappings that each contain "title" and "content";
        ``None`` or an empty list is accepted
    :return: ``{"result": [...]}`` holding one ``{"title", "content"}`` dict per
        input item, or an empty list when there is nothing to convert
    """
    has_items = arg1 is not None and len(arg1) > 0
    if not has_items:
        return {"result": []}

    trimmed = []
    for entry in arg1:
        trimmed.append({"title": entry["title"], "content": entry["content"]})
    return {"result": trimmed}
if __name__ == '__main__':
    # Export every Q&A pair liked by one end user into an Excel workbook.
    # (The former leading `print(e)` referenced an undefined name and raised
    # NameError before any work was done.)
    ex = ExportMsg()
    msg_list = ex.export_like_msg_id("de014752-5509-4c9e-a26d-72d7d955f895")
    ex.export_QA_to_excel(msg_list, "D:\\Code\\PGSql\\like_msg.xlsx")
    print('Python')
+56
View File
@@ -0,0 +1,56 @@
"""
===================================
@Author: WenZ
@Company: BooWay
@project: dify_lab
===================================
"""
import requests
import json
import urllib3
# Silence urllib3's InsecureRequestWarning; info_input below posts with verify=False.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
def info_input(input_str, timeout=300):
    """Send ``input_str`` as a blocking chat message and return the conversation id.

    :param input_str: text sent as the ``query`` field of the request
    :param timeout: seconds to wait for the server before giving up; without a
        timeout ``requests`` can block forever on an unresponsive host
    :return: the ``conversation_id`` field of the JSON response
    :raises requests.HTTPError: if the server answers with a 4xx/5xx status
    :raises requests.Timeout: if no response arrives within ``timeout`` seconds
    """
    # API endpoint
    url = "https://172.20.0.145/v1/chat-messages"

    # NOTE(review): the API key is hard-coded in source; consider loading it
    # from configuration instead.
    api_key = "app-ziMDuhVoATbd5vF3Lb7iyjVD"

    headers = {
        "Authorization": f"Bearer {api_key}",
        "Content-Type": "application/json"
    }

    payload = {
        "inputs": {},
        "query": input_str,
        "response_mode": "blocking",
        "conversation_id": "",
        "user": "abc-123",
        # NOTE(review): this attachment looks like sample data — confirm it is
        # meant to be sent with every query.
        "files": [
            {
                "type": "image",
                "transfer_method": "remote_url",
                "url": "https://cloud.dify.ai/logo/logo-site.png"
            }
        ]
    }

    # verify=False skips TLS certificate verification for this request.
    response = requests.post(
        url,
        headers=headers,
        data=json.dumps(payload),
        timeout=timeout,
        verify=False,
    )
    # Fail with the real HTTP error rather than a KeyError on the error body.
    response.raise_for_status()
    return response.json()['conversation_id']
# print(info_input("多个工程需要统一修改定额中材料的单价是否可以呢"))
# print(info_input("多个工程需要统一修改定额中材料的单价是否可以呢"))
+2
View File
@@ -0,0 +1,2 @@
清单计价, 技改, 电缆, 税率, 审核, 统计分析, 设置, 工程精度, 组合件, 线路, 架空线路, 导入, 字体, 取费, 材料表, 运输, 信息, 迁改, 消耗量, 项目, 更新, 备份管理, 建设场地征用费, 调试费用, 全费用综合单价, 行业清单工程, 锂电池, 拆除工程, 许可证, 清单消耗量, 社会保险, 调差, 设备配送费, 软件申请, 配网, 电力, 预算, 报表, 页眉, 运输方式, 资源管理, 增值税, 保管, 匹配, 跨越, 损耗, 材料, 金额, 设备, 价差, 监造费, 合并, 调整, 勘察费, 固定综合单价, 主材设备互转, 取费表, 变更, 技术经济, 模板, 经济作物, 生成, 转换, 工程审核, 迁改费用, 安全文明施工费, 表单, 删除, 运距, 备份, 贷款利息, 项目划分, 定额, 物料, 计算预览, 刷新, 施工总承包, 汇总, 储能电站, 设备类别, 规费, 分类, 隐藏, 其他费用, 密码, 建筑工程量清单, 修改, 计算, 钢筋量, 国网2023规范, 合同价, 分析, 批量, 费用模板, 设备性材料, 工程调差, 物料库, 公路, 工程加密, 工程, 市场价, 计算式, 打印, 输电线路, 格式, 混凝土, 输变电汇总, 主材, 综合单价, 计价规范, 卸车, 页面, 导出, 电池储能, 同步, 编号, 升级, 监理费, 预规, 博微储能, 软件, 限价, 显示, 清水混凝土, 管理, 组合件库, 工程量, 修改记录, 投标报价, 清单, 市场价系数, 施工服务费, 检修, 五金计算, 费率, 页码, 超高, 储能, 可抵扣增值税, 查询, 招标控制价, 预算控制价, 增值, 费率调整, 解锁, 指标分析, 结算价, 供货方, 汇总级别, 页脚, 添加, 统计, 工地运输, 工程费用, 定额调整, 清理, 材料暂估价, 设计费, 参数, 造价, 土质比例, 铁路, 泵车浇制, 服务业增值税率, 组件库, 住房公积金, 输变电, 设备暂估价, 施工费, 人工, 文件, 模板参数, 运杂费, 施工, 费用, 应急措施费, 特殊地区增加费, 施工图预算, 估算, 信息价库, 云工程, 比例, 税率调整, 储能计价
+295
View File
@@ -0,0 +1,295 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "e2b31c11-d818-483d-b99d-6b9c8477dacd",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# 读取 CSV 文件\n",
"csv_filename = \"info_dify.csv\"\n",
"md_filename = \"output.md\"\n",
"\n",
"# 读取数据\n",
"df = pd.read_csv(csv_filename)\n",
"\n",
"# 处理 path 列,去除前后 /\n",
"df['path'] = df['path'].str.strip('/')\n",
"\n",
"# 构建树结构\n",
"tree = {}\n",
"\n",
"def insert_path(tree, levels):\n",
" \"\"\" 递归插入路径到树形结构,确保相同层级合并 \"\"\"\n",
" if not levels:\n",
" return\n",
" key = levels[0]\n",
" if key not in tree:\n",
" tree[key] = {}\n",
" insert_path(tree[key], levels[1:])\n",
"\n",
"# 遍历 DataFrame 的 path 列\n",
"for path in df['path']:\n",
" levels = path.split('/') # 拆分层级\n",
" insert_path(tree, levels) # 插入到树结构\n",
"\n",
"def generate_md(tree, level=1):\n",
" \"\"\" 递归生成 Markdown 文本,合并相同路径 \"\"\"\n",
" md_text = []\n",
" for key in sorted(tree.keys()): # 确保有序输出\n",
" md_text.append(f\"{'#' * level} {key}\") # 根据层级添加 `#`\n",
" md_text.extend(generate_md(tree[key], level + 1)) # 递归生成子项\n",
" return md_text\n",
"\n",
"# 生成 Markdown 内容\n",
"md_content = generate_md(tree)\n",
"\n",
"# 保存到 Markdown 文件\n",
"with open(md_filename, mode='w', encoding='utf-8') as md_file:\n",
" md_file.write(\"\\n\".join(md_content))\n",
"\n",
"print(f\"Markdown 文件已保存为 {md_filename}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "796cc4fe-4912-4cdc-9b80-f217f46b5487",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import pandas as pd\n",
"\n",
"# 读取 CSV 文件\n",
"input_file = 'info_data.csv' # 原始 CSV 文件路径\n",
"output_file = 'info_data_cleaned.csv' # 处理后保存的 CSV 文件路径\n",
"\n",
"# 加载 CSV 到 DataFrame\n",
"df = pd.read_csv(input_file, encoding='utf-8')\n",
"\n",
"# 检查 'title' 列是否存在\n",
"if 'title' not in df.columns:\n",
" raise ValueError(\"CSV 文件中没有找到 'title' 列,请检查文件内容\")\n",
"\n",
"# 定义正则表达式:匹配括号及其中的内容\n",
"pattern = re.compile(r'[()()].*?[()()]')\n",
"\n",
"# 遍历每一行,处理 'title' 列\n",
"def clean_title(title):\n",
" # 转换为 str 并删除括号内容\n",
" cleaned_title = re.sub(pattern, '', str(title))\n",
" # 去除多余空格\n",
" return cleaned_title.strip()\n",
"\n",
"# 更新 'title' 列\n",
"df['title'] = df['title'].apply(clean_title)\n",
"\n",
"# 保存到新的 CSV 文件\n",
"df.to_csv(output_file, index=False, encoding='utf-8')\n",
"\n",
"print(f\"处理完成!已保存到:{output_file}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cf63d138-4d16-496a-ad3c-5d67008412f6",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# 读取 CSV 文件\n",
"file_path = 'info_data_cleaned.csv' # 请替换为你的 CSV 文件路径\n",
"df = pd.read_csv(file_path, encoding='utf-8')\n",
"\n",
"# 检查是否包含 'path' 列\n",
"if 'path' not in df.columns:\n",
" raise ValueError(\"CSV 文件中未找到 'path' 列,请检查文件内容。\")\n",
"\n",
"# 将 'path' 列按 '/' 分割,并展开为多列\n",
"split_columns = df['path'].str.split('/', expand=True)\n",
"\n",
"# 重命名列名为 title1, title2, ..., titlen\n",
"split_columns.columns = [f'title{i+1}' for i in range(split_columns.shape[1])]\n",
"\n",
"# 合并原 DataFrame 和新拆分的列\n",
"df = pd.concat([df, split_columns], axis=1)\n",
"\n",
"# 保存结果到新 CSV 文件\n",
"output_file = 'info_data_cleaned_split.csv'\n",
"df.to_csv(output_file, index=False, encoding='utf-8')\n",
"\n",
"print(f\"处理完成,结果已保存到 {output_file}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7520603c-90cf-4982-9273-2f130614ca96",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"import pandas as pd\n",
"\n",
"# 读取 CSV 文件\n",
"input_file = 'info_data_cleaned_split.csv' # 原始 CSV 文件路径\n",
"output_file = 'info_data_cleaned_split2.csv' # 处理后保存的 CSV 文件路径\n",
"\n",
"# 加载 CSV 到 DataFrame\n",
"df = pd.read_csv(input_file, encoding='utf-8')\n",
"\n",
"# 定义正则表达式:匹配括号及其中的内容\n",
"pattern = re.compile(r'[()()].*?[()()]')\n",
"\n",
"# 清洗函数:删除括号及其中内容,并去除多余空格\n",
"def clean_text(text):\n",
" # 转换为 str 并删除括号内容\n",
" cleaned_text = re.sub(pattern, '', str(text))\n",
" # 去除多余空格\n",
" return cleaned_text.strip()\n",
"\n",
"# 从第2列开始遍历并清洗\n",
"for col in df.columns[1:]:\n",
" df[col] = df[col].apply(clean_text)\n",
"\n",
"# 保存到新的 CSV 文件\n",
"df.to_csv(output_file, index=False, encoding='utf-8')\n",
"\n",
"print(f\"处理完成!已保存到:{output_file}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "f03c96de-8bff-4b41-933a-9d9775bb7ad8",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"# 加载CSV文件\n",
"file_path = 'info_data_cleaned_split2.csv' # 请修改为你的文件路径\n",
"df = pd.read_csv(file_path, encoding='utf-8')\n",
"\n",
"# 定义新列名\n",
"new_column = 'Previous_Row'\n",
"\n",
"# 初始化新列\n",
"df[new_column] = None\n",
"\n",
"# 遍历每一行\n",
"for i in range(1, len(df)):\n",
" # 检查每一列是否为 NaN\n",
" if df.iloc[i].isna().any():\n",
" # 将前一行的内容放入新列中\n",
" df.at[i, new_column] = df.iloc[i-1].to_dict()\n",
"\n",
"# 输出处理后的 DataFrame\n",
"print(df)\n",
"\n",
"# 如果需要保存结果到新CSV\n",
"df.to_csv('info_data_cleaned_split3.csv', index=False, encoding='utf-8')"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5838e3fb-0546-4433-aa82-0ed367ad05e0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"id": "5f91c155-92b7-4c82-863f-6c942e8df58a",
"metadata": {},
"source": [
"## 2.20"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "c708560b-13c7-494e-b6fa-96c038ae36f5",
"metadata": {},
"outputs": [],
"source": [
"synonyms_file = \"synonyms.json\"\n",
"with open(synonyms_file, \"r\", encoding=\"utf-8\") as f:\n",
" synonyms = json.load(f)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "2f6e0cc6-7d6e-46cd-a2bc-561fb193ae92",
"metadata": {},
"outputs": [],
"source": [
"synonym_to_standard = {}\n",
"for standard, values in synonyms.items():\n",
" for value in values:\n",
" synonym_to_standard[value] = standard # 反向映射 { \"表单\": \"报表\" }"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "4199840f-1d36-4328-969c-f973103b437e",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "122c3629-ffb1-4deb-8df1-4be71baf82d0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "7e88830a-663e-44b6-9b4d-452ac174e046",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "e5273065-8470-4a77-8e5c-2cca33cf2e5f",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dify_lab",
"language": "python",
"name": "dify_lab"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+441
View File
@@ -0,0 +1,441 @@
d0369d15-d253-4145-9602-bf6b0e569702
18029854-ab4a-4690-a472-9829f1928973
ee4ee32f-9934-458a-a39b-11cd8cb58e09
19c625da-37a4-407e-afef-af65eb597b54
664f26bb-cf00-4451-8f0f-4f681d0467dc
31b2818e-8b80-472a-90f6-d31460d7dde7
b53dc306-6a0c-4282-963b-e42fea08a3b4
8a57d156-99be-423e-964b-15b4887cfc0d
37cf1ef5-0712-4529-ab51-26ef0f7854b5
196a52c8-bfcb-49e9-bc34-4b1ebe487c27
ae04ccee-7b70-4f32-bef3-3877a66158d1
e43ea3c1-65a0-40ba-a3ac-2e44b331b5cb
808765cf-f424-4c41-9cc7-983594e4cb85
a5b94380-b4f9-4be5-9b0b-8eb4f8f39553
ed554c95-2cb4-449f-8622-432c975c6b44
2dff208a-e909-476a-a152-737c8ab613f1
5c7f8af1-d560-4b98-828c-ba11fc55bb6f
d5b05fc3-a24c-4149-ac92-1feb3ac06a7b
d9e76e20-b017-408c-a425-85c0f023a58a
325a853d-93fb-4e9f-9eba-db08cb777f42
ca92a77c-d19c-4b71-8eb6-7f836aff3daa
d49f60b1-f780-41c3-b176-138bcd120e62
28f0c17f-5bb1-4a71-b07d-44716f594c26
82fcc53f-9638-4f2d-81f6-b50b2500fe26
55477319-b092-43b5-a1dc-d892991e4cdd
c6cf2912-f670-4b4a-9a48-e4815ab6df3d
ad0ff32d-4be4-4734-9dcf-068661babef0
24811a53-68bb-47b9-b521-fddb80ca3b23
97acf39a-76a5-4097-9552-1b750315a36e
8e408ff4-bd34-4787-aadf-97b8fdd41378
9fb79f6f-1b83-480b-b088-50a3e38bd06e
23793f4e-732b-435f-aa51-029314bc6d1b
2158079d-3a46-4a59-bda8-b7a9f790e068
cadca83e-20c7-460e-873e-df791121b5fe
80154dd6-8e62-4a39-b210-4545c3d1a121
69d99d72-f4f8-47cb-a34b-d22cfa35c381
8f1c18df-a59c-428a-b0ad-95ff698b34e8
4224d229-d84b-45cc-bf07-b9144ab85b4d
9d0fe1ed-cc2b-4fd4-b171-c430ac22955f
5393d050-f873-45ff-9fc9-be3c058bf81e
ec2563ef-21c6-4ac2-bba0-40b5e5f8e085
326ebfe2-76c3-4653-a214-2a6983d992ad
e4cf9794-e8d4-4eb5-a6f0-3114c0851539
2da1b373-7500-46e2-80cd-dc96e00123f3
ce0e8610-4e24-4442-a008-087ca0348cde
57465182-d89e-44f2-8a7d-f56ed7edafa2
ab51c34d-7d2a-4495-9d46-06b284031291
3d425f5a-eb29-42d6-8568-6666f31cca94
765f4020-3d8f-4594-ba67-534d103e5ca4
db02acc2-efee-40eb-be51-e0cda52f5c48
7363a712-9b73-47ba-85fa-d64fb6956b34
56c5856f-b0de-48af-8d8b-ea2285cf9480
35f33074-57ba-4bae-ae6a-69ef6b155e3f
5c773f50-03b0-4283-affb-ff66cf2e60b3
ef78199d-d19e-4907-aa6d-9c05b23010c7
4e4a7d18-673c-4ab3-9806-29f4072883b8
30971e2c-42a1-4daa-a936-de652478337b
95471a7e-8d93-43d9-813b-25396af40e90
78a5c788-cdf6-4edf-94fc-060262df3f7b
4123c61e-0cd2-455d-a47b-34028044112d
e2d7aa26-2dc4-404e-b04f-feb3fb74bee7
4c88fd6f-85dc-4f14-a53e-90178d58d159
2d145c43-320b-45f4-8fdd-85ca667fee03
8ade9a72-0ad4-4cb5-b6b3-94ccfd84c75f
d7627ff3-2631-47fc-8587-3ebb966b8dc0
348cbb07-ac51-42c1-b2d7-9f62a16f6e44
eb9f798e-5037-427f-a6e7-4f99e16eb77e
f580fc6f-c395-423d-8c2b-56fd091dec02
e672c95e-c8ef-473c-9fef-cc6bbf62adbf
bb9354e0-cc2a-4313-a848-44ae5d21170a
21e5b255-6579-422c-81a8-44f047a4c038
b4acfe9b-ec3b-4be6-b6e0-329e14a70efb
39a95292-0c49-465c-8f62-783719c3b961
5cb49b96-c897-4c74-b5e2-95652e50af61
248e139e-5aa1-423a-9262-66838c8399d5
c9d76601-8a1f-48b2-88ae-5d1e14a45744
8281c84b-42fc-4e44-ac5c-4dd8d01854eb
ca8fc70b-7b90-40bd-82a2-5ec78cda7804
76c116fe-8945-4c08-886e-2910ded66c64
689ed3aa-e724-4cc1-b661-c20a307d1a7d
8ad2c706-6ffd-4c09-90e6-0a604402834d
a3082758-1293-4aab-93d1-661024335dc7
a48abdc4-ee68-4147-a997-2ff0f029bcd0
bd04889a-1a78-43d7-9019-bbd6999e7084
8d2fffec-6dcc-4e23-869b-8e0246de6ea0
b0be2218-9cb9-42fa-b748-c6786bfe8c36
e087504c-8ded-4b36-834f-3937aa6189f2
be55b648-b419-476d-a255-921f06209f49
202cff0a-6120-4f63-a459-1eeb4c7c336d
ad5216aa-48fd-40ce-8a7c-e135b72a7160
f028924d-208f-4533-8000-687ae1778d06
f0ef075e-4a23-4929-a49d-9e7e5337b228
1102d13c-bd21-4461-b990-e25aacc20e33
420b1ad6-a2b7-4927-a2c0-b19c47092dbf
1594fd3d-32ec-4bf9-b711-7c116dfbc286
d4394716-abce-4c0b-aa16-95730a3ec5f7
aca6cd34-ff12-4cb5-bcba-83e692a61d03
26b5046b-51b8-4937-bcdd-60d81c7cec97
ef412e6d-3721-4aa3-b2ca-9cd9020e676b
9a5bbdcb-8109-4673-b360-0b6487bda6f3
179b9b24-3a77-49a1-bbab-23126dbe6c17
bb5494bf-2280-4964-a3a3-ba6d60d1741b
fa8d0f07-ee02-4c14-94e6-d4cc4eabe44a
4af23d3c-65b5-4624-bf5e-f0a30b49f29f
5190cc9f-c441-4bf5-8074-1a558db564cc
79518452-7d09-4717-872b-0fbad9b3423b
896e2cf0-ee33-4c14-a315-f24e8ea51630
87e683f7-16de-4919-9bbb-3a636fb079aa
00235513-4f02-4591-a10f-db5ee9c13d19
1ae6b07a-10e3-4af0-942b-095ccc5b7d2d
614164d8-bc1c-45b8-b5f1-7fa4feba3db8
83a18c7f-983c-411e-82be-3cbbffde0d3d
cee8cbba-2ac3-4b46-bb4a-53e83b1a463b
fbdc354d-0447-4e38-b923-2e3d64827c21
d5b65458-dbe0-47b2-b773-a7c615dd6695
652239cf-a7ab-49ee-ba3c-ae5da0ff6158
31fdf585-dd5e-421c-8b71-b4ac96385a34
906a964e-bb27-439e-a6ca-d69ca746f214
2a60ed3c-a52d-4575-84de-795b3c1dd150
64e51a9a-e3b8-4392-a674-1b9bb4eead45
0ca4f4f1-fb48-4299-826f-a069ed992c32
6bd5b1cf-df44-4448-887f-641ef79f7c3d
fd6f027d-a121-4cbc-813c-3c398d25abc0
fc468c88-47d2-48f4-96ec-b1227d86bf4e
fe93a0d3-5787-400d-9e07-ee25ad84ae32
549c50ea-4460-4b3e-88e9-0888df390a49
eb215fd8-1bf2-4dd8-abca-58f6dd01c8ac
81ccbf4d-8ffb-4da0-a7de-556fb9763ee9
d6874b8b-f7f5-4c72-86bb-9819d9a74a68
257a9f97-f14f-4689-8d4f-042127d6f10d
25add8c7-1fac-4f77-a918-91038bc723ce
fb270bb0-09d9-49b2-bad8-e70888aae82d
46713dd6-bea9-471d-806d-1bb38631cc2f
86e7cf08-2019-412a-be38-e2d5d9ed94a1
e39e8d6c-a85f-4268-b586-4840e144178c
9e5fd306-05af-4101-b1f4-cd7553a16808
a48561fa-907a-4916-ac4b-5d641f8805a3
4f2b2280-3a16-4380-b323-e79971dd7d0f
417c81ee-ef55-4b3f-86ba-d2bccc7b36b4
5d65ecca-5ffd-495c-a7cc-066761b96301
817fe65c-4828-4222-a44f-f696cb2b2cdc
5c5bf073-174a-43c6-b401-743322a9ef1e
eb537d4e-55f0-41f2-8636-3d571a6fa331
9b46d796-dadc-4e59-97b5-b8c21f877c4d
389f387c-4b77-44d1-a262-2a6a4e31bb45
58857488-fb47-48b9-9039-c28c93b43c99
7ec6a2ac-0414-4cce-adfd-2808545f7d27
ae2c7997-c16c-4f40-9037-615fdfae852c
4f3b650d-cb61-44b5-a611-d907cf56a210
14fed434-4806-41e0-a935-e74be3512660
febc3586-bcbb-4417-98d5-66d8dce2d30c
fdef0a02-f996-48c2-b15b-0bccbe53acad
21b435af-a4ab-4165-b3f5-da0d37fe4812
42ca21a7-b679-4ca3-ab20-e15dce9a0e5c
e7584381-b093-407c-adbc-6a25ba7759d6
f17778bf-087a-43c2-979d-d36b56d298a7
b2102504-8990-4aff-9703-1c61e45a257f
cad9f667-fa47-482d-a843-7f6c5cb1cf70
be2a1ea6-529e-4210-a141-382fce6609b0
a0209cbf-23ed-4771-bd8d-10ef56995825
e82cfeda-9b4a-4140-9f6e-e0b2f9d115b9
3b7df38f-746d-4fc4-b401-fd904ef81d89
bec2694f-4be4-43ea-b2af-28134f765d61
7b91071a-ec3e-409a-806f-18eb16303b3b
f3442f6a-d4c5-45e3-b61c-9677958a0469
65c33e78-23c9-4b54-a119-d3d7631b810d
dc66a039-fe55-49eb-bad2-80cdde2c42f3
584edf9a-2df7-4ffb-8e49-7c15e3181e1a
f8e8d517-5d76-480d-80be-695fc07bdb3c
ff60770d-b556-4cf6-9e8e-09673e10f8ec
a911cbbd-eb78-47f1-be3f-bea4a2055109
628fbdc8-8917-45a3-a2ab-20ef3092523c
6fd5e9f6-d8a0-4238-9870-4292abd9eedb
931c0e78-35c5-4fc3-a079-23a2a70ee9ec
bb278688-aa52-43a2-ac44-8d27d934e0a3
ded99b2b-90a2-4d5c-9137-e06e2ae809f2
e0058e0e-f1c5-4daa-ba26-10a1471a1be4
0345f2b3-a6e4-4c47-88a9-e291f24f63eb
d7eb12d3-39be-4104-a35d-73c41701582e
3c6bf0a4-edc3-4711-924e-b05da900c581
e4e8d500-e6e7-4bb3-abf9-553c736688cf
7d9c1eaf-4658-483b-b872-b4c1fb31378d
c0e0beba-29d2-4d7c-86b1-b679d463c430
3d74e8e5-1c82-453b-883b-373aa8b43304
56dc2047-898a-4fd2-8856-d7994e10339c
408cfe56-8e3d-4b0f-846a-0746e4eb14f5
06b97953-a4d9-4d3b-821c-6ee525706d51
4aecaa73-950e-46ab-ad52-6495a0cc26e6
90a2624a-5365-444c-943c-7163f43183cf
db400ef3-c974-4e5a-aaa0-e3d1718b657d
ca9dfd7a-832f-4888-9416-74953e47960b
6e714b69-2d02-4a11-b04e-6a72057f8b04
6d44bede-24de-4244-a984-d8601d3d1596
062c83d5-8180-47d1-b9ca-433461e5259d
423f5c43-b349-415d-a861-6ac558c764f9
757b01fa-bc4e-4feb-84c3-ed09c1ed3eef
f37bddaf-a028-4bb3-9031-83f7269690fe
63244d62-99dd-4652-87b5-40347c6da3a6
181f6bce-efb8-4b21-a9b7-646502bd5342
27d53909-40ea-4e74-af3f-8f0dc7858bf5
5af834f7-b82f-4c47-9e22-cff09718aa7f
582bb110-4c07-4ea1-b87c-371df6b6a2ba
3a3d25fc-cc39-4172-bc6d-359446406899
4f573fb3-9ce5-40ce-bf70-7f35e3635f86
5e9c9fb1-4f1b-45dd-bff7-d130a847dd26
56fe5603-3acf-4088-ab0f-f5b321e5e8e1
c71bdd6e-083f-4ebb-b3e0-14065ee3d998
4318d1bb-6b6e-45cb-8235-50b3032c74d0
ea7aea43-5420-4cea-9381-98e69c1eeff1
6f82d565-b937-4b41-806c-419c4f5afa15
86447bd9-0f6c-46bf-9ca2-0b2b501792b1
80e36a66-bcd9-490d-8924-d4fe8219e638
d9be8e27-87e0-4128-81e0-179d7dfe7476
25c20bbb-11ac-456d-bcc9-d42707979b81
003fb98c-744e-48f6-bc71-1af624524d8b
346940a8-953a-4288-b851-03845c6ef374
68e13e53-0dc1-4704-acda-9931c8b139fa
48fbea63-6cf5-46b4-911d-b75b6527e1b5
2a03b815-1462-4b51-bd55-26e13d812f26
5532912f-6d9f-4a54-a399-c1d28d1e6e7a
eaca5d90-6397-4a44-8483-1f6ee5a1f128
2557bb5a-6962-40e7-8861-ff1f4671e95e
98c9cc38-d3bb-4db7-8aa1-2349a0ef8efe
41c9dec9-5f7e-4816-b40d-4a89444ba5e1
42a50bd3-6646-4651-a2ea-e3ff9ed90113
d6de3227-f693-461e-9043-2ec490c130e6
01ba5e24-b56a-4c30-80c9-2daf5d5d4e57
10c8ff0f-b825-4d7d-80e1-725635c16059
8fea73cd-7ba6-4a8a-be4e-8a629d781b31
790d1266-decd-4e71-a995-7f4c13ec8f07
c99a5cb7-70f4-4540-a840-3622354c607c
314f532f-cc0b-4af7-93d6-a00c6d24379c
c5332cf2-8130-4e17-ae47-844ee4e3f47a
05611430-217e-49d3-861f-3a8915c7ea3f
2914f17e-9f93-4da0-9a34-b15c8ccf4137
3a85b346-1a12-463a-b6dc-d7784c051623
c8f608fe-10cc-4fcd-8eac-f732a6bb6377
37a890b4-69b3-43b9-9b9a-937bca09fe75
f4669c65-c4af-459c-a7d8-e941c0fddb7e
157e852a-4db5-4e1a-9196-3b949ae8ffef
67d208ea-3fc6-47aa-be38-4a5239d47bda
da920819-421d-4971-aa8a-47dd7b97719e
d56ef45a-8799-4a94-aecf-f01e1e731f95
634be1db-15fa-40f8-a3f7-6eb797495385
fcac387f-9198-4f50-9e01-34e3cb9a710f
31181838-da43-41c7-80df-ccf4ca4fe6be
01fcb94e-997a-4823-97ec-0af80ece7788
205cbb2b-3e0c-42ce-a056-ab57b99d720d
c05f7c63-cdab-49d7-9344-2f13d4804d82
3616dc1f-5263-48eb-a72c-f94be077dbe4
7f977683-36c0-4183-a187-0bb747720bbc
31a4d452-b23f-4f37-b2cb-8b659aee4a2f
bd625689-2601-404d-abf4-20511cdcb2f7
a60e9b27-bb50-455a-bee8-63b171dd712f
8954ec6b-a34c-48b4-b03a-dd0a43ed1748
070534fd-031b-4a5d-836b-56499dbb354a
cdaada11-d68f-4c9d-b71f-d73adcf7b839
a9807f0c-e28c-4172-b88c-7e20c09b08aa
bb7e07d4-fce1-493c-88f6-e0c3cfaf6347
129020e6-df41-4bf2-94ce-0710925aeff2
f5e13f61-816c-4b68-ba68-d7a7a360721e
20d10002-e9f0-435e-b3e9-05a41718505d
beb3135a-615d-40eb-9fd4-d3155dd15835
6c571fff-ebe0-4203-b54d-85b794776f3b
5c1819b2-2734-4b3d-aff7-57199e3f9c0a
4658a37e-952c-4117-8674-10b31d8cda98
69139b58-5efa-40f2-a490-4ad56aa3a0a5
3a908824-c383-42e2-8e46-61b11b5f1a7c
18a4121b-511d-49d2-89a1-fee8f843af9c
ea6ecbf8-e9ca-4d9b-a0fc-ce626af55960
ff9f61a8-b511-4050-8ae9-9f1c9755b712
10ab67dd-214a-4358-9cd9-15c174d6d824
1c098581-744c-4d7a-84d9-7c0f1e440afe
5e2f036a-cac0-433f-9465-a018951a4579
353aef73-fb43-47a1-bd18-4840f241254d
52199a90-57aa-416d-8fcc-dc625e47dd60
615ffa8c-c618-454e-9542-2b8903393a32
6c832574-3de7-4d2c-9440-b25637af7877
921c4b58-c85c-4d06-9292-7746ac1ff7fe
78dfc8e8-ce40-4b82-b7d2-b0cad73e91a2
988b10be-27a5-4690-bffd-dca42e9a6b21
c6d9a1c4-fa6c-4d36-bb0b-582fb63dec3e
f86676f1-6b8c-4045-984d-71b8ab83629a
8d0caebd-c615-42eb-afb7-e177994aec9e
c42de5bb-0c79-4210-9eb7-6afdbc51cea8
390739f4-cc37-4de8-b192-199560930d0d
73fc4983-1b5a-4e0e-91db-f54ff5b59e32
b7fbb359-fbac-41ab-b48f-ca6764cc86d3
3910c36b-4e62-473f-94ab-b3df4d2e1562
563c39fb-7cec-42f5-a0c4-d6d44c2007d1
259d2651-e506-41be-a33f-d907af6d2328
73d6f40d-d2ab-4ce2-aa6d-a3791ad6fa3e
ae803e91-cf1d-49ed-bec7-2bf50d478c83
2cf39547-b572-42b2-a2cc-4e3f100d8813
d18f2802-8faf-4180-ae8b-31c6d7084e32
21bdcdf0-d4ef-4a8d-b61a-b6d94b92b7e3
24583d1a-7c48-4ba7-a8a6-3a5af7b62f03
00b45041-8c34-4021-83de-108fc79a0e6d
cd9c7811-dec1-417a-96fe-6771ba7e9e84
d6939938-81b0-4d77-8e77-361db0d00f73
f61ea806-5ebd-4293-9fae-f1dd93c6c245
2bfd0976-4059-4e11-b807-748092701c05
ed082668-ff1b-4ff2-bd6b-e5d6a900cff7
c02cb5f3-7746-4614-8afa-db7d0a6252e9
c2675ee6-0c51-4fbc-8193-505f0375c22b
3128dd78-2174-4e7c-9b65-513bba4f4de7
09dfb2fc-7230-40e0-ac58-c7a61ed2eb93
727ca945-d962-4001-ba85-cd15bb71911c
7d20e74b-5648-44bd-994a-d4b48c811502
309c8c84-cb5e-45a5-baee-3242c70044ea
49c6a6f9-4546-4b63-80c9-446f9d099adc
037599ce-e548-4fd2-97f0-c7718d4f5b0d
093e68d1-3121-4de4-b045-207fd5ea593c
ea6acd7a-5301-4591-90a1-08c478c753e1
fdcfa500-2ff4-4954-8470-b914ff674bcd
13bd19c4-e8bc-4928-9d8a-529c9efbc105
334ab8bc-9cab-4bbe-a99f-60b8921f9425
a549e5dd-2e89-410f-8db2-fbc4c1e7b809
6bdffafb-8653-4c94-bc32-5156eb9a1e2e
79c3511c-443c-4c01-9f32-b28790416324
81e9b592-f360-46ce-996d-250f4cbf09c5
a0e91035-df1b-4d70-ae7b-6fa396a63d03
9ac79064-bdf2-4481-bcdb-da2660eb64c5
3eac01d8-caaa-48a1-b444-df33519cbb15
03fc36d6-c9f7-4e95-9b1f-0f1e3f4199bb
cb738e51-6acf-45a0-97ae-941b6629bc76
616016c2-217e-4a2e-a38f-1cba1a37bb7f
be770126-cfbc-4bdb-9867-89f7e1ecf84b
9aee2109-7609-42be-9753-09a61a2703c4
8a533dba-a3bb-4687-9504-fec345c6b44f
6aded4d0-792e-48fd-943d-3b767cb1a1c1
8ab380fd-1211-41b8-a5ed-4d59dcec1739
337a389e-eb56-47d7-aa3d-55a417d40a60
b483f28e-065e-4868-a5c1-97fcf7259975
cbc7b7c6-e1df-46f8-9c1f-067e9d068a2c
fee9021a-9ae7-41e0-bfc8-38e2f947ef31
3f492562-9260-4234-83f0-8cc2793a0180
0c5869ba-7738-41de-884d-5bfe28595ad9
4fd1224e-6129-4349-917f-32fcffadeeec
a2d2d468-6646-441a-af64-57b50a32d9df
a0a592da-8e55-491c-8cee-50524d7dcbd9
cac9a838-175c-47dc-8a5d-3814d1522d27
382c27a4-d7da-4249-9dbb-63a83f6c1f52
3b8b3585-a060-4dd1-92d9-30ebbe29a50a
28fc5bdc-33bf-4df2-aded-dd9c75fd8208
3ca9322b-3a87-4558-91f9-cecbfde2ab94
dc055225-a985-43e8-ba69-0c46cafd099a
85455c5d-ad2f-4e56-8871-73927b65c246
d74140d3-eb57-477b-a803-afd47c04e137
89207279-3966-486f-b65b-371f9a5539d4
8f3c8487-8b67-4593-95a3-a2ee462c2bff
1822fdfb-a9ec-40b4-b023-2afb6dc7d9d2
77ab87be-85ea-44a9-be2d-6498104652b8
53b00c62-cd37-4ea6-9dac-27e1f3f8101c
31dc049e-2909-4d0d-9570-d7dedd92ab92
ef1e5f65-db44-4d02-937c-4ff7a39cedc1
ecb0d5bc-36dc-485a-a158-a1a3b595d560
4063aca1-28e9-4e82-88f0-d75426264944
ebb82c59-09ba-4c37-94f4-85be70db1379
e60471eb-5577-4b4e-aff6-5d858fb8428a
fd5640f4-4f11-44f8-8bf2-53cb6a6065ad
79e58ddd-f113-4360-9038-b6f157751571
700685e9-3a08-4a1e-a9ea-e500a78bffde
142164e0-ab18-4222-9340-825ad191edf5
9a4482a4-858c-43ac-aab2-86ff863549fe
45d4fee3-a569-4859-8d33-19087b1e2239
431233ef-00af-4f99-833f-635ddcc7839a
27a8f7bd-734e-4f10-8f32-a026b09b87c3
ccd9e4bb-1a41-4ea5-a824-e90ca3e141d5
3dfd93d2-77f8-4b56-9896-3ea75121eb4c
40d44188-db10-479d-bae0-e1926e7bd38d
9cfc49c2-c7cb-4297-8c28-f3d0c4461ac4
c4626e11-4ca0-498d-bd93-bad14fd7377b
f80d5d85-775f-4e0c-8fcd-ac07ad70b8d9
1909e015-dc65-445d-84ef-51d1ed18c63e
2deb46a2-b0db-4474-9e71-7991c4211553
16216eee-35be-4799-99ab-5d616ea73966
76126880-9093-442b-a260-5f1d0c21fc5f
7288f7a5-f329-461f-bfb0-0ca54a769630
e26a8c1c-8d13-46cb-b8f4-c59ad0f6831b
5174f68e-0ff0-4eb3-a436-69322488ac79
6dc7e825-7eb2-4fbd-a410-b2ef7c5df66d
3207d292-9fae-4f23-9bc5-eac3fa1c9418
3eda814d-f419-44c3-bf4a-c9889eb4a7f3
32336f00-6f93-4211-97da-c3745be76af0
c6b11e03-5579-4083-9874-f22eac0f9e2e
e990a336-e411-48e8-bb17-c41493273c6d
41c50bdc-ffbc-4fa6-8043-c7be10cc29ea
92b44c1e-b46d-40b7-b44f-bfe4d5be60af
9bb9302f-c6fb-413a-ab9e-10202b493392
95cd68c3-be40-4ec0-9321-379a4d6aa324
d77fd84a-0333-45aa-87ad-9a13fee8855c
8e191529-9cc1-4e5e-99b7-dc591fa7c87b
18ed97dd-e4c5-4391-9659-c75f988452d0
7f0918be-9b0c-4611-8247-1caa5f828160
71a41492-941e-4584-93b7-607fec9ec618
b06c5997-4c76-495b-ba39-4675d4c17ffc
7738fe46-cdf8-47f4-ab55-9631b03ffec0
3bb32b59-0811-4094-8e8c-face1ddd753b
b09e8441-8bc4-4090-8096-b6ef3128ff39
df8b41ec-536b-476b-a728-3080dfc8fa7e
da516f6f-5b96-4915-b9ca-37576414030c
277636e5-bcfe-41f2-9554-59b065c032e3
f7759b18-83a7-49c9-9e46-1dad7446af87
bbf1634d-9ff0-4ef4-be9b-1615521b59d7
569655a9-3251-4ca7-bb7d-b02af27be2e3
14379419-caaa-463f-a5b8-82839f5c1741
550081a0-6fa5-487e-9b4e-a1e30005f808
c53bba9a-223b-4c73-9cb4-0c437c09fffd
060463b9-304e-4950-8884-6dd6214242d2
621c16e9-1857-4384-a67a-23e4a445d713
d324495f-ef51-4e75-bfa8-098eeee6d696
e88ca2e1-de4f-4ada-803d-769501a22c04
de1ed77a-c1f1-4a4f-a221-d6170f7c101b
fc5ec76e-88fe-4db7-b839-3b0e89f7bf12
1148fe68-7008-4bee-9a42-4f8f562ba4b4
248dc1ca-6001-4428-85f7-09dab9aa89d7
c94c6528-dd11-4b9c-8559-1d256ff8938f
c4adc93b-f2bc-496d-b0e9-64bd9fb22703
74f81d52-3467-4e26-bcc9-a21898c32e65
76941d34-be23-47c3-ae81-b87f1da9e05a
03fb90f1-7ab7-47e5-8f40-d3736ec1b621
27f2cdcb-54f7-4faf-aa36-0abd20bb0681
3a59551e-1107-4a13-891b-a5b1c389d83a
3557d35e-370f-4ee2-a33b-41b519ea098f
d871811f-676d-481a-bcb1-d33b747ceed3
215842dd-b87b-4fe9-8678-ce6d932da45c
884703d0-aac1-4b92-bb48-ff299019fcc2
04163972-b57c-4d50-a642-7680ebb01fb2
6a2fa446-6e27-4e51-8833-5f34f3a0ebcd
b3d17404-33e9-4e89-86ef-cd5f669439cf
ab4db8c4-f3dd-4413-a1c2-73e1ea5f4268
8bb6beec-933f-4077-a66c-d169276c8928
1a94c845-bfbc-4c24-a101-dfdea64beb0e
25cf0605-6e56-4a03-9824-fe392c79a411
be184369-1278-49c1-abe0-d161bb38fbea
faaadec1-6fb8-4eee-9ae6-84f6dc1e931f
b8bc190d-6133-41c1-84a9-fe1eb9603d52
fbd34ed3-e5ff-4626-a7a0-1f94e4ccde24
dbcab2d8-69e3-4253-87fb-4ef826ae6a60
362de6fc-4891-472b-ab14-3545e2859659
+316
View File
@@ -0,0 +1,316 @@
import psycopg2
from psycopg2 import sql
import os
import json
from datetime import timezone, timedelta
class PgSql:
def __init__(self):
self.connection = None
self.connect_sql()
def connect_sql(self):
try:
# 连接数据库
self.connection = psycopg2.connect(
user="postgres",
password="difyai123456",
host="172.20.0.145",
port=5432,
database="dify"
)
except (Exception, psycopg2.Error) as error:
print("Error while connecting to PostgreSQL", error)
def find_rating_by_message_id(self, message_id):
cursor = None
try:
cursor = self.connection.cursor()
# 构建SQL查询
query = sql.SQL("""
SELECT rating
FROM message_feedbacks
WHERE message_id = %s;
""")
# 执行查询并获取结果
cursor.execute(query, (message_id,))
record = cursor.fetchone()
if record:
return record[0]
else:
return None
except (Exception, psycopg2.Error) as error:
print("Error while fetching data from PostgreSQL", error)
finally:
if cursor:
cursor.close()
return None
def find_message(self, conversation_id, app_id, output_dir, date=None):
cursor = None
try:
cursor = self.connection.cursor()
# 构建SQL查询,根据是否传入日期和app_id进行条件过滤
if app_id and date:
# 如果 app_id 和日期都存在
query = sql.SQL("""
SELECT id, conversation_id, query, answer, workflow_run_id, created_at
FROM messages
WHERE conversation_id = %s AND app_id = %s AND DATE(created_at) = %s
ORDER BY created_at ASC;
""")
query_params = (conversation_id, app_id, date)
elif app_id:
# 如果只有 app_id 存在
query = sql.SQL("""
SELECT id, conversation_id, query, answer, workflow_run_id, created_at
FROM messages
WHERE conversation_id = %s AND app_id = %s
ORDER BY created_at ASC;
""")
query_params = (conversation_id, app_id)
elif date:
# 如果只有日期存在
query = sql.SQL("""
SELECT id, conversation_id, query, answer, workflow_run_id, created_at
FROM messages
WHERE conversation_id = %s AND DATE(created_at) = %s
ORDER BY created_at ASC;
""")
query_params = (conversation_id, date)
else:
# 如果 app_id 和日期都不存在
query = sql.SQL("""
SELECT id, conversation_id, query, answer, workflow_run_id, created_at
FROM messages
WHERE conversation_id = %s
ORDER BY created_at ASC;
""")
query_params = (conversation_id,)
# 执行查询并获取结果
cursor.execute(query, query_params)
records = cursor.fetchall()
# 打开文件并写入查询结果
output_file = os.path.join(output_dir, "conversation.md")
with open(output_file, 'w', encoding='utf-8') as file:
for record in records:
msg_id, query, workflow_run_id, answer = record[0], record[2], record[4], record[3]
# 假设原来的时间是 UTC 时间,给它加上 UTC 时区信息
if record[5].tzinfo is None:
record_utc = record[5].replace(tzinfo=timezone.utc) # 添加 UTC 时区信息
else:
record_utc = record[5]
# 将时间转换为 UTC+8 时区
utc_plus_8 = timezone(timedelta(hours=8))
create_data = record_utc.astimezone(utc_plus_8).strftime("%Y-%m-%d %H:%M:%S")
message_rating = self.find_rating_by_message_id(msg_id)
if message_rating:
file.write(f"## ({message_rating}) Query: {query}\n\n")
else:
file.write(f"## Query: {query}\n\n")
file.write(f"Workflow_run_id: {workflow_run_id}\n\n")
file.write(f"create_data: {create_data}\n\n")
file.write(f"**Answer:**\n\n{answer}\n\n")
# 调用find_workflow_node_executions函数生成workflow_run_id的md文件
temp = self.find_workflow_node_executions(workflow_run_id, output_dir)
return temp
# print(f"Data has been written to {output_file}")
except (Exception, psycopg2.Error) as error:
print("Error while fetching data from PostgreSQL", error)
finally:
if cursor:
cursor.close()
def find_conversation_ids_by_message_date(self, date, app_id=None):
cursor = None
try:
# 创建游标
cursor = self.connection.cursor()
# 根据 app_id 是否为空,动态生成 SQL 查询
if app_id:
# 如果 app_id 存在,查询指定日期和 app_id 的 conversation_id
query = sql.SQL("""
SELECT DISTINCT conversation_id
FROM messages
WHERE DATE(created_at) = %s AND app_id = %s;
""")
query_params = (date, app_id)
else:
# 如果没有指定 app_id,只查询日期对应的 conversation_id
query = sql.SQL("""
SELECT DISTINCT conversation_id
FROM messages
WHERE DATE(created_at) = %s;
""")
query_params = (date,)
# 执行查询
cursor.execute(query, query_params)
conversation_ids = cursor.fetchall()
# 将结果转换为列表并去重(防止极端情况下仍有重复)
unique_conversation_ids = list(set([conversation_id[0] for conversation_id in conversation_ids]))
return unique_conversation_ids
except (Exception, psycopg2.Error) as error:
print("Error while fetching data from PostgreSQL", error)
finally:
if cursor:
cursor.close()
def find_conversation_ids_by_date(self, date, app_id):
cursor = None
try:
cursor = self.connection.cursor()
# 构建SQL查询,如果app_id为空则不包含app_id条件
if app_id:
query = sql.SQL("""
SELECT id
FROM conversations
WHERE DATE(updated_at) = %s
AND invoke_from = 'web-app'
AND app_id = %s;
""")
query_params = (date, app_id)
else:
query = sql.SQL("""
SELECT id
FROM conversations
WHERE DATE(updated_at) = %s
AND invoke_from = 'web-app';
""")
query_params = (date,)
# 执行查询并获取结果
cursor.execute(query, query_params)
conversation_ids = cursor.fetchall()
# 打印或返回结果
for conversation_id in conversation_ids:
print(conversation_id[0])
return [conversation_id[0] for conversation_id in conversation_ids]
except (Exception, psycopg2.Error) as error:
print("Error while fetching data from PostgreSQL", error)
finally:
if cursor:
cursor.close()
def find_workflow_node_executions(self, workflow_run_id, output_dir_):
    """Write every node execution of one workflow run to a Markdown file.

    The rows of ``workflow_node_executions`` belonging to ``workflow_run_id``
    are read in ``finished_at`` order and rendered one section per node into
    ``<output_dir_>/workflow_run_logs/<workflow_run_id>.md``.

    Args:
        workflow_run_id: Id of the workflow run whose nodes are exported.
        output_dir_: Directory under which ``workflow_run_logs`` is created.

    Returns:
        The path of the written Markdown file, or ``None`` when the query or
        the file creation fails (the error is printed).
    """

    def _decode(value):
        # Accept JSON text as well as values that arrive already decoded.
        if isinstance(value, (str, bytes, bytearray)):
            return json.loads(value)
        return value

    def _json_block(label, value):
        # Render one labelled, pretty-printed JSON code fence.
        pretty = json.dumps(value, ensure_ascii=False, indent=4)
        return f"{label}: \n```json\n{pretty}\n```\n\n"

    query = (
        "SELECT id, workflow_run_id, node_type, title, inputs, process_data, outputs, finished_at "
        "FROM workflow_node_executions "
        "WHERE workflow_run_id = %s "
        "ORDER BY finished_at ASC"
    )

    cursor = None
    try:
        cursor = self.connection.cursor()
        cursor.execute(query, (workflow_run_id,))
        records = cursor.fetchall()

        log_dir = os.path.join(f"{output_dir_}", "workflow_run_logs")
        os.makedirs(log_dir, exist_ok=True)
        output_file = os.path.join(log_dir, f"{workflow_run_id}.md")

        with open(output_file, 'w', encoding='utf-8') as file:
            for record in records:
                # The second column repeats workflow_run_id and is not needed.
                node_id, _, node_type, title, inputs, process_data, outputs, finished_at = record
                try:
                    file.write(f"# Node Type: {node_type}(**{title}**)\n\n")
                    file.write(f"Node ID: {node_id}\n\n")
                    if inputs is not None:
                        file.write(_json_block("Inputs", _decode(inputs)))
                    if process_data is not None:
                        file.write(_json_block("process_data", _decode(process_data)))
                    if outputs is not None:
                        outputs_data = _decode(outputs)
                        file.write(_json_block("outputs", outputs_data))
                        # Extract the LLM answer carried in the body of the
                        # "RagasHTTP" http-request node.
                        if node_type == "http-request" and title == "RagasHTTP":
                            body = outputs_data.get("body", {})
                            answer = _decode(body)[0].get('answer', {})
                            file.write(f"answer: \n\n{answer}\n\n")
                    file.write(f"Finished At: {finished_at}\n\n")
                except Exception as e:
                    # A malformed record must not abort the whole export; the
                    # part of its section written so far stays in the file.
                    print(f"file write has error:{e}")
        return output_file
    except Exception as error:  # driver errors derive from Exception
        print("Error while fetching data from PostgreSQL", error)
        return None
    finally:
        if cursor is not None:
            cursor.close()
def close_connection(self):
    """Close the connection stored on this instance, if there is one."""
    connection = self.connection
    if not connection:
        # Nothing to close.
        return
    connection.close()
def export_by_conversation_id(conversation_id, app_id: str):
    """Export one conversation into ``./conversion_infos/<conversation_id>``.

    Args:
        conversation_id: Id of the conversation to export; also used as the
            name of the output sub-directory.
        app_id: Application id forwarded to ``PgSql.find_message``.

    Returns:
        Whatever ``PgSql.find_message`` returns for this conversation.
    """
    pgsql = PgSql()
    try:
        output_dir = os.path.join(".", "conversion_infos", f"{conversation_id}")
        os.makedirs(output_dir, exist_ok=True)
        return pgsql.find_message(conversation_id=conversation_id, app_id=app_id, output_dir=output_dir)
    finally:
        # Previously the close call was placed after `return` and never ran,
        # leaking one database connection per export.
        pgsql.close_connection()
def export_by_data(data: str, app_id: str):
    """Export every conversation that has messages on the given date.

    Each conversation is written to
    ``./conversion_infos/<data>/<conversation_id>``.

    Args:
        data: The date to export, e.g. ``"2024-08-21"``; it is used both as
            the query value and as the name of the output sub-directory.
        app_id: Application id used to filter the conversations and forwarded
            to ``PgSql.find_message``.
    """
    pgsql = PgSql()
    try:
        # The lookup may yield None when its query fails; treat that as
        # "nothing to export" instead of crashing on iteration.
        conversation_ids = pgsql.find_conversation_ids_by_message_date(data, app_id) or []
        for conversation_id in conversation_ids:
            output_dir = os.path.join(".", "conversion_infos", data, f"{conversation_id}")
            os.makedirs(output_dir, exist_ok=True)
            pgsql.find_message(conversation_id=conversation_id, app_id=app_id, output_dir=output_dir, date=data)
    finally:
        # Release the connection even when one of the exports raises.
        pgsql.close_connection()
# Script entry point: export a single hard-coded conversation and print the
# value returned by the export.
if __name__ == '__main__':
    # export_by_data("2024-08-21", "your_app_id")
    print(export_by_conversation_id("d0369d15-d253-4145-9602-bf6b0e569702", ""))
+1
View File
@@ -0,0 +1 @@
pyinstaller -F -w Dialog.py -n Analysis.exe --version-file=version_info.txt --distpath ".\Analysis"
File diff suppressed because it is too large Load Diff
+7
View File
@@ -0,0 +1,7 @@
# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
package = []
[metadata]
lock-version = "2.0"
python-versions = "^3.11"
content-hash = "81b2fa642d7f2d1219cf80112ace12d689d053d81be7f7addb98144d56fc0fb2"
+947
View File
@@ -0,0 +1,947 @@
{
"cells": [
{
"cell_type": "markdown",
"id": "d53385f4-0763-4d6a-a3de-4269a044115d",
"metadata": {},
"source": [
"# 1. 数据读取"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "88be1d08-21a6-4ad5-bdce-d77821cd790c",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"\n",
"def read_specified_rows(file_path, row_index):\n",
" if file_path.endswith('.csv'):\n",
" df = pd.read_csv(file_path, header=None, encoding='utf-8')\n",
" else:\n",
" raise ValueError(\"仅支持 CSV 文件\")\n",
"\n",
" # 选取第一列并转换为字符串列表\n",
" selected_data = df.iloc[row_index:, 0].astype(str).tolist()\n",
" return selected_data\n",
"\n",
"# 示例用法\n",
    "file_path = \"D:/博微知识助手400问分类_2.13.16.33.csv\" # only CSV is supported; any other extension raises ValueError\n",
    "row_index = 2 # first row to read (0-based); every row from this index to the end is returned\n",
"\n",
"result_list = read_specified_rows(file_path, row_index)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6f208b20-778d-4923-a463-4283bed21160",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"('多个工程需要统一修改定额中材料的单价是否可以呢', '可以导入多个投标报价新建全口径预算工程吗', 444)"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_list[0], result_list[-1], len(result_list)"
]
},
{
"cell_type": "markdown",
"id": "dd23a946-ae58-4ff7-b611-a37c61ca1504",
"metadata": {},
"source": [
"# 2.prefix_re"
]
},
{
"cell_type": "markdown",
"id": "1ae7e16e-2870-450f-a0b2-8ba2925e1cdd",
"metadata": {},
"source": [
"## 2.1 检测后缀名"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "b7866697-0c75-4abb-aeb2-f467b3eca50f",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"def detect_fields(input_str):\n",
" # 定义要检测的字段列表\n",
" fields = [\n",
" \"xzwb\", \"bxqd2\", \"bpz17\", \"zwqd\", \"bwpw\", \"BJGX\", \"bt2\", \"BDQ3\", \"BT2\", \n",
" \"gec5\", \"BDY3\", \"dwg\", \"bwpwz\", \"BDD3\", \"bt1\", \"bphq18\", \"zwzj\", \"bczc2\", \n",
" \"BPQ\", \"BPY\", \"BDQ3\", \"SXZB23\", \"SXZ\", \"xzwb2\", \"bpz17\" \n",
" ]\n",
" \n",
" # 构建正则表达式模式,匹配大小写不敏感且前面可能带有.\n",
" # 去掉 \\b 以允许字段是其他字符串的一部分\n",
" pattern = r'(?:\\.?)(' + '|'.join(re.escape(field) for field in fields) + r')'\n",
" \n",
" # 使用 re.IGNORECASE 标志来忽略大小写\n",
" if re.search(pattern, input_str, re.IGNORECASE):\n",
" return True\n",
" else:\n",
" return False\n",
"\n",
"index1 = []\n",
"for i in range(len(result_list)):\n",
" if detect_fields(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index1.append(i)"
]
},
{
"cell_type": "markdown",
"id": "5854667e-f149-4eb3-b97b-5daa4056a19c",
"metadata": {},
"source": [
"## 2.2 检测“锁”"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "c3be0a91-2a50-4669-95b4-925d84bbad0a",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"软件\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index2 = []\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]) == True:\n",
" # print(i, result_list[i])\n",
" index2.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "c137ffce-aebe-41bc-afb6-fe31788b35c2",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"工程\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index3=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index3.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "b6a68005-ce58-48e3-88d7-dd9564d7a81c",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"计价\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index4=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index4.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "1c053ae1-dff0-4ff8-9039-c64d8246e402",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"配网\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index5=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index5.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "de72b59d-137e-4507-b91d-776f23f2cd1b",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"清单\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index6=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index6.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "80704f93-65b3-439e-b688-d8ea74217e9d",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"定额\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index7=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index7.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "ff0213ff-eddb-4d25-90b6-d009edf94bfc",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"施工\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index8=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index8.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "5f08eedc-d642-45bb-82ec-3526f9616d68",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"技改\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index9=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index9.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "789bd94a-7d21-4382-8c01-0f0373a66455",
"metadata": {},
"outputs": [],
"source": [
"def word_query(input_str, target_word=\"计算\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index10=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" # print(i, result_list[i])\n",
" index10.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "8cd839bc-a72f-4c35-bcf5-1f755bd16740",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"25 .bpz17对应的哪个锁\n",
"79 软件锁怎么自动注册不起了呢\n",
"80 问下锁激活怎么弄?\n",
"102 zwqd请问这是用什么锁做的\n",
"109 网络锁怎么登录\n",
"151 19-029758帮查下一下锁号\n",
"164 怎么激活锁保险\n",
"172 打开工程提示需要检修高级版锁\n",
"194 清单锁住了,怎么解锁\n",
"210 我想知道这个锁有问题吗,为什么激活不了呢\n",
"214 你好,显示我的锁号注册失败是什么原因\n",
"246 查询锁许可证号锁号19-079728,19-079718\n",
"249 我的清单锁不好用了,总是找不到可用许可证\n",
"251 配网设计锁不好使\n",
"254 2009年版的软件,插上了锁,怎么打不开\n",
"262 配网2017软件读不到锁,打不开软件\n",
"264 软件锁激活\n",
"297 插了电建的锁 识别不出来,打开软件首行灰色\n",
"303 软件是识别不到锁\n",
"310 主网造价国网清单的锁,接入结算后,费用和投标时候的投标价格不一样\n",
"319 想调定额量,怎么解锁\n",
"332 新的软件锁插入显示锁中未找到本软件可用的许可证,请问应该怎么解决呢\n",
"351 可以用网络锁登陆吗\n",
"360 清单怎么解锁\n",
"377 锁怎么激活呢\n",
"392 我有个新的锁,怎么注册?\n",
"419 请问清单如何解锁\n",
"425 我想咨询这我个工程后缀是zwzj 要用什么锁打开\n"
]
}
],
"source": [
"import re\n",
"\n",
"def word_query(input_str, target_word=\"锁\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) \n",
"\n",
"index11=[]\n",
"for i in range(len(result_list)):\n",
" if word_query(result_list[i]):\n",
" print(i, result_list[i])\n",
" index11.append(i)"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "54e94090-3acb-4550-80bc-01c2f8191ec2",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"306"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"index = index1 + index2 + index3 + index4 + index5 + index6 + index7 + index8 + index9 + index10 + index11\n",
"len(set(index))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"id": "1b8b8559-ea36-454f-915d-da2122b8a620",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"other_index = []\n",
"other_content = []\n",
"for i in range(len(result_list)):\n",
" if i not in set(index):\n",
" other_index.append(i+3)\n",
" other_content.append(result_list[i])\n",
" # print((i + 3),result_list[i])"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "09634a84-0df0-4bf7-9926-3fccf30b588f",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"138"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(other_index)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "1229f23d-17de-4f1e-a48c-6bd8f2b77eeb",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(10, '怎样新增取费表')"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"other_index[0], other_content[0]"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "7dfaabd3-7956-4842-882c-16f2566563b0",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "ab78d90d-deaa-46e4-9629-212b1be5991c",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "4b0ce7ae-b9dd-4713-b39a-6ddd820914e3",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "2d0517bb-106f-410b-b34c-891597440219",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 6,
"id": "1166e47b-741f-44a3-a341-14440e015309",
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"\n",
"from langchain_openai import ChatOpenAI\n",
"\n",
"\n",
"# Qwen/Qwen2.5-72B-Instruct\n",
"# deepseek-ai/DeepSeek-R1\n",
"# deepseek-ai/DeepSeek-V3\n",
"# The API key is read from the SILICONFLOW_API_KEY environment variable so that\n",
"# no secret is stored in the notebook; a missing variable fails with KeyError.\n",
"# NOTE: the key that used to be committed here is leaked and must be rotated.\n",
"qwen_llm = ChatOpenAI(\n",
"    openai_api_base=\"https://api.siliconflow.cn/v1\",\n",
"    model_name=\"Qwen/Qwen2.5-72B-Instruct\",\n",
"    openai_api_key=os.environ[\"SILICONFLOW_API_KEY\"]\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "03871a43-8859-47bb-bc67-6b137b3a7205",
"metadata": {},
"outputs": [],
"source": [
"from langchain_core.prompts import ChatPromptTemplate\n",
"from langchain_core.output_parsers import StrOutputParser"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "85179eaa-303e-44a1-aa3e-f7d84ac8be08",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|████████████████████████████████████████████████████████████████| 144/144 [03:48<00:00, 1.59s/it]\n"
]
}
],
"source": [
"PromptTemplate2 = \"\"\"\n",
"你是博微公司的电力造价员专家,需要将后续用户输入的对于多款软件产品使用和业务方面的咨询问题转化。\n",
"请站在电力造价领域将用户的问题转为书面化咨询语句,不要假设上下文,更不要尝试回答问题\n",
"\n",
"# 用户输入\n",
"{query}\n",
"\n",
"# 注意,不要扩展礼貌用词等等\n",
"\"\"\"\n",
"\n",
"Prompt2 = ChatPromptTemplate.from_template(PromptTemplate2)\n",
"\n",
"Chain2 = Prompt2 | qwen_llm | StrOutputParser()\n",
"\n",
"\n",
"from tqdm import tqdm\n",
"\n",
"id_info2 = []\n",
"for i in tqdm(result_list[300:]):\n",
" id_info2.append(Chain2.invoke({\"query\":i}))"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "6247506d-f131-4b7d-b04b-79e24f078989",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"144"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"len(id_info2)"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "eccc683c-98b0-4132-8c85-5beeb871a640",
"metadata": {},
"outputs": [],
"source": [
"def save_list_to_txt(filename, data_list):\n",
" with open(filename, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(\"\\n\".join(data_list)) # 每个元素换行\n",
"\n",
"save_list_to_txt(\"ceshi100.txt\", id_info2)"
]
},
{
"cell_type": "markdown",
"id": "4be940c3-e2ee-4efb-87e3-328cfaf59602",
"metadata": {},
"source": [
"# prompt"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "33c374c9-0ac7-460d-90ce-2a617a20e081",
"metadata": {},
"outputs": [],
"source": [
"PromptTemplate1 = \"\"\"\n",
"你是博微公司的电力造价员专家,需要将后续用户输入的对博微公司多款软件产品使用和业务方面的咨询问题进行意图分类。\n",
"并且站在电力造价领域角度上,将用户的问题理解意图后采用以下指定槽位结构填充将用户问题转化为JSON格式输出。\n",
"如果问题中没有给出对应槽位的值则为未知,而不要假设上下文,更不要尝试回答问题。\n",
"\n",
"# 用户输入\n",
"{query}\n",
"\n",
"# 一级意图\n",
"[操作指南, 规范解读, 费用构成, 其他]\n",
"\n",
"# 二级意图\n",
"## 操作指南\n",
"下载安装注册(系统环境要求、安装包的下载和安装步骤、激活码获取与绑定,离线激活流程、版本冲突、操作西戎适配性等)\n",
"软件使用操作(如 新建/打开工程, 数据相关操作, 报表生成与导出、版本兼容性处理等;以及计价通软件、造价软件相关操作咨询)\n",
"数据管理(如 数据备份与恢复,版本兼容性处理,多人协作权限设置)\n",
"\n",
"### 规范解读\n",
"国家规范(如 《电力建设工程概预算编制规定》, 《电网工程建设预算编制与计算标准》,预估相关)\n",
"行业标准(如 变电工程定额应用, 线路工程取费规则,主网和配网和技改检修的定额)\n",
"地方政策(如 地区人工费调整系数, 特殊材料价差处理)\n",
"行业知识查询(行业知识的解读查询)\n",
"\n",
"### 费用构成\n",
"费用类别解析(如 建筑工程费, 安装工程费, 设备购置费)\n",
"费用计算逻辑(如 直接费(人工、材料、机械), 间接费(企业管理费、规费), 利润与税金)\n",
"特殊场景费用(如 临时设施费, 冬季施工增加费)\n",
"\n",
"# 意图类别的槽位结构:\n",
"\n",
"## 操作指南 \n",
"一级意图: \n",
"二级意图:\n",
"software: 用户提到的具体软件产品名称。\n",
"functionality: 用户询问的具体功能或操作步骤。\n",
"specifics: 关于问题的具体描述或背景信息(如果有提及)。\n",
"version: 软件版本号(如果有提及)。\n",
"standard: 具体的电力造价规范或标准(如果有提及)。\n",
"context: 问题的上下文或应用场景,例如特定项目、合同条款等。\n",
"\n",
"## 规范解读\n",
"一级意图:\n",
"二级意图: \n",
"standard: 具体的电力造价规范或标准名称,例如《电力工程建设预算编制与计算规定》等等。\n",
"section: 规范中的具体章节或条款编号。\n",
"interpretation: 用户希望解读的具体内容或条款,例如某一条款的具体含义、适用范围等。\n",
"context: 问题的具体上下文或应用场景,例如某个项目、合同条款、特定工程阶段等。\n",
"software: 如果涉及软件操作,具体使用的博微公司软件产品名称(如果有提及)。\n",
"version: 软件版本号(如果有提及)。\n",
"specifics: 关于问题的具体描述或背景信息。\n",
"example: 是否需要具体的示例来帮助理解条款的应用。\n",
"\n",
"## 费用构成\n",
"一级意图: \n",
"二级意图:\n",
"software: 用户提到的具体软件产品名称(隐含为博微公司的某款电力造价软件)。\n",
"functionality: 用户询问的具体功能或操作步骤(如果适用)。\n",
"specifics: 关于问题的具体描述或背景信息。\n",
"version: 软件版本号(如果有提及)。\n",
"fee_type: 费用类型,如设计费、施工图预算编制费、竣工图文件编制费等。\n",
"cost_component: 具体的成本组成部分,例如直接成本、间接成本、管理费等。\n",
"inclusion: 是否包含特定费用项,例如是否包含某项费用或是否需要单独计列。\n",
"basis: 计算依据或标准,例如按工程造价的百分比、固定金额等。\n",
"context: 问题的上下文或应用场景,例如某个项目、合同条款、特定工程阶段等。\n",
"standard: 涉及的具体电力造价规范或标准,例如《电力工程建设预算编制与计算规定》等等。\n",
"\n",
"## 其他\n",
"一级意图: \n",
"二级意图:\n",
"qa: 问题咨询\n",
"ty: 闲聊\n",
"\n",
"# 注意:\n",
"1. 请按JSON格式返回,未知字段填'未知'\n",
"2. json的keys,一定含有'一级意图'、'二级意图',且无论用户输入上下文多少,输出json只有一个\n",
"\n",
"\"\"\"\n",
"\n",
"Prompt1 = ChatPromptTemplate.from_template(PromptTemplate1)\n",
"\n",
"Chain1 = Prompt1 | qwen_llm | StrOutputParser()\n",
"\n",
"####################################################################\n",
"\n",
"PromptTemplate2 = \"\"\"\n",
"请在电力造价领域角度上,对用户的输入进行指定槽位的填充,并转换为JSON结构输出。\n",
"如果问题中没有给出对应槽位的值则为未知,而不要假设上下文,更不要尝试回答问题。\n",
"\n",
"# 用户输入\n",
"{query}\n",
"\n",
"# 槽位结构:\n",
"\n",
"## 操作指南 \n",
"一级意图: 操作指南\n",
"二级意图:下载安装注册\n",
"software: 用户提到的具体软件产品名称(如果适用)。\n",
"functionality: 用户询问的具体功能或操作步骤(如果适用)。\n",
"issueType: 问题的具体类型,如“咨询费用包含内容”、“操作方法不明”等。\n",
"specifics: 关于问题的具体描述或背景信息。\n",
"version: 软件版本号(如果适用)。\n",
"standard: 具体的电力造价规范或标准(如果有提及)。\n",
"context: 问题的上下文或应用场景,例如特定项目、合同条款等。\n",
"\n",
"# 注意:\n",
"1. 请按JSON格式返回,未知字段填'未知'\n",
"2. 一级意图: 操作指南 和 二级意图:下载安装注册 是固定不变的\n",
"3. 无论用户输入上下文多少,输出json只有一个\n",
"\n",
"\"\"\"\n",
"Prompt2 = ChatPromptTemplate.from_template(PromptTemplate2)\n",
"\n",
"Chain2 = Prompt2 | qwen_llm | StrOutputParser()\n",
"\n",
"\n",
"# query = \"多个工程要修改建筑的材料和机械价格\"\n",
"# result1 = Chain1.invoke({\"query\":query})\n",
"# print(result1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "35f36068-b2a9-491a-8b35-292565aa0542",
"metadata": {},
"outputs": [],
"source": [
"Chain1.invoke(\"\")"
]
},
{
"cell_type": "markdown",
"id": "7c197c26-3a5c-43b4-9940-0cdbfcc69622",
"metadata": {},
"source": [
"## pipe"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "d06537d9-6cd2-45bf-8fd5-a0620d95db86",
"metadata": {},
"outputs": [],
"source": [
"import re\n",
"\n",
"def detect_fields(input_str):\n",
" # 定义要检测的字段列表\n",
" fields = [\n",
" \"xzwb\", \"bxqd2\", \"bpz17\", \"zwqd\", \"bwpw\", \"BJGX\", \"bt2\", \"BDQ3\", \"BT2\", \n",
" \"gec5\", \"BDY3\", \"dwg\", \"bwpwz\", \"BDD3\", \"bt1\", \"bphq18\", \"zwzj\", \"bczc2\", \n",
" \"BPQ\", \"BPY\", \"BDQ3\", \"SXZB23\", \"SXZ\", \"xzwb2\", \"bpz17\" \n",
" ]\n",
" \n",
" # 构建正则表达式模式,匹配大小写不敏感且前面可能带有.\n",
" # 去掉 \\b 以允许字段是其他字符串的一部分\n",
" pattern = r'(?:\\.?)(' + '|'.join(re.escape(field) for field in fields) + r')'\n",
" \n",
" # 使用 re.IGNORECASE 标志来忽略大小写\n",
" if re.search(pattern, input_str, re.IGNORECASE):\n",
" return True\n",
" else:\n",
" return False\n",
"\n",
"def word_query(input_str, target_word=\"锁\"):\n",
" pattern = rf\"{target_word}\" \n",
" return bool(re.search(pattern, input_str)) "
]
},
{
"cell_type": "code",
"execution_count": 84,
"id": "c836c925-e143-4c49-8366-da265fe70dd9",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 94/94 [14:30<00:00, 9.26s/it]\n"
]
}
],
"source": [
"from tqdm import tqdm\n",
"\n",
"id_info1 = []\n",
"for i in tqdm(result_list[350:]):\n",
" if detect_fields(i) or word_query(i):\n",
" id_info1.append(Chain2.invoke({\"query\":i})) \n",
" else:\n",
" id_info1.append(Chain1.invoke({\"query\":i})) \n",
" "
]
},
{
"cell_type": "code",
"execution_count": 86,
"id": "dfdbdb26-0c81-481a-8694-ca7339781ff5",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(94, 94)"
]
},
"execution_count": 86,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"import re\n",
"\n",
"# 正则表达式匹配 \"一级意图\" 和 \"二级意图\" 的值\n",
"pattern = r'\"(一级意图|二级意图)\"\\s*:\\s*\"([^\"]+)\"'\n",
"\n",
"# 初始化两个列表\n",
"list1 = []\n",
"list2 = []\n",
"\n",
"for i in id_info1:\n",
" matches = re.findall(pattern, i)\n",
"\n",
" for key, value in matches:\n",
" if key == \"一级意图\":\n",
" list1.append(value)\n",
" elif key == \"二级意图\":\n",
" list2.append(value)\n",
"\n",
"len(list1), len(list2)"
]
},
{
"cell_type": "code",
"execution_count": 80,
"id": "5ecc2d23-bb2c-4d40-bb8b-9e0e6f7f88e3",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'我在取费标修改完 报表输出没有变化'"
]
},
"execution_count": 80,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"result_list[349]"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "0054ca5b-0464-462e-be10-43a419d401af",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 48,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"detect_fields(result_list[332]) "
]
},
{
"cell_type": "code",
"execution_count": 68,
"id": "d8dae57a-3303-49d8-b7bb-396c83449bde",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"```json\n",
"{\n",
" \"操作指南\": \"操作指南\",\n",
" \"二级意图\": \"下载安装注册\",\n",
" \"software\": \"bphq18\",\n",
" \"functionality\": \"未知\",\n",
" \"issueType\": \"咨询费用包含内容\",\n",
" \"specifics\": \"后缀是什么软件\",\n",
" \"version\": \"未知\",\n",
" \"standard\": \"未知\",\n",
" \"context\": \"未知\"\n",
"}\n",
"```\n"
]
}
],
"source": [
"print(id_info1[5])"
]
},
{
"cell_type": "code",
"execution_count": 88,
"id": "2a1767a1-0578-4645-ad2b-032ae22073fc",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"文件已保存:list1.txt 和 list2.txt\n"
]
}
],
"source": [
"# 保存列表数据到 txt 文件\n",
"def save_list_to_txt(filename, data_list):\n",
" with open(filename, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(\"\\n\".join(data_list)) # 每个元素换行\n",
"\n",
"# 保存到 txt 文件\n",
"save_list_to_txt(\"list1.txt\", list1)\n",
"save_list_to_txt(\"list2.txt\", list2)\n",
"\n",
"print(\"文件已保存:list1.txt 和 list2.txt\")"
]
},
{
"cell_type": "code",
"execution_count": 76,
"id": "d93f91c7-0116-4408-99bd-f8a06ccfc50d",
"metadata": {},
"outputs": [],
"source": [
"def save_list_to_txt(filename, data_list):\n",
" with open(filename, \"w\", encoding=\"utf-8\") as f:\n",
" f.write(\"\\n\".join(data_list)) # 每个元素换行\n",
"\n",
"save_list_to_txt(\"ceshi100.txt\", id_info1)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5e06817a-18d1-451d-99a0-a9be3df1a46e",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "dify_lab",
"language": "python",
"name": "dify_lab"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.9"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
+19
View File
@@ -0,0 +1,19 @@
[tool.poetry]
name = "difyAnalysis"
version = "0.1.0"
description = ""
authors = ["ouyangyouzhang <ouyangyouzhang@booway.com.cn>"]
readme = "README.md"
[tool.poetry.dependencies]
python = "^3.11"
[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
[[tool.poetry.source]]
name = "ali-mirrors"
url = "http://mirrors.aliyun.com/pypi/simple/"
priority = "primary"
Binary file not shown.
+777
View File
@@ -0,0 +1,777 @@
{
"报表": [
"表单",
"统计表",
"文档"
],
"导出": [
"生成",
"下载",
"输出"
],
"导入": [
"上传",
"读取",
"载入"
],
"批量": [
"整体",
"一次性",
"多选"
],
"打印": [
"生成PDF",
"输出纸质",
"复印"
],
"设置": [
"配置",
"调整",
"修改"
],
"物料": [
"材料",
"设备",
"主材"
],
"工程": [
"造价工程",
"项目",
"施工"
],
"计算": [
"评估",
"统计",
"运算"
],
"调整": [
"变更",
"修正",
"修改"
],
"市场价": [
"成本价",
"售价",
"物价"
],
"费率": [
"收费比例",
"税率",
"计费标准"
],
"设备": [
"机器",
"器材",
"装置"
],
"材料": [
"物料",
"资源",
"原料"
],
"定额": [
"基准",
"标准",
"规范"
],
"汇总": [
"合计",
"统计",
"计算总额"
],
"合并": [
"整合",
"归类",
"统一"
],
"模板": [
"标准格式",
"格式",
"样式"
],
"查询": [
"检索",
"搜索",
"查找"
],
"删除": [
"移除",
"去除",
"清除"
],
"添加": [
"补充",
"插入",
"录入"
],
"修改": [
"变更",
"更改",
"调整"
],
"刷新": [
"更新",
"重置",
"同步"
],
"备份": [
"存档",
"存储",
"复制"
],
"同步": [
"更新",
"一致",
"对齐"
],
"运距": [
"运输距离",
"运送范围",
"物流范围"
],
"损耗": [
"消耗",
"损失",
"折损"
],
"增值税": [
"VAT",
"税负",
"税款"
],
"人工": [
"人工费",
"劳务",
"人工成本"
],
"卸车": [
"转运",
"装卸",
"搬运"
],
"保管": [
"存储",
"存放",
"储存"
],
"运输": [
"物流",
"运送",
"配送"
],
"计算式": [
"计算方法",
"公式",
"计算模型"
],
"金额": [
"费用",
"支出",
"成本"
],
"编号": [
"ID",
"标识",
"代码"
],
"价差": [
"差价",
"成本差",
"价格差"
],
"审核": [
"审查",
"检查",
"核对"
],
"管理": [
"运维",
"控制",
"维护"
],
"匹配": [
"对接",
"关联",
"适配"
],
"转换": [
"变换",
"转化",
"换算"
],
"参数": [
"变量",
"设定值",
"调节值"
],
"供货方": [
"提供商",
"销售方",
"供应商"
],
"页面": [
"窗口",
"界面",
"显示页"
],
"页码": [
"页数",
"页面编号",
"序号"
],
"字体": [
"字形",
"排版",
"文字样式"
],
"页眉": [
"标题栏",
"表头",
"上部标识"
],
"页脚": [
"表尾",
"底部信息",
"下部标识"
],
"汇总级别": [
"合计层次",
"归类等级",
"统计层级"
],
"配网": [
"电网",
"配电网络",
"供电系统"
],
"定额调整": [
"标准修改",
"费用修正",
"基准调整"
],
"市场价系数": [
"成本系数",
"价格调整",
"定价指数"
],
"文件": [
"文档",
"数据",
"资料"
],
"表单": [
"统计表",
"报表",
"登记表"
],
"信息": [
"数据",
"资料",
"消息"
],
"工程量": [
"施工量",
"工作量",
"项目量"
],
"费用": [
"成本",
"支出",
"开销"
],
"项目": [
"方案",
"工程",
"任务"
],
"施工": [
"作业",
"操作",
"建设"
],
"设备类别": [
"器材种类",
"装置类型",
"设备分类"
],
"组合件": [
"组件",
"组合部件",
"拼装部件"
],
"生成": [
"制作",
"创建",
"生产"
],
"统计": [
"计算",
"分析",
"汇总"
],
"公路": [
"道路",
"高速",
"国道"
],
"铁路": [
"铁道",
"铁路线",
"轨道"
],
"运杂费": [
"运输杂费",
"物流费用",
"配送费用"
],
"更新": [
"刷新",
"升级",
"同步"
],
"格式": [
"结构",
"模版",
"样式"
],
"显示": [
"展示",
"呈现",
"展现"
],
"隐藏": [
"隐藏",
"遮盖",
"不显示"
],
"分类": [
"类别",
"归类",
"分组"
],
"线路": [
"输电线路",
"电缆",
"电力线路"
],
"修改记录": [
"日志",
"变更记录",
"历史记录"
],
"主材": [
"核心物料",
"主要材料",
"基础材料"
],
"变更": [
"调整",
"改动",
"修改"
],
"经济作物": [
"经济植物",
"农作物",
"高价值作物"
],
"增值": [
"税收",
"附加值",
"价值提升"
],
"造价": [
"费用评估",
"成本估算",
"价格计算"
],
"超高": [
"超限",
"高度超标",
"额外高度"
],
"电力": [
"供电",
"能源",
"电气"
],
"电缆": [
"电线",
"电力线",
"输电线"
],
"跨越": [
"横跨",
"超越",
"跨界"
],
"社会保险": [
"社会福利",
"保险费",
"社保"
],
"住房公积金": [
"公积金",
"房贷基金",
"住房补贴"
],
"费率调整": [
"计费变更",
"价格调整",
"费用修改"
],
"密码": [
"访问码",
"口令",
"解锁码"
],
"解锁": [
"解除锁定",
"开锁",
"解封"
],
"软件": [
"平台",
"程序",
"系统"
],
"升级": [
"版本提升",
"更新",
"优化"
],
"许可证": [
"许可文件",
"授权",
"认证"
],
"清单": [
"项目信息",
"目录",
"明细"
],
"项目划分": [
"项目分类",
"工程分组",
"层级划分"
],
"技改": [
"升级改造",
"改造工程",
"技术改造"
],
"检修": [
"保养",
"维修",
"维护"
],
"迁改": [
"调整",
"改动",
"搬迁改造"
],
"预规": [
"预估",
"规划",
"预计"
],
"估算": [
"成本预测",
"费用测算",
"价格评估"
],
"取费": [
"费用标准",
"取费标准",
"费用计算"
],
"贷款利息": [
"借款利息",
"融资利息",
"建设期利息"
],
"模板参数": [
"设置参数",
"系统参数",
"配置参数"
],
"统计分析": [
"报表统计",
"数据分析",
"计算分析"
],
"调差": [
"价格调整",
"市场调整",
"费用变动"
],
"费用模板": [
"价格模板",
"费用标准",
"成本模版"
],
"工地运输": [
"施工运输",
"现场运输",
"工地物流"
],
"物料库": [
"设备库",
"材料数据库",
"物资存储"
],
"资源管理": [
"库存管理",
"物料控制",
"材料管理"
],
"调试费用": [
"系统测试费",
"试运行费",
"安装调试费"
],
"钢筋量": [
"钢筋工程量",
"钢筋用量",
"钢筋计算"
],
"五金计算": [
"五金费计算",
"五金材料核算",
"五金成本测算"
],
"迁改费用": [
"改造费用",
"调整费",
"搬迁成本"
],
"工程加密": [
"项目安全",
"工程保护",
"数据加密"
],
"工程审核": [
"费用审核",
"项目审查",
"施工审核"
],
"应急措施费": [
"突发事件费用",
"应急预备费",
"安全防范费"
],
"设备配送费": [
"设备运输费",
"器材配送费",
"设备物流成本"
],
"安全文明施工费": [
"施工安全费",
"文明施工成本",
"安全管理费"
],
"储能": [
"电池储能",
"储能系统",
"能源存储"
],
"锂电池": [
"储能电池",
"锂离子电池",
"蓄电池"
],
"储能电站": [
"电能存储站",
"储能设施",
"能源站"
],
"技术经济": [
"技经",
"技术参数",
"经济测算"
],
"工程调差": [
"成本变更",
"市场价修正",
"费用调整"
],
"电池储能": [
"蓄电池系统",
"电力存储",
"能量存储"
],
"混凝土": [
"水泥混凝土",
"施工混凝土",
"建筑混凝土"
],
"清水混凝土": [
"清洁混凝土",
"裸露混凝土",
"光面混凝土"
],
"泵车浇制": [
"机械浇筑",
"泵送浇筑",
"混凝土浇筑"
],
"消耗量": [
"用量",
"消耗数据",
"物料使用量"
],
"工程精度": [
"项目精细度",
"预算精度",
"施工精确度"
],
"勘察费": [
"地勘费用",
"调查费用",
"测绘费"
],
"设计费": [
"工程设计费用",
"规划设计费",
"技术设计成本"
],
"服务业增值税率": [
"行业税率",
"商业税率",
"企业增值税"
],
"其他费用": [
"杂项费用",
"额外开销",
"附加费用"
],
"特殊地区增加费": [
"区域加价",
"地理附加费",
"特区费用"
],
"取费表": [
"预算费用表",
"费用表",
"成本清单"
],
"计算预览": [
"成本预估",
"费用预览",
"估算检查"
],
"备份管理": [
"文件存档",
"历史记录",
"数据备份"
],
"软件申请": [
"使用许可",
"试用申请",
"授权申请"
],
"储能计价": [
"储能造价",
"储能预算",
"能源估算"
],
"信息价库": [
"市场价格库",
"报价数据库",
"定价库"
],
"组合件库": [
"工程部件库",
"组合结构库",
"组件库"
],
"主材设备互转": [
"材料调拨",
"主材设备转换",
"物资互换"
],
"输电线路": [
"线路建设",
"电力输送",
"输电工程"
],
"施工总承包": [
"工程承建",
"项目承包",
"工程总包"
],
"预算": [
"造价计算",
"成本估算",
"费用测算"
],
"投标报价": [
"投标成本",
"竞标价",
"标书报价"
],
"工程费用": [
"成本支出",
"建设费用",
"项目开支"
],
"拆除工程": [
"清理作业",
"旧建筑拆除",
"设备拆除"
],
"固定综合单价": [
"恒定单价",
"统一单价",
"综合单价锁定"
],
"全费用综合单价": [
"综合总价",
"全包单价",
"所有成本单价"
],
"行业清单工程": [
"行业工程量清单",
"行业定额清单",
"行业标准清单"
],
"建筑工程量清单": [
"工程施工清单",
"建设清单",
"施工清单"
],
"计价规范": [
"工程计价规范",
"造价标准",
"施工计价标准"
],
"国网2023规范": [
"2023版计价规则",
"国家电网规范",
"电力造价规范"
],
"土质比例": [
"土壤配比",
"地质成分",
"土壤类型"
],
"监造费": [
"采购监管费",
"设备监理费",
"质量监控费用"
],
"合同价": [
"合同总价",
"项目合同金额",
"签约价格"
],
"结算价": [
"决算价格",
"最终结算金额",
"结算款项"
],
"招标控制价": [
"竞标上限",
"投标最高限价",
"预算控制价"
]
}
+21
View File
@@ -0,0 +1,21 @@
import os
# Directory scanned when this file is run as a script.
path = "C:/Users/oyyz/Desktop/新建文件夹"


def count_txt_chars(directory):
    """Count the characters of every ``.txt`` file directly inside a directory.

    Each file is read as UTF-8 and its character count is printed. A file
    that cannot be read or decoded is reported and left out of the total.

    Args:
        directory: Path of the directory to scan (sub-directories are not
            traversed).

    Returns:
        The summed character count of all readable ``.txt`` files.

    Raises:
        OSError: If ``directory`` itself cannot be listed.
    """
    total = 0
    for filename in os.listdir(directory):
        # Only .txt files are counted.
        if not filename.endswith('.txt'):
            continue
        file_path = os.path.join(directory, filename)
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                chars = len(f.read())
        except (OSError, UnicodeDecodeError) as e:
            print(f"读取文件 {filename} 时出错: {e}")
            continue
        total += chars
        print(f"{filename}: {chars} 个字符")
    return total


if __name__ == "__main__":
    total_chars = count_txt_chars(path)
    print(f"\n所有txt文件共包含 {total_chars} 个字符")
+33
View File
@@ -0,0 +1,33 @@
# UTF-8
#
# For the syntax of this file, see:
# https://pyinstaller.readthedocs.io/en/stable/spec-files.html#spec-file-version-info
VSVersionInfo(
ffi=FixedFileInfo(
filevers=(1, 0, 0, 1),
prodvers=(1, 0, 0, 1),
mask=0x3f,
flags=0x0,
OS=0x40004,
fileType=0x1,
subtype=0x0,
date=(0, 0)
),
kids=[
StringFileInfo(
[
StringTable(
'040904B0',
[StringStruct('CompanyName', '江西博微新技术有限公司'),
StringStruct('FileDescription', 'Analysis'),
StringStruct('FileVersion', '1.0.0.1'),
StringStruct('InternalName', 'Analysis'),
StringStruct('LegalCopyright', '© 江西博微新技术有限公司. All rights reserved.'),
StringStruct('OriginalFilename', 'Analysis.exe'),
StringStruct('ProductName', 'Analysis'),
StringStruct('ProductVersion', '1.0.0.1')])
]),
VarFileInfo([VarStruct('Translation', [1033, 1200])])
]
)
+4
View File
@@ -0,0 +1,4 @@
{
"url":"http://10.1.0.145:8090/graphql",
"Authorization":"Bearer eyJhbGciOiJSUzI1NiIsInR5cCI6IkpXVCJ9.eyJhcGkiOjEsImdycCI6MSwiaWF0IjoxNzIzNjMxMjcwLCJleHAiOjE4MTgzMDQwNzAsImF1ZCI6InVybjp3aWtpLmpzIiwiaXNzIjoidXJuOndpa2kuanMifQ.g5H1xVMtk7Q3uvrRdtD3aTm49dQkS11cYdDKIwXo7DthOOTGj9DmFO7yILNDU7XFACTZc1Ej6ryguYV_8vGqoc-Rc7LciwvqS_RHDYUKZNKENbv8df9UGDMB-F9DT_airGc1lGJXgVqypxejDL3fY8aRMGXm7GBIlZKY4JTeI2uJZxffgfqKGrOvc3EOtsGgJzKZo4OyQ8UInGtCTiuq6-mLj_Syix_1z52K1tgfnF4E4-rZH_zCD05hUlUMYUV-KWhPkeOEGR5xbRTrulfCvzDD4T0CX4pI-keSKmgVn1HYSSN4o1Tj_l9zsyhUoLRzhzPK29Q3uekIc9obrvCHrg"
}