首次提交:上传本地文件夹

This commit is contained in:
ruxia
2025-03-31 17:28:23 +08:00
commit 0de349447c
439 changed files with 36643 additions and 0 deletions
+280
View File
@@ -0,0 +1,280 @@
import pandas as pd
from neo4j import GraphDatabase
import networkx as nx
from pyvis.network import Network
import networkx as nx
import os

# Neo4j connection settings.
# Each value can be overridden through an environment variable so that the
# server address and credentials do not have to be edited (or committed) in
# source; the fallbacks are the values this script has always used.
URI = os.environ.get("NEO4J_URI") or "bolt://10.1.6.34:7687"
AUTH = (
    os.environ.get("NEO4J_USERNAME") or "neo4j",
    os.environ.get("NEO4J_PASSWORD") or "password",
)
# Spreadsheet columns that hold the module hierarchy, outermost level first,
# each paired with the Neo4j label given to nodes created from that column.
_MODULE_LEVELS = (
    ("一级模块", "页面"),
    ("二级模块", "TAB控件"),
    ("三级模块", "分组控件"),
    ("四级模块", "属性控件"),
)


def _cell_text(row, column):
    """Return ``row[column]`` as stripped text; "" if the cell is missing or NaN."""
    value = row.get(column, "")
    if pd.isna(value):
        return ""
    return str(value).strip()


def create_knowledge_graph(excel_file):
    """Rebuild the Neo4j knowledge graph from a feature-list workbook.

    WARNING: every node and relationship already in the database is deleted
    before the import starts.

    The graph is a tree rooted at a ``软件`` node named ``配网D3软件``. For each
    spreadsheet row, one node is merged per non-empty module column (see
    ``_MODULE_LEVELS``) and chained to the previous non-empty level with a
    ``包含`` relationship; a ``功能名称`` leaf carrying the row's description is
    attached to the deepest module found. Node identity is the ``|``-joined
    path from the root, so equal names under different parents stay distinct.

    Args:
        excel_file: Workbook to load; passed straight to ``pandas.read_excel``
            with the ``openpyxl`` engine.

    Returns:
        None. Exceptions raised while the session is in use are caught and
        printed, not propagated.

    Raises:
        Whatever ``pandas.read_excel`` or ``GraphDatabase.driver`` raise, since
        both run before the guarded section.
    """
    df = pd.read_excel(excel_file, engine="openpyxl")
    driver = GraphDatabase.driver(URI, auth=AUTH)

    # NOTE: the transaction functions below deliberately do no printing. A
    # managed transaction may be re-run by the driver, so progress messages
    # are emitted by the caller once the transaction has returned.

    def clear_database(tx):
        """Delete every node (and its relationships) in the database."""
        tx.run("MATCH (n) DETACH DELETE n")

    def count_nodes(tx):
        """Return the number of nodes currently stored."""
        result = tx.run("MATCH (n) RETURN count(n) as count")
        return result.single()["count"]

    def add_node(tx, label, name, properties=None, parent_path=""):
        """Merge a node and return its unique identifier.

        The identifier is ``parent_path|name`` (or just ``name`` when there is
        no parent path) and is stored in the node's ``name`` property; the
        human-readable name goes into ``display_name`` / ``original_name``.
        """
        # Work on a copy so the caller's dict is never modified.
        properties = dict(properties or {})
        properties["display_name"] = name
        properties["original_name"] = name
        if parent_path:
            properties["parent_path"] = parent_path
        # The label is duplicated as a property so it can be read like any
        # other attribute.
        properties["node_type"] = label

        unique_id = f"{parent_path}|{name}" if parent_path else name
        query = f"MERGE (n:{label} {{name: $unique_id}}) SET n += $properties"
        tx.run(query, unique_id=unique_id, properties=properties)
        return unique_id

    def add_relationship(tx, start_label, start_name, end_label, end_name, rel_type="包含"):
        """Merge a ``rel_type`` relationship between two nodes found by unique id."""
        query = (
            f"MATCH (a:{start_label} {{name: $start_name}}), "
            f"(b:{end_label} {{name: $end_name}}) "
            f"MERGE (a)-[r:{rel_type}]->(b)"
        )
        tx.run(query, start_name=start_name, end_name=end_name)

    def add_row(tx, row, root_name):
        """Create the module chain and the feature leaf described by one row."""
        # Track the deepest non-empty level seen so far; empty levels are
        # skipped, so the next node hangs off the last one that existed.
        parent_label, parent_name = "软件", root_name
        path = "配网D3软件"

        for column, label in _MODULE_LEVELS:
            module = _cell_text(row, column)
            if not module:
                continue
            node_name = add_node(tx, label, module, {}, path)
            add_relationship(tx, parent_label, parent_name, label, node_name)
            parent_label, parent_name = label, node_name
            path = f"{path}|{module}"

        function_name = _cell_text(row, "功能名称")
        if function_name:
            description = _cell_text(row, "功能说明")
            leaf_name = add_node(
                tx, "功能名称", function_name, {"描述": description}, path
            )
            add_relationship(tx, parent_label, parent_name, "功能名称", leaf_name)

    def process_batch(tx, batch_df, root_name):
        """Import every row of ``batch_df`` inside a single transaction."""
        for _, row in batch_df.iterrows():
            add_row(tx, row, root_name)

    try:
        with driver.session() as session:
            # ``execute_write`` is the current name for a managed write
            # transaction; ``write_transaction`` is its deprecated
            # predecessor and is used only when the newer method is absent.
            run_write = getattr(session, "execute_write", None) or session.write_transaction

            run_write(clear_database)
            print("数据库已清空!")

            # Confirm the wipe actually left nothing behind.
            node_count = run_write(count_nodes)
            print(f"验证结果: 数据库中剩余节点数量 = {node_count}")
            if node_count == 0:
                print("数据库清空成功!")
            else:
                print(f"警告: 数据库清空不完全,仍有 {node_count} 个节点!")

            # Root node of the whole tree.
            root_name = run_write(add_node, "软件", "配网D3软件", {})

            # One transaction per batch; tune the size to the data volume.
            batch_size = 100
            total = len(df)
            for start in range(0, total, batch_size):
                batch_df = df.iloc[start:start + batch_size]
                run_write(process_batch, batch_df, root_name)
                print(f"已处理 {min(start + batch_size, total)}/{total} 条记录")
        print("知识图谱构建完成!")
    except Exception as e:
        print(f"构建知识图谱时发生错误: {e}")
    finally:
        driver.close()
def export_graph_to_html(output_file="knowledge_graph.html", limit=1000):
    """Export the knowledge graph stored in Neo4j to an interactive HTML file.

    Nodes are fetched first (at most ``limit`` of them), then the
    relationships whose two endpoints are both among those nodes (also capped
    at ``limit``). The result is loaded into a NetworkX ``DiGraph``, converted
    to a pyvis ``Network`` and saved.

    Args:
        output_file: Path of the HTML file to write.
        limit: Maximum number of nodes, and of relationships, to fetch; keeps
            the page small enough for a browser to render. Coerced with
            ``int()``.

    Returns:
        bool: ``True`` if the file was written, ``False`` if any exception
        occurred (the error is printed, not raised).
    """
    # Node colour per Neo4j label; labels not listed here are drawn grey.
    color_map = {
        "软件": "#FF5733",
        "页面": "#33FF57",
        "页面控件": "#57FF33",
        "TAB控件": "#3357FF",
        "分组控件": "#FF33A8",
        "属性控件": "#33FFF5",
        "功能名称": "#F5FF33"
    }

    # Sentinel so the ``finally`` clause knows whether a driver was created.
    driver = None
    try:
        # Passed to Cypher as a query parameter rather than spliced into the
        # query text, so it must be a real integer.
        limit = int(limit)

        driver = GraphDatabase.driver(URI, auth=AUTH)
        G = nx.DiGraph()

        with driver.session() as session:
            # elementId() is used instead of the deprecated id() function.
            nodes_result = session.run(
                "MATCH (n) RETURN elementId(n) as id, labels(n) as labels, "
                "n.name as name, n.display_name as display_name, "
                "n.original_name as original_name LIMIT $limit",
                limit=limit,
            )

            node_ids = []  # element ids of the fetched nodes, reused below
            for record in nodes_result:
                node_id = record["id"]
                node_ids.append(node_id)

                labels = record["labels"]
                node_label = labels[0] if labels else "Unknown"
                # Prefer the original name over the path-based unique id.
                node_name = record["original_name"] or record["name"]
                display_name = record["display_name"] or node_name

                G.add_node(
                    node_id,
                    label=display_name,
                    title=f"{node_label}: {display_name}",
                    color=color_map.get(node_label, "#CCCCCC")
                )

            # Only relationships between two fetched nodes are drawn.
            if node_ids:
                edges_result = session.run(
                    """
                    MATCH (a)-[r]->(b)
                    WHERE elementId(a) IN $node_ids AND elementId(b) IN $node_ids
                    RETURN elementId(a) as source, elementId(b) as target, type(r) as type
                    LIMIT $limit
                    """,
                    node_ids=node_ids,
                    limit=limit,
                )
                for record in edges_result:
                    G.add_edge(record["source"], record["target"], title=record["type"])

        # Build the pyvis view from the NetworkX graph.
        net = Network(height="800px", width="100%", directed=True, notebook=False)
        net.from_nx(G)

        # Physics layout and interaction settings.
        net.set_options("""
        {
        "physics": {
        "forceAtlas2Based": {
        "gravitationalConstant": -50,
        "centralGravity": 0.01,
        "springLength": 100,
        "springConstant": 0.1
        },
        "maxVelocity": 50,
        "solver": "forceAtlas2Based",
        "timestep": 0.35,
        "stabilization": {
        "enabled": true,
        "iterations": 1000
        }
        },
        "interaction": {
        "navigationButtons": true,
        "keyboard": true
        }
        }
        """)

        net.save_graph(output_file)
        print(f"知识图谱已成功导出为HTML文件: {output_file}")
        return True
    except Exception as e:
        print(f"导出知识图谱时发生错误: {e}")
        return False
    finally:
        if driver is not None:
            driver.close()
# Script entry point. Guarded so that importing this module (for example to
# reuse export_graph_to_html) does not wipe and rebuild the database as a
# side effect; running the file directly behaves as before.
if __name__ == "__main__":
    # Build the knowledge graph from the product feature list workbook.
    create_knowledge_graph("E:\\文件\\LLM_model\\RAG\\code\\GraphRAG\\data\\博微配网工程计价通D3软件产品功能清单.xlsx")
    # Export to an HTML file (currently disabled).
    # export_graph_to_html("配网D3软件知识图谱.html")