"""Build a Neo4j knowledge graph from an Excel feature list and export it to HTML.

Each spreadsheet row describes one feature located under up to four nested
module levels.  ``create_knowledge_graph`` turns every row into a chain of
nodes hanging off a single root node, and ``export_graph_to_html`` renders
whatever is stored in Neo4j as an interactive pyvis page.
"""

import pandas as pd
from neo4j import GraphDatabase
import networkx as nx
from pyvis.network import Network

# NOTE(review): the connection settings are hard-coded, including the
# password; consider loading them from the environment or a config file.
URI = "bolt://10.1.6.34:7687"
AUTH = ("neo4j", "password")

# Label and name of the single root node every module chain hangs off.
_ROOT_LABEL = "软件"
_ROOT_NAME = "配网D3软件"

# (spreadsheet column, node label) for each module level, outermost first.
_MODULE_LEVELS = (
    ("一级模块", "页面"),
    ("二级模块", "TAB控件"),
    ("三级模块", "分组控件"),
    ("四级模块", "属性控件"),
)

# Label of the leaf nodes created from the "功能名称" column.
_FUNCTION_LABEL = "功能名称"

# Number of spreadsheet rows written per transaction.
_BATCH_SIZE = 100


def _quote_identifier(identifier):
    """Return *identifier* backtick-quoted for use as a Cypher label or type.

    Labels and relationship types cannot be passed as query parameters, so
    they are spliced into the query text; quoting keeps names that contain
    spaces or punctuation from breaking the query.
    """
    return "`" + str(identifier).replace("`", "``") + "`"


def _run_tx(session, work, *args, write=True):
    """Run the transaction function *work* in a managed transaction.

    Prefers ``execute_write`` / ``execute_read`` and falls back to the older
    ``write_transaction`` / ``read_transaction`` when the installed driver
    does not provide them.

    Returns whatever *work* returns.
    """
    if write:
        modern, legacy = "execute_write", "write_transaction"
    else:
        modern, legacy = "execute_read", "read_transaction"
    runner = getattr(session, modern, None) or getattr(session, legacy)
    return runner(work, *args)


def _cell_text(row, column):
    """Return the stripped text of ``row[column]``; '' if missing or NaN."""
    value = row.get(column, "")
    return "" if pd.isna(value) else str(value).strip()


def _clear_database(tx):
    """Delete every node and relationship in the database."""
    tx.run("MATCH (n) DETACH DELETE n")
    print("数据库已清空!")


def _count_nodes(tx):
    """Print and return the number of nodes currently in the database."""
    count = tx.run("MATCH (n) RETURN count(n) as count").single()["count"]
    print(f"验证结果: 数据库中剩余节点数量 = {count}")
    return count


def _add_node(tx, label, name, properties=None, parent_path=""):
    """Merge a node identified by its parent path plus its name.

    The node's ``name`` property holds the unique id
    ``"<parent_path>|<name>"`` (or just ``name`` when there is no parent
    path), so equally named items under different parents stay distinct.
    The readable name is stored in ``display_name`` and ``original_name``.

    Returns the unique id.
    """
    # Copy so the caller's dict is never mutated.
    props = dict(properties or {})
    props["display_name"] = name
    props["original_name"] = name
    if parent_path:
        props["parent_path"] = parent_path
    # The label is also stored as a plain property.
    props["node_type"] = label

    unique_id = f"{parent_path}|{name}" if parent_path else name
    query = (
        f"MERGE (n:{_quote_identifier(label)} {{name: $unique_id}}) "
        "SET n += $properties"
    )
    tx.run(query, unique_id=unique_id, properties=props)
    return unique_id


def _add_relationship(tx, start_label, start_name, end_label, end_name,
                      rel_type="包含"):
    """Merge a ``rel_type`` relationship between two nodes given by unique id."""
    query = (
        f"MATCH (a:{_quote_identifier(start_label)} {{name: $start_name}}), "
        f"(b:{_quote_identifier(end_label)} {{name: $end_name}}) "
        f"MERGE (a)-[r:{_quote_identifier(rel_type)}]->(b)"
    )
    tx.run(query, start_name=start_name, end_name=end_name)


def _process_batch(tx, batch_df, root_id):
    """Write the module chain and feature node for every row of *batch_df*.

    Empty module levels are skipped, so a feature is attached to the deepest
    non-empty module of its row, or to the root when all levels are empty.
    """
    for _, row in batch_df.iterrows():
        parent_label, parent_id = _ROOT_LABEL, root_id
        path = _ROOT_NAME

        for column, label in _MODULE_LEVELS:
            module = _cell_text(row, column)
            if not module:
                continue
            module_id = _add_node(tx, label, module, {}, path)
            _add_relationship(tx, parent_label, parent_id, label, module_id)
            parent_label, parent_id = label, module_id
            path = f"{path}|{module}"

        function_name = _cell_text(row, "功能名称")
        if function_name:
            description = _cell_text(row, "功能说明")
            function_id = _add_node(
                tx, _FUNCTION_LABEL, function_name, {"描述": description}, path
            )
            _add_relationship(
                tx, parent_label, parent_id, _FUNCTION_LABEL, function_id
            )


def create_knowledge_graph(excel_file):
    """Rebuild the Neo4j knowledge graph from an Excel feature list.

    WARNING: every existing node and relationship in the target database is
    deleted before the import starts.

    Args:
        excel_file: Path of the workbook.  The columns read are "一级模块",
            "二级模块", "三级模块", "四级模块", "功能名称" and "功能说明";
            missing or empty cells are treated as empty strings.

    Errors raised while talking to Neo4j are printed, not re-raised.  Errors
    from reading the workbook or creating the driver propagate to the caller.
    """
    df = pd.read_excel(excel_file, engine="openpyxl")
    driver = GraphDatabase.driver(URI, auth=AUTH)

    try:
        with driver.session() as session:
            _run_tx(session, _clear_database)

            # Confirm the wipe actually left the database empty.
            node_count = _run_tx(session, _count_nodes, write=False)
            if node_count == 0:
                print("数据库清空成功!")
            else:
                print(f"警告: 数据库清空不完全,仍有 {node_count} 个节点!")

            root_id = _run_tx(session, _add_node, _ROOT_LABEL, _ROOT_NAME, {})

            # One transaction per batch keeps each transaction small.
            total = len(df)
            for start in range(0, total, _BATCH_SIZE):
                batch_df = df.iloc[start:start + _BATCH_SIZE]
                _run_tx(session, _process_batch, batch_df, root_id)
                print(f"已处理 {min(start + _BATCH_SIZE, total)}/{total} 条记录")

        print("知识图谱构建完成!")
    except Exception as e:
        # Best-effort script boundary: report the failure instead of crashing.
        print(f"构建知识图谱时发生错误: {e}")
    finally:
        driver.close()


def export_graph_to_html(output_file="knowledge_graph.html", limit=1000):
    """Export the knowledge graph stored in Neo4j as an interactive HTML file.

    Args:
        output_file: Path of the HTML file to write.
        limit: Maximum number of nodes (and, separately, of relationships)
            fetched, to keep the page responsive in the browser.

    Returns:
        bool: True when the file was written, False if any error occurred
        (the error is printed).
    """
    # Fill colour per node label; unknown labels fall back to grey.
    color_map = {
        "软件": "#FF5733",
        "页面": "#33FF57",
        "页面控件": "#57FF33",
        "TAB控件": "#3357FF",
        "分组控件": "#FF33A8",
        "属性控件": "#33FFF5",
        "功能名称": "#F5FF33",
    }

    driver = None
    try:
        driver = GraphDatabase.driver(URI, auth=AUTH)
        graph = nx.DiGraph()

        with driver.session() as session:
            # elementId() is used because id() is deprecated.
            nodes_result = session.run(
                "MATCH (n) "
                "RETURN elementId(n) as id, labels(n) as labels, "
                "n.name as name, n.display_name as display_name, "
                "n.original_name as original_name "
                "LIMIT $limit",
                limit=int(limit),
            )

            node_ids = []  # remembered so edges can be restricted to them
            for record in nodes_result:
                node_id = record["id"]
                node_ids.append(node_id)

                labels = record["labels"]
                node_label = labels[0] if labels else "Unknown"
                # Prefer the readable name over the path-based unique id and
                # fall back to the element id for nodes without any name.
                display_name = (
                    record["display_name"]
                    or record["original_name"]
                    or record["name"]
                    or str(node_id)
                )

                graph.add_node(
                    node_id,
                    label=display_name,
                    title=f"{node_label}: {display_name}",
                    color=color_map.get(node_label, "#CCCCCC"),
                )

            if node_ids:
                # Only keep relationships whose both ends were fetched above.
                edges_result = session.run(
                    "MATCH (a)-[r]->(b) "
                    "WHERE elementId(a) IN $node_ids "
                    "AND elementId(b) IN $node_ids "
                    "RETURN elementId(a) as source, elementId(b) as target, "
                    "type(r) as type "
                    "LIMIT $limit",
                    node_ids=node_ids,
                    limit=int(limit),
                )
                for record in edges_result:
                    graph.add_edge(
                        record["source"], record["target"], title=record["type"]
                    )

        net = Network(height="800px", width="100%", directed=True, notebook=False)
        net.from_nx(graph)

        # Physics layout and interaction options, passed to pyvis as JSON.
        net.set_options("""
        {
          "physics": {
            "forceAtlas2Based": {
              "gravitationalConstant": -50,
              "centralGravity": 0.01,
              "springLength": 100,
              "springConstant": 0.1
            },
            "maxVelocity": 50,
            "solver": "forceAtlas2Based",
            "timestep": 0.35,
            "stabilization": {
              "enabled": true,
              "iterations": 1000
            }
          },
          "interaction": {
            "navigationButtons": true,
            "keyboard": true
          }
        }
        """)

        net.save_graph(output_file)
        print(f"知识图谱已成功导出为HTML文件: {output_file}")
        return True
    except Exception as e:
        print(f"导出知识图谱时发生错误: {e}")
        return False
    finally:
        if driver is not None:
            driver.close()


# Build the knowledge graph.
# NOTE(review): this runs on import as well as on direct execution, and it
# wipes the database first; consider an `if __name__ == "__main__":` guard.
create_knowledge_graph("E:\\文件\\LLM_model\\RAG\\code\\GraphRAG\\data\\博微配网工程计价通D3软件产品功能清单.xlsx")

# Export to an HTML file.
# export_graph_to_html("配网D3软件知识图谱.html")