首次提交：上传本地文件夹

2025-03-31 17:28:23 +08:00
commit 0de349447c
439 changed files with 36643 additions and 0 deletions
@@ -0,0 +1,280 @@
+import pandas as pd
+from neo4j import GraphDatabase
+import networkx as nx
+from pyvis.network import Network
+import networkx as nx
+
+
+
+URI = "bolt://10.1.6.34:7687"  # Bolt endpoint handed to GraphDatabase.driver()
+AUTH = ("neo4j", "password")  # credentials tuple passed as the driver's auth= (presumably user, password)
+
+def create_knowledge_graph(excel_file):
+
+    df = pd.read_excel(excel_file, engine="openpyxl")
+
+    driver = GraphDatabase.driver(URI, auth=AUTH)
+
+    def clear_database(tx):
+        """清空Neo4j数据库中的所有节点和关系"""
+        tx.run("MATCH (n) DETACH DELETE n")
+        print("数据库已清空！")
+
+    def add_node(tx, label, name, properties=None, parent_path=""):
+        """添加节点，使用父路径+名称作为唯一标识"""
+        if properties is None:
+            properties = {}
+
+        # 添加显示名称
+        properties["display_name"] = name
+        
+        # 保存原始名称
+        properties["original_name"] = name
+        
+        # 保存父路径信息
+        if parent_path:
+            properties["parent_path"] = parent_path
+        
+        # 将标签也作为节点的属性存储
+        properties["node_type"] = label
+        
+        # 创建唯一标识符：父路径+名称
+        unique_id = f"{parent_path}|{name}" if parent_path else name
+        
+        # 使用唯一标识符作为name属性
+        query = f"MERGE (n:{label} {{name: $unique_id}}) SET n += $properties"
+        tx.run(query, unique_id=unique_id, properties=properties)
+
+        return unique_id
+
+    def add_relationship(tx, start_label, start_name, end_label, end_name, rel_type="包含"):
+        """添加关系，使用唯一标识符"""
+        query = (
+            f"MATCH (a:{start_label} {{name: $start_name}}), "
+            f"(b:{end_label} {{name: $end_name}}) "
+            f"MERGE (a)-[r:{rel_type}]->(b)"
+        )
+        tx.run(query, start_name=start_name, end_name=end_name)
+
+    try:
+        with driver.session() as session:
+            # 清空数据库
+            session.write_transaction(clear_database)
+            
+            # 验证数据库是否已清空
+            def verify_empty_database(tx):
+                result = tx.run("MATCH (n) RETURN count(n) as count")
+                count = result.single()["count"]
+                print(f"验证结果: 数据库中剩余节点数量 = {count}")
+                return count
+            
+            node_count = session.write_transaction(verify_empty_database)
+            if node_count == 0:
+                print("数据库清空成功!")
+            else:
+                print(f"警告: 数据库清空不完全，仍有 {node_count} 个节点!")
+
+            # 创建根节点：配网D3软件
+            root_name = session.write_transaction(add_node, "软件", "配网D3软件", {})
+
+            # 批量处理数据
+            batch_size = 100  # 可以根据数据量调整批次大小
+            for i in range(0, len(df), batch_size):
+                batch_df = df.iloc[i:i+batch_size]
+                
+                def process_batch(tx):
+                    for idx, row in batch_df.iterrows():
+                        # 提取数据并确保是字符串类型
+                        module_1 = str(row.get("一级模块", "")) if not pd.isna(row.get("一级模块", "")) else ""
+                        module_2 = str(row.get("二级模块", "")) if not pd.isna(row.get("二级模块", "")) else ""
+                        module_3 = str(row.get("三级模块", "")) if not pd.isna(row.get("三级模块", "")) else ""
+                        module_4 = str(row.get("四级模块", "")) if not pd.isna(row.get("四级模块", "")) else ""
+                        function_name = str(row.get("功能名称", "")) if not pd.isna(row.get("功能名称", "")) else ""
+                        description = str(row.get("功能说明", "")) if not pd.isna(row.get("功能说明", "")) else ""
+
+                        # 现在可以安全地调用strip()
+                        module_1 = module_1.strip()
+                        module_2 = module_2.strip()
+                        module_3 = module_3.strip()
+                        module_4 = module_4.strip()
+                        function_name = function_name.strip()
+                        description = description.strip()
+
+                        # 记录最后一个非空模块的标签和名称，用于连接功能名称节点
+                        last_module_label = "软件"
+                        last_module_name = root_name
+                        
+                        # 构建路径，用于创建唯一标识符
+                        path = "配网D3软件"
+
+                        # 添加页面节点（一级模块）
+                        if module_1:
+                            module_1_name = add_node(tx, "页面", module_1, {}, path)
+                            add_relationship(tx, last_module_label, last_module_name, "页面", module_1_name)
+                            last_module_label = "页面"
+                            last_module_name = module_1_name
+                            path = f"{path}|{module_1}"
+
+                        # 添加页面节点（二级模块）
+                        if module_2:
+                            module_2_name = add_node(tx, "TAB控件", module_2, {}, path)
+                            add_relationship(tx, last_module_label, last_module_name, "TAB控件", module_2_name)
+                            last_module_label = "TAB控件"
+                            last_module_name = module_2_name
+                            path = f"{path}|{module_2}"
+
+                        # 添加TAB控件节点（三级模块）
+                        if module_3:
+                            module_3_name = add_node(tx, "分组控件", module_3, {}, path)
+                            add_relationship(tx, last_module_label, last_module_name, "分组控件", module_3_name)
+                            last_module_label = "分组控件"
+                            last_module_name = module_3_name
+                            path = f"{path}|{module_3}"
+
+                        # 添加分组控件节点（四级模块）
+                        if module_4:
+                            module_4_name = add_node(tx, "属性控件", module_4, {}, path)
+                            add_relationship(tx, last_module_label, last_module_name, "属性控件", module_4_name)
+                            last_module_label = "属性控件"
+                            last_module_name = module_4_name
+                            path = f"{path}|{module_4}"
+
+                        # 添加功能名称节点 - 使用路径确保唯一性
+                        if function_name:
+                            function_name_unique = add_node(
+                                tx, "功能名称", function_name, {"描述": description}, path
+                            )
+                            add_relationship(tx, last_module_label, last_module_name, "功能名称", function_name_unique)
+                
+                session.write_transaction(process_batch)
+                print(f"已处理 {min(i+batch_size, len(df))}/{len(df)} 条记录")
+
+        print("知识图谱构建完成！")
+    except Exception as e:
+        print(f"构建知识图谱时发生错误: {e}")
+    finally:
+        driver.close()
+
+
+def export_graph_to_html(output_file="knowledge_graph.html", limit=1000):
+    """
+    将Neo4j中的知识图谱导出为交互式HTML文件
+    
+    参数:
+        output_file: 输出的HTML文件路径
+        limit: 限制节点数量，防止图过大导致浏览器卡顿
+    
+    返回:
+        bool: 是否成功导出
+    """
+    try:
+        # 连接Neo4j数据库
+        driver = GraphDatabase.driver(URI, auth=AUTH)
+        
+        # 创建一个NetworkX图
+        G = nx.DiGraph()
+        
+        with driver.session() as session:
+            # 获取所有节点 - 避免使用已弃用的id()函数
+            nodes_result = session.run(
+                f"MATCH (n) RETURN elementId(n) as id, labels(n) as labels, n.name as name, n.display_name as display_name, n.original_name as original_name LIMIT {limit}"
+            )
+            
+            # 节点颜色映射
+            color_map = {
+                "软件": "#FF5733",
+                "页面": "#33FF57",
+                "页面控件": "#57FF33",  
+                "TAB控件": "#3357FF",
+                "分组控件": "#FF33A8",
+                "属性控件": "#33FFF5",
+                "功能名称": "#F5FF33"
+            }
+            
+            # 添加节点到图中
+            node_ids = []  # 存储所有节点ID用于后续查询
+            for record in nodes_result:
+                node_id = record["id"]
+                node_ids.append(node_id)
+                node_label = record["labels"][0] if record["labels"] else "Unknown"
+                node_name = record["original_name"] or record["name"]  # 优先使用原始名称
+                display_name = record["display_name"] if record["display_name"] else node_name
+                
+                # 添加到NetworkX图
+                G.add_node(
+                    node_id, 
+                    label=display_name, 
+                    title=f"{node_label}: {display_name}", 
+                    color=color_map.get(node_label, "#CCCCCC")
+                )
+            
+            # 获取所有关系 - 修复查询语法，不再使用未定义的path变量
+            if node_ids:
+                edges_result = session.run(
+                    f"""
+                    MATCH (a)-[r]->(b) 
+                    WHERE elementId(a) IN $node_ids AND elementId(b) IN $node_ids
+                    RETURN elementId(a) as source, elementId(b) as target, type(r) as type 
+                    LIMIT {limit}
+                    """,
+                    node_ids=node_ids
+                )
+                
+                # 添加边到图中
+                for record in edges_result:
+                    source = record["source"]
+                    target = record["target"]
+                    rel_type = record["type"]
+                    
+                    # 添加到NetworkX图
+                    G.add_edge(source, target, title=rel_type)
+        
+        # 创建Pyvis网络图
+        net = Network(height="800px", width="100%", directed=True, notebook=False)
+        
+        # 从NetworkX图转换
+        net.from_nx(G)
+        
+        # 设置物理布局选项
+        net.set_options("""
+        {
+          "physics": {
+            "forceAtlas2Based": {
+              "gravitationalConstant": -50,
+              "centralGravity": 0.01,
+              "springLength": 100,
+              "springConstant": 0.1
+            },
+            "maxVelocity": 50,
+            "solver": "forceAtlas2Based",
+            "timestep": 0.35,
+            "stabilization": {
+              "enabled": true,
+              "iterations": 1000
+            }
+          },
+          "interaction": {
+            "navigationButtons": true,
+            "keyboard": true
+          }
+        }
+        """)
+        
+        # 保存为HTML文件
+        net.save_graph(output_file)
+        print(f"知识图谱已成功导出为HTML文件: {output_file}")
+        return True
+        
+    except Exception as e:
+        print(f"导出知识图谱时发生错误: {e}")
+        return False
+    finally:
+        if 'driver' in locals():
+            driver.close()
+
+
+# Build the knowledge graph. This runs whenever the module is executed or
+# imported, and create_knowledge_graph() wipes the target Neo4j database first.
+create_knowledge_graph("E:\\文件\\LLM_model\\RAG\\code\\GraphRAG\\data\\博微配网工程计价通D3软件产品功能清单.xlsx")
+
+# Export to an HTML file (currently disabled)
+# export_graph_to_html("配网D3软件知识图谱.html")