# GraphRAG/graph/graph_neo.py

import pandas as pd
from neo4j import GraphDatabase
import networkx as nx
from pyvis.network import Network
import networkx as nx


# Bolt endpoint of the Neo4j server; passed to GraphDatabase.driver below.
URI = "bolt://10.1.6.34:7687"
# (username, password) tuple handed to GraphDatabase.driver as `auth=`.
# NOTE(review): credentials are hard-coded in source — consider loading them
# from configuration or the environment instead.
AUTH = ("neo4j", "password")

def create_knowledge_graph(excel_file):
    """Rebuild the Neo4j knowledge graph from a feature-list Excel workbook.

    The database is emptied first (``MATCH (n) DETACH DELETE n``). A root
    ``软件`` node is then created and each spreadsheet row is written as a
    chain of module nodes (columns ``一级模块`` .. ``四级模块``) ending in an
    optional ``功能名称`` node that carries the ``功能说明`` text. Consecutive
    nodes in the chain are linked with a ``包含`` relationship. Rows are
    written in batches, one transaction per batch.

    Args:
        excel_file: Workbook to read; forwarded to ``pandas.read_excel`` with
            the ``openpyxl`` engine.

    Returns:
        None. Exceptions raised while working with the Neo4j session are
        caught and printed; failures while reading the workbook or creating
        the driver propagate to the caller.
    """
    df = pd.read_excel(excel_file, engine="openpyxl")

    driver = GraphDatabase.driver(URI, auth=AUTH)

    # Display name of the root node; also the first segment of every path.
    root_title = "配网D3软件"

    # (spreadsheet column, node label) for each hierarchy level, outermost first.
    module_levels = (
        ("一级模块", "页面"),
        ("二级模块", "TAB控件"),
        ("三级模块", "分组控件"),
        ("四级模块", "属性控件"),
    )

    # Number of spreadsheet rows written per transaction.
    batch_size = 100

    def clear_database(tx):
        """Delete every node and relationship in the database."""
        tx.run("MATCH (n) DETACH DELETE n")
        print("数据库已清空！")

    def count_nodes(tx):
        """Return the number of nodes currently stored (0 after a full wipe)."""
        result = tx.run("MATCH (n) RETURN count(n) as count")
        count = result.single()["count"]
        print(f"验证结果: 数据库中剩余节点数量 = {count}")
        return count

    def add_node(tx, label, name, properties=None, parent_path=""):
        """Merge a node keyed by ``parent_path|name`` and return that key.

        The key is stored in the node's ``name`` property so that equally
        named items under different parents stay distinct. The human-readable
        name is kept in ``display_name`` / ``original_name``.
        """
        # Work on a copy so the caller's dict is never modified.
        node_props = dict(properties) if properties else {}
        node_props["display_name"] = name
        node_props["original_name"] = name
        if parent_path:
            node_props["parent_path"] = parent_path
        # The label is duplicated as a property.
        node_props["node_type"] = label

        unique_id = f"{parent_path}|{name}" if parent_path else name

        # Labels cannot be Cypher parameters, hence the interpolation; every
        # label used here is a literal defined in this function.
        query = f"MERGE (n:{label} {{name: $unique_id}}) SET n += $properties"
        tx.run(query, unique_id=unique_id, properties=node_props)

        return unique_id

    def add_relationship(tx, start_label, start_name, end_label, end_name, rel_type="包含"):
        """Merge a ``rel_type`` relationship between two nodes found by unique key."""
        query = (
            f"MATCH (a:{start_label} {{name: $start_name}}), "
            f"(b:{end_label} {{name: $end_name}}) "
            f"MERGE (a)-[r:{rel_type}]->(b)"
        )
        tx.run(query, start_name=start_name, end_name=end_name)

    def cell_text(row, column):
        """Return a cell as a stripped string; missing or NaN cells become ''."""
        value = row.get(column, "")
        return "" if pd.isna(value) else str(value).strip()

    def process_batch(tx, batch_df, root_name):
        """Write every row of *batch_df* into the graph in one transaction."""
        for _, row in batch_df.iterrows():
            # The parent of the next node is the deepest non-empty module
            # level seen so far on this row, starting at the root.
            parent_label = "软件"
            parent_name = root_name
            path = root_title

            for column, label in module_levels:
                module = cell_text(row, column)
                if not module:
                    # An empty level is skipped; deeper levels attach to the
                    # last non-empty ancestor.
                    continue
                node_name = add_node(tx, label, module, {}, path)
                add_relationship(tx, parent_label, parent_name, label, node_name)
                parent_label, parent_name = label, node_name
                path = f"{path}|{module}"

            function_name = cell_text(row, "功能名称")
            if function_name:
                description = cell_text(row, "功能说明")
                function_node = add_node(
                    tx, "功能名称", function_name, {"描述": description}, path
                )
                add_relationship(tx, parent_label, parent_name, "功能名称", function_node)

    try:
        with driver.session() as session:
            # Prefer execute_write (driver 5.x+); fall back to the older
            # write_transaction on drivers that do not provide it.
            run_write = getattr(session, "execute_write", None) or session.write_transaction

            run_write(clear_database)

            # Confirm the wipe actually left the database empty.
            node_count = run_write(count_nodes)
            if node_count == 0:
                print("数据库清空成功!")
            else:
                print(f"警告: 数据库清空不完全，仍有 {node_count} 个节点!")

            root_name = run_write(add_node, "软件", root_title, {})

            total = len(df)
            for start in range(0, total, batch_size):
                batch_df = df.iloc[start:start + batch_size]
                run_write(process_batch, batch_df, root_name)
                print(f"已处理 {min(start+batch_size, total)}/{total} 条记录")

        print("知识图谱构建完成！")
    except Exception as e:
        print(f"构建知识图谱时发生错误: {e}")
    finally:
        driver.close()


def export_graph_to_html(output_file="knowledge_graph.html", limit=1000):
    """Export the knowledge graph stored in Neo4j as an interactive HTML file.

    Nodes are read from Neo4j, copied into a NetworkX ``DiGraph`` together
    with the relationships between them, and rendered with pyvis.

    Args:
        output_file: Path of the HTML file to write.
        limit: Maximum number of nodes fetched, to keep the page responsive
            in the browser. The relationship query is capped at the same
            number of rows.

    Returns:
        bool: True if the file was written; False if any step raised (the
        error is printed).
    """
    # Node colour per label; labels not listed fall back to grey.
    color_map = {
        "软件": "#FF5733",
        "页面": "#33FF57",
        "页面控件": "#57FF33",
        "TAB控件": "#3357FF",
        "分组控件": "#FF33A8",
        "属性控件": "#33FFF5",
        "功能名称": "#F5FF33"
    }

    # Created inside the try so connection errors are reported as False;
    # initialised here so the finally clause can tell whether it exists.
    driver = None
    try:
        # LIMIT is sent as a query parameter and must be an integer.
        limit = int(limit)

        driver = GraphDatabase.driver(URI, auth=AUTH)

        G = nx.DiGraph()

        with driver.session() as session:
            # elementId() is used instead of the deprecated id() function.
            nodes_result = session.run(
                "MATCH (n) RETURN elementId(n) as id, labels(n) as labels, "
                "n.name as name, n.display_name as display_name, "
                "n.original_name as original_name LIMIT $limit",
                limit=limit,
            )

            # Ids of the fetched nodes, used to restrict the edge query.
            node_ids = []
            for record in nodes_result:
                node_id = record["id"]
                node_ids.append(node_id)
                labels = record["labels"]
                node_label = labels[0] if labels else "Unknown"
                # Prefer the display name, then the original name, then the
                # unique key; fall back to the element id if none is set.
                display_name = (
                    record["display_name"]
                    or record["original_name"]
                    or record["name"]
                    or node_id
                )

                G.add_node(
                    node_id,
                    label=display_name,
                    title=f"{node_label}: {display_name}",
                    color=color_map.get(node_label, "#CCCCCC")
                )

            # Only relationships whose two endpoints were both fetched.
            if node_ids:
                edges_result = session.run(
                    """
                    MATCH (a)-[r]->(b)
                    WHERE elementId(a) IN $node_ids AND elementId(b) IN $node_ids
                    RETURN elementId(a) as source, elementId(b) as target, type(r) as type
                    LIMIT $limit
                    """,
                    node_ids=node_ids,
                    limit=limit,
                )

                for record in edges_result:
                    G.add_edge(record["source"], record["target"], title=record["type"])

        # Build the pyvis network from the NetworkX graph.
        net = Network(height="800px", width="100%", directed=True, notebook=False)
        net.from_nx(G)

        # Physics layout and interaction options.
        net.set_options("""
        {
          "physics": {
            "forceAtlas2Based": {
              "gravitationalConstant": -50,
              "centralGravity": 0.01,
              "springLength": 100,
              "springConstant": 0.1
            },
            "maxVelocity": 50,
            "solver": "forceAtlas2Based",
            "timestep": 0.35,
            "stabilization": {
              "enabled": true,
              "iterations": 1000
            }
          },
          "interaction": {
            "navigationButtons": true,
            "keyboard": true
          }
        }
        """)

        net.save_graph(output_file)
        print(f"知识图谱已成功导出为HTML文件: {output_file}")
        return True

    except Exception as e:
        print(f"导出知识图谱时发生错误: {e}")
        return False
    finally:
        if driver is not None:
            driver.close()


# Build the knowledge graph from the product feature-list workbook.
# NOTE: this call runs whenever the module is executed or imported, and
# create_knowledge_graph clears the whole Neo4j database before rebuilding it.
create_knowledge_graph("E:\\文件\\LLM_model\\RAG\\code\\GraphRAG\\data\\博微配网工程计价通D3软件产品功能清单.xlsx")

# Export to an HTML file (currently disabled).
# export_graph_to_html("配网D3软件知识图谱.html")