KG_generation/build_kg_ontolo.py

"""
通过本体层文件构建知识图谱
"""

from py2neo import Graph, Node, Relationship, NodeMatcher
from neo4j import GraphDatabase
import json
import os
import logging
import re
import configparser
import glob
import time

# 设置日志
logging.basicConfig(level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s")
logger = logging.getLogger(__name__)

# 全局变量
graph = None


def read_config(config_file="config.ini"):
    """
    读取配置文件

    Args:
        config_file: 配置文件路径

    Returns:
        config: 配置对象
    """
    config = configparser.ConfigParser()
    config.read(config_file, encoding="utf-8")
    return config


def connect_to_neo4j(uri, user, password):
    """
    连接到Neo4j数据库

    Args:
        uri: 数据库URI
        user: 用户名
        password: 密码

    Returns:
        graph: 数据库连接对象
    """
    global graph
    try:
        graph = Graph(uri, auth=(user, password))
        logger.info("成功连接到Neo4j数据库")
        return True
    except Exception as e:
        logger.error(f"连接Neo4j数据库失败: {e}")
        graph = None
        return False


def clear_database():
    """
    清空数据库
    """
    if graph is None:
        logger.error("数据库连接不可用")
        return False

    try:
        graph.run("MATCH (n) DETACH DELETE n")
        logger.info("已清空数据库")

        # 删除所有约束
        try:
            # 获取所有约束
            constraints = graph.run("SHOW CONSTRAINTS").data()
            for constraint in constraints:
                constraint_name = constraint.get("name")
                if constraint_name:
                    graph.run(f"DROP CONSTRAINT {constraint_name}")
                    logger.info(f"已删除约束: {constraint_name}")
        except Exception as e:
            logger.warning(f"删除约束失败: {e}")

        return True
    except Exception as e:
        logger.error(f"清空数据库失败: {e}")
        return False


# 解析本体层文件，获取实体类型、属性和关系定义
def parse_ontology_file(file_path="Ontology_Layer.txt"):
    """
    解析本体层文件，获取实体类型、属性和关系定义

    Args:
        file_path: 本体层文件路径

    Returns:
        entity_types: 实体类型和属性的字典 {实体类型名称: {属性名称: 属性类型}}
        entity_relationships: 实体间关系的列表 [(源实体类型, 关系类型, 目标实体类型)]
    """
    try:
        with open(file_path, "r", encoding="utf-8") as f:
            content = f.read()

        # 分割实体类型和关系部分
        parts = content.split("2. 实体间的关系")

        if len(parts) != 2:
            logger.error("本体层文件格式错误，无法找到实体类型和关系部分")
            return {}, []

        entity_types_content = parts[0]
        relationships_content = parts[1]

        # 解析实体类型和属性
        entity_types = {}
        current_entity = None

        # 移除 "1.实体类型" 标题行
        entity_types_content = entity_types_content.replace("1.实体类型", "").strip()

        # 按行分割
        lines = entity_types_content.strip().split("\n")

        for line in lines:
            line = line.strip()
            if not line:
                continue

            # 如果行不包含冒号，视为实体类型
            if ":" not in line:
                current_entity = line
                entity_types[current_entity] = {}
            # 否则视为属性定义
            else:
                if current_entity is None:
                    continue

                parts = line.split(":", 1)
                if len(parts) == 2:
                    attr_name = parts[0].strip()
                    attr_type = parts[1].strip()
                    entity_types[current_entity][attr_name] = attr_type

        # 解析实体间的关系
        entity_relationships = []

        # 匹配关系定义行: (:EntityType)-[:RELATIONSHIP]->(:EntityType)
        relationship_pattern = r"\(:(\w+)\)-\[:(\w+)\]->\(:(\w+)\)"

        for line in relationships_content.strip().split("\n"):
            line = line.strip()
            if not line:
                continue

            match = re.match(relationship_pattern, line)
            if match:
                source_entity, relationship_type, target_entity = match.groups()
                entity_relationships.append((source_entity, relationship_type, target_entity))

        logger.info(f"从本体层文件中解析出 {len(entity_types)} 个实体类型和 {len(entity_relationships)} 个关系定义")
        return entity_types, entity_relationships

    except Exception as e:
        logger.error(f"解析本体层文件失败: {e}")
        return {}, []


# 创建约束和索引以提高性能 - 现在不创建任何约束
def create_constraints_and_indexes():
    # 不创建任何约束
    logger.info("跳过创建约束")
    pass


# 获取实体之间的关系类型
def get_relationship_type(source_entity_type, target_entity_type, entity_relationships):
    """
    根据源实体类型和目标实体类型获取关系类型

    Args:
        source_entity_type: 源实体类型
        target_entity_type: 目标实体类型
        entity_relationships: 实体间关系的列表

    Returns:
        relationship_type: 关系类型，如果没找到则返回None
    """
    for source, rel_type, target in entity_relationships:
        if source == source_entity_type and target == target_entity_type:
            return rel_type

    # 如果没有找到匹配的关系，返回None
    logger.warning(f"未找到从 {source_entity_type} 到 {target_entity_type} 的关系定义，不创建关系")
    return None


# 创建根节点
def create_root_node():
    root = Node("EngineeringData", name="工程")
    graph.create(root)
    logger.info("创建根节点: 工程")
    return root


# --------------------------*项目划分*--------------------------
# 处理ProjectDivisionSet
def process_project_division_set(data, parent_node, entity_relationships):
    # 根据您提供的JSON结构，正确访问projectDivision数据
    if "projectData" in data and "projectDivision" in data["projectData"]:
        project_division = data["projectData"]["projectDivision"]
    elif "projectDivision" in data:
        project_division = data["projectDivision"]
    else:
        logger.warning("JSON中未找到projectDivision数据")
        logger.info(f"JSON顶层键: {list(data.keys())}")
        return

    logger.info(f"开始处理projectDivision，包含 {len(project_division)} 个顶级项目")

    # 创建新的ProjectDivisionSet节点 - 项目划分集
    division_set = Node("ProjectDivisionSet", name="项目划分集")
    graph.create(division_set)
    # 添加sortid属性，固定为"1"
    division_set["sortid"] = "1"
    graph.push(division_set)

    # 从本体层获取关系类型
    relationship_type = get_relationship_type("EngineeringData", "ProjectDivisionSet", entity_relationships)
    if relationship_type:  # 只有当关系类型不为None时才创建关系
        graph.create(Relationship(parent_node, relationship_type, division_set))

    # 处理ProjectDivisionTree
    tree_idx = 1  # 用于跟踪ProjectDivisionTree的序号

    for first_level_name, first_level_content in project_division.items():
        # 处理第一层下的内容，直接创建合并后的ProjectDivisionTree节点
        if isinstance(first_level_content, dict):
            # 处理一级名称，去掉"工程"字样
            processed_first_level = first_level_name.replace("工程", "")

            for second_level_name, second_level_content in first_level_content.items():
                # 确定最终节点名称
                if second_level_name == processed_first_level:
                    # 如果二级名称与处理后的一级名称相同，直接使用二级名称
                    final_name = second_level_name
                else:
                    # 否则组合二级名称和处理后的一级名称
                    final_name = f"{second_level_name}{processed_first_level}"

                # 创建ProjectDivisionTree节点
                division_tree = Node("ProjectDivisionTree", name=final_name)

                # 设置sortid属性，格式为"1.树索引"，确保每个树有唯一的sortid
                current_sort_path = [1, tree_idx]
                division_tree["sortid"] = ".".join(map(str, current_sort_path))
                tree_idx += 1  # 增加树索引，确保下一个树有不同的sortid

                # 保存原始名称作为属性
                division_tree["original_first_level"] = first_level_name
                division_tree["original_second_level"] = second_level_name

                # 如果有GUID，添加到节点属性
                guid = None
                if isinstance(first_level_content, dict) and "GUID" in first_level_content:
                    division_tree["first_level_GUID"] = first_level_content["GUID"]
                    guid = first_level_content["GUID"]

                graph.create(division_tree)
                graph.push(division_tree)  # 保存属性

                # 从本体层获取关系类型
                relationship_type = get_relationship_type(
                    "ProjectDivisionSet", "ProjectDivisionTree", entity_relationships
                )
                if relationship_type:
                    graph.create(Relationship(division_set, relationship_type, division_tree))

                # 移除GUID关系建立代码，避免重复创建
                # 关系将在establish_relationships函数中批量创建

                # 处理第二层下的ProjectDivisionItem列表
                if isinstance(second_level_content, list):
                    for item_idx, item in enumerate(second_level_content, start=1):
                        # 传递当前排序路径，为子项添加第三级索引
                        process_project_division_item(
                            item, division_tree, entity_relationships, current_sort_path + [item_idx]
                        )
                else:
                    logger.warning(f"ProjectDivisionTree {final_name} 的内容类型未知: {type(second_level_content)}")
        else:
            logger.warning(f"第一层 {first_level_name} 的内容类型未知: {type(first_level_content)}")


# 处理ProjectDivisionItem
def process_project_division_item(item, parent_node, entity_relationships, current_sort_path=None):
    # 提取必要属性
    guid = item.get("GUID", item.get("guid", ""))
    name = item.get("项目名称", "")

    if not guid and not name:
        logger.warning("ProjectDivisionItem缺少GUID和项目名称")
        return

    # 创建ProjectDivisionItem节点
    item_node = Node("ProjectDivisionItem", GUID=guid, name=name)

    # 设置sortid属性
    if current_sort_path:
        item_node["sortid"] = ".".join(map(str, current_sort_path))

    # 添加path属性，表示从ProjectDivisionItem到ProjectDivisionTree的路径，不包含节点类型
    if isinstance(parent_node, Node) and "ProjectDivisionTree" in parent_node.labels:
        # 如果父节点是ProjectDivisionTree，使用"父节点名称/当前节点名称"作为路径
        item_node["path"] = f"{parent_node['name']}/{name}"
        # logger.info(f"为ProjectDivisionItem {name} 设置path: {item_node['path']}")
    else:
        # 如果父节点是ProjectDivisionItem，使用"父节点path/当前节点名称"作为路径
        parent_path = parent_node.get("path", "")
        if parent_path:
            item_node["path"] = f"{parent_path}/{name}"
        else:
            # 如果父节点没有path属性（不应该发生，但为了健壮性）
            item_node["path"] = name
        # logger.info(f"为ProjectDivisionItem {name} 设置path: {item_node['path']}")

    # 添加其他属性
    for key, value in item.items():
        if key not in ["GUID", "项目名称", "children"] and value is not None:
            # 检查是否为资源库列表
            if key == "资源库列表" and isinstance(value, list):
                # 将资源库列表转换为分号分隔的字符串
                resource_names = []
                for resource in value:
                    if isinstance(resource, dict) and "资源库名称" in resource:
                        resource_names.append(resource["资源库名称"])
                item_node["资源库名称"] = "；".join(resource_names)
                # logger.info(f"将资源库列表转换为字符串: {item_node['资源库名称']}")
            # 检查值是否为基本类型
            elif isinstance(value, (str, int, float, bool)):
                item_node[key] = value
            # 如果是列表，尝试转换为分号分隔的字符串
            elif isinstance(value, list):
                try:
                    if all(isinstance(x, (str, int, float, bool)) for x in value):
                        item_node[key] = "；".join(str(x) for x in value)
                    else:
                        # 对于包含复杂对象的列表，尝试提取关键信息
                        extracted_values = []
                        for item_in_list in value:
                            if isinstance(item_in_list, dict):
                                # 尝试提取字典中的名称或标识符
                                for name_key in ["名称", "name", "标识", "id", "ID"]:
                                    if name_key in item_in_list:
                                        extracted_values.append(str(item_in_list[name_key]))
                                        break
                                else:
                                    # 如果没有找到名称键，使用第一个键值对
                                    if item_in_list:
                                        first_key = next(iter(item_in_list))
                                        extracted_values.append(f"{first_key}:{item_in_list[first_key]}")
                            else:
                                extracted_values.append(str(item_in_list))
                        item_node[key] = "；".join(extracted_values)
                except Exception as e:
                    logger.warning(f"无法将列表属性 {key} 转换为字符串: {e}")
            # 如果是字典，尝试转换为字符串
            elif isinstance(value, dict):
                try:
                    # 提取字典中的关键信息
                    extracted_info = []
                    for dict_key, dict_value in value.items():
                        if isinstance(dict_value, (str, int, float, bool)):
                            extracted_info.append(f"{dict_key}:{dict_value}")
                    item_node[key] = "；".join(extracted_info)
                except Exception as e:
                    logger.warning(f"无法将字典属性 {key} 转换为字符串: {e}")

    graph.create(item_node)
    graph.push(item_node)  # 保存属性
    # logger.info(f"创建ProjectDivisionItem节点: {name} (GUID: {guid})")

    # 创建与父节点的关系
    if isinstance(parent_node, Node) and "ProjectDivisionTree" in parent_node.labels:
        # 从本体层获取关系类型
        relationship_type = get_relationship_type("ProjectDivisionTree", "ProjectDivisionItem", entity_relationships)
        if relationship_type:
            graph.create(Relationship(parent_node, relationship_type, item_node))
        #     logger.info(f"创建关系: {parent_node['name']} {relationship_type} {item_node['name']}")
        # else:
        #     logger.info(f"不创建关系: {parent_node['name']} 到 {item_node['name']}")
    else:
        # 从本体层获取关系类型
        relationship_type = get_relationship_type("ProjectDivisionItem", "ProjectDivisionItem", entity_relationships)
        if relationship_type:
            graph.create(Relationship(parent_node, relationship_type, item_node))
        #     logger.info(f"创建关系: {parent_node['name']} {relationship_type} {item_node['name']}")
        # else:
        #     logger.info(f"不创建关系: {parent_node['name']} 到 {item_node['name']}")

    # 移除GUID关系建立代码，避免重复创建
    # 关系将在establish_relationships函数中批量创建

    # 处理子项
    if "children" in item and item["children"]:
        children = item["children"]
        # logger.info(f"ProjectDivisionItem {name} 有 {len(children)} 个子项")

        for child_idx, child in enumerate(children, start=1):
            child_type = child.get("type", child.get("类型", ""))

            # 创建子节点的排序路径
            child_sort_path = current_sort_path + [child_idx] if current_sort_path else None

            if child_type == "项目划分":
                # 递归处理子ProjectDivisionItem
                process_project_division_item(child, item_node, entity_relationships, child_sort_path)
            elif child_type == "8" or child_type == "清单":
                # 处理List类型节点
                process_list_item(child, item_node, entity_relationships, child_sort_path)
            else:
                # 处理ProjectQuantity及其子类
                process_project_quantity(child, item_node, entity_relationships, child_sort_path)


# 处理List及其子类
def process_list_item(list_item, parent_node, entity_relationships, current_sort_path=None):
    """处理清单类型的节点"""
    # 提取必要属性
    guid = list_item.get("GUID", list_item.get("guid", ""))
    list_name = list_item.get("清单名称")
    list_type = list_item.get("类型", "")

    # 创建List节点
    list_node = Node("List", guid=guid, name=list_name, type=list_type)

    # 设置sortid属性
    if current_sort_path:
        list_node["sortid"] = ".".join(map(str, current_sort_path))

    # 添加path属性，包含节点类型
    parent_path = parent_node.get("path", "")
    if parent_path:
        list_node["path"] = f"{parent_path}/{list_name}(清单)"
    else:
        # 如果父节点没有path属性（不应该发生，但为了健壮性）
        parent_name = parent_node.get("name", "")
        list_node["path"] = f"{parent_name}/{list_name}(清单)"
    # logger.info(f"为List节点 {list_name} 设置path: {list_node['path']}")

    # 添加其他属性
    for key, value in list_item.items():
        if key not in ["清单名称", "类型", "guid", "children"] and value is not None:
            # 检查是否为资源库列表
            if key == "资源库列表" and isinstance(value, list):
                # 将资源库列表转换为分号分隔的字符串
                resource_names = []
                for resource in value:
                    if isinstance(resource, dict) and "资源库名称" in resource:
                        resource_names.append(resource["资源库名称"])
                list_node["资源库名称"] = "；".join(resource_names)
                # logger.info(f"将资源库列表转换为字符串: {list_node['资源库名称']}")
            # 检查值是否为基本类型
            elif isinstance(value, (str, int, float, bool)):
                list_node[key] = value
            # 如果是列表，尝试转换为分号分隔的字符串
            elif isinstance(value, list):
                try:
                    if all(isinstance(x, (str, int, float, bool)) for x in value):
                        list_node[key] = "；".join(str(x) for x in value)
                    else:
                        # 对于包含复杂对象的列表，尝试提取关键信息
                        extracted_values = []
                        for item_in_list in value:
                            if isinstance(item_in_list, dict):
                                # 尝试提取字典中的名称或标识符
                                for name_key in ["名称", "name", "标识", "id", "ID"]:
                                    if name_key in item_in_list:
                                        extracted_values.append(str(item_in_list[name_key]))
                                        break
                                else:
                                    # 如果没有找到名称键，使用第一个键值对
                                    if item_in_list:
                                        first_key = next(iter(item_in_list))
                                        extracted_values.append(f"{first_key}:{item_in_list[first_key]}")
                            else:
                                extracted_values.append(str(item_in_list))
                        list_node[key] = "；".join(extracted_values)
                except Exception as e:
                    logger.warning(f"无法将列表属性 {key} 转换为字符串: {e}")
            # 如果是字典，尝试转换为字符串
            elif isinstance(value, dict):
                try:
                    # 提取字典中的关键信息
                    extracted_info = []
                    for dict_key, dict_value in value.items():
                        if isinstance(dict_value, (str, int, float, bool)):
                            extracted_info.append(f"{dict_key}:{dict_value}")
                    list_node[key] = "；".join(extracted_info)
                except Exception as e:
                    logger.warning(f"无法将字典属性 {key} 转换为字符串: {e}")

    graph.create(list_node)
    graph.push(list_node)  # 保存属性
    # logger.info(f"创建List节点: {list_name} (类型: {list_type})")

    # 创建与父节点的关系
    # 从本体层获取关系类型
    relationship_type = get_relationship_type(list(parent_node.labels)[0], "List", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, list_node))
    #     logger.info(f"创建关系: {parent_node['name']} {relationship_type} {list_name}")
    # else:
    #     logger.info(f"不创建关系: {parent_node['name']} 到 {list_name}")

    # 如果有GUID，尝试建立与CostSet的关系
    if guid:
        # 查找对应的CostSet节点
        cost_set_query = f"""
        MATCH (c:CostSet)
        WHERE c.GUID = '{guid}'
        RETURN c
        """
        cost_set_nodes = list(graph.run(cost_set_query))
        if cost_set_nodes:
            cost_set_node = cost_set_nodes[0]["c"]
            relationship_type = get_relationship_type("List", "CostSet", entity_relationships)
            if relationship_type:
                graph.create(Relationship(list_node, relationship_type, cost_set_node))
            #     logger.info(f"创建关系: {list_name} {relationship_type} CostSet (GUID: {guid})")
            # else:
            #     logger.info(f"不创建关系: {list_name} 到 CostSet (GUID: {guid})")

    # 处理子项
    if "children" in list_item and list_item["children"]:
        children = list_item["children"]
        # logger.info(f"List {list_name} 有 {len(children)} 个子项")

        for child_idx, child in enumerate(children, start=1):
            # 确定子项类型
            child_type = child.get("type", child.get("类型", ""))

            # 创建子节点的排序路径
            child_sort_path = current_sort_path + [child_idx] if current_sort_path else [1, child_idx]

            if child_type == "项目划分":
                # 递归处理子ProjectDivisionItem
                process_project_division_item(child, list_node, entity_relationships, child_sort_path)
            elif child_type == "8" or child_type == "清单":
                # 递归处理子List
                process_list_item(child, list_node, entity_relationships, child_sort_path)
            else:
                # 处理ProjectQuantity及其子类
                process_project_quantity(child, list_node, entity_relationships, child_sort_path)


# 处理ProjectQuantity及其子类
def process_project_quantity(quantity, parent_node, entity_relationships, current_sort_path=None):
    # 确定具体类型
    quantity_type = quantity.get("type", quantity.get("类型", ""))
    labels = ["ProjectQuantity"]
    type_name = "ProjectQuantity"

    # 支持数字和文本类型
    if quantity_type == "0" or quantity_type == "定额":
        labels.append("Quota")
        type_name = "定额"
    elif quantity_type == "1" or quantity_type == "主材":
        labels.append("MainMaterial")
        type_name = "主材"
    elif quantity_type == "5" or quantity_type == "设备":
        labels.append("Equipment")
        type_name = "设备"
    elif quantity_type == "配件":
        # 特殊处理：当类型为“配件”时，读取“配件类型”字段
        accessory_type = quantity.get("配件类型", "")  # 默认为“配件”
        if accessory_type == "主材":
            labels.append("MainMaterial")
            type_name = "主材"
        else:
            labels.append("Accessory")
            type_name = "配件"
    else:
        # 可选：处理未知类型
        pass

    # 创建节点
    quantity_name = quantity.get("项目名称", quantity.get("名称", ""))

    quantity_node = Node(*labels, name=quantity_name)

    # 设置sortid属性
    if current_sort_path:
        quantity_node["sortid"] = ".".join(map(str, current_sort_path))

    # 添加path属性，包含节点类型
    parent_path = parent_node.get("path", "")
    if parent_path:
        quantity_node["path"] = f"{parent_path}/{quantity_name}({type_name})"
    else:
        # 如果父节点没有path属性（不应该发生，但为了健壮性）
        parent_name = parent_node.get("name", "")
        quantity_node["path"] = f"{parent_name}/{quantity_name}({type_name})"
    # logger.info(f"为ProjectQuantity节点 {quantity_name} 设置path: {quantity_node['path']}")

    # 添加其他属性
    for key, value in quantity.items():
        if key not in ["名称", "项目名称", "材机列表", "children"] and value is not None:
            # 检查是否为资源库列表
            if key == "资源库列表" and isinstance(value, list):
                # 将资源库列表转换为分号分隔的字符串
                resource_names = []
                for resource in value:
                    if isinstance(resource, dict) and "资源库名称" in resource:
                        resource_names.append(resource["资源库名称"])
                quantity_node["资源库名称"] = "；".join(resource_names)
                # logger.info(f"将资源库列表转换为字符串: {quantity_node['资源库名称']}")
            # 检查值是否为基本类型
            elif isinstance(value, (str, int, float, bool)):
                quantity_node[key] = value
            # 如果是列表，尝试转换为分号分隔的字符串
            elif isinstance(value, list):
                try:
                    if all(isinstance(x, (str, int, float, bool)) for x in value):
                        quantity_node[key] = "；".join(str(x) for x in value)
                    else:
                        # 对于包含复杂对象的列表，尝试提取关键信息
                        extracted_values = []
                        for item_in_list in value:
                            if isinstance(item_in_list, dict):
                                # 尝试提取字典中的名称或标识符
                                for name_key in ["名称", "name", "标识", "id", "ID"]:
                                    if name_key in item_in_list:
                                        extracted_values.append(str(item_in_list[name_key]))
                                        break
                                else:
                                    # 如果没有找到名称键，使用第一个键值对
                                    if item_in_list:
                                        first_key = next(iter(item_in_list))
                                        extracted_values.append(f"{first_key}:{item_in_list[first_key]}")
                            else:
                                extracted_values.append(str(item_in_list))
                        quantity_node[key] = "；".join(extracted_values)
                except Exception as e:
                    logger.warning(f"无法将列表属性 {key} 转换为字符串: {e}")
            # 如果是字典，尝试转换为字符串
            elif isinstance(value, dict):
                try:
                    # 提取字典中的关键信息
                    extracted_info = []
                    for dict_key, dict_value in value.items():
                        if isinstance(dict_value, (str, int, float, bool)):
                            extracted_info.append(f"{dict_key}:{dict_value}")
                    quantity_node[key] = "；".join(extracted_info)
                except Exception as e:
                    logger.warning(f"无法将字典属性 {key} 转换为字符串: {e}")

    graph.create(quantity_node)
    graph.push(quantity_node)  # 保存属性
    # logger.info(f"创建ProjectQuantity节点: {quantity_name} (id: {quantity_id}, 类型: {quantity_type})")

    # 创建与父节点的关系
    # 从本体层获取关系类型
    parent_label = list(parent_node.labels)[0]  # 获取父节点的第一个标签
    relationship_type = get_relationship_type(parent_label, "ProjectQuantity", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, quantity_node))
    #     logger.info(f"创建关系: {parent_node['name']} {relationship_type} {quantity_name}")
    # else:
    #     logger.info(f"不创建关系: {parent_node['name']} 到 {quantity_name}")

    # 移除GUID关系建立代码，避免重复创建
    # 关系将在establish_relationships函数中批量创建

    # 处理材机列表或children
    materials = None

    # 先检查是否有材机列表
    if "材机列表" in quantity and quantity["材机列表"]:
        materials = quantity["材机列表"]
        # logger.info(f"ProjectQuantity {quantity_name} 有 {len(materials)} 个材机项")

        for mat_idx, material in enumerate(materials, start=1):
            # 创建子节点的排序路径
            mat_sort_path = current_sort_path + [mat_idx] if current_sort_path else [1, mat_idx]
            process_material_or_equipment(material, quantity_node, entity_relationships, mat_sort_path)

    # 如果没有材机列表，则检查是否有children
    elif "children" in quantity and quantity["children"]:
        children = quantity["children"]
        # logger.info(f"ProjectQuantity {quantity_name} 有 {len(children)} 个子项")

        for child_idx, child in enumerate(children, start=1):
            child_type = child.get("类型", child.get("type", ""))

            # 创建子节点的排序路径
            child_sort_path = current_sort_path + [child_idx] if current_sort_path else [1, child_idx]

            # 如果子项类型为人工、材料或机械，则视为MaterialOrEquipment
            if child_type in ["人工", "材料", "机械", "2", "3", "4"]:
                process_material_or_equipment(child, quantity_node, entity_relationships, child_sort_path)
            # 如果子项类型为主材、设备或定额，则递归处理为ProjectQuantity
            elif child_type in ["1", "主材", "5", "设备", "0", "定额"]:
                process_project_quantity(child, quantity_node, entity_relationships, child_sort_path)


# 处理MaterialOrEquipment
def process_material_or_equipment(material, parent_node, entity_relationships, current_sort_path=None):
    material_id = material.get("id", material.get("ID", ""))
    material_name = material.get("名称", "")
    material_type = material.get("类型", material.get("type", ""))

    if not material_id and not material_name:
        logger.warning("MaterialOrEquipment缺少id和名称")
        return

    # 创建唯一标识，结合父节点的ID和当前项的id
    parent_id = parent_node.get("id", parent_node.get("GUID", ""))
    unique_id = f"{parent_id}_{material_id}" if parent_id else material_id

    # 直接创建新节点，不检查是否已存在
    material_node = Node(
        "MaterialOrEquipment", id=material_id, unique_id=unique_id, name=material_name, type=material_type
    )

    # 设置sortid属性
    if current_sort_path:
        material_node["sortid"] = ".".join(map(str, current_sort_path))

    # 添加其他属性
    for key, value in material.items():
        if key not in ["id", "ID", "名称", "类型", "type"] and value is not None:
            # 检查是否为资源库列表
            if key == "资源库列表" and isinstance(value, list):
                # 将资源库列表转换为分号分隔的字符串
                resource_names = []
                for resource in value:
                    if isinstance(resource, dict) and "资源库名称" in resource:
                        resource_names.append(resource["资源库名称"])
                material_node["资源库名称"] = "；".join(resource_names)
                # logger.info(f"将资源库列表转换为字符串: {material_node['资源库名称']}")
            # 检查值是否为基本类型
            elif isinstance(value, (str, int, float, bool)):
                material_node[key] = value
            # 如果是列表，尝试转换为分号分隔的字符串
            elif isinstance(value, list):
                try:
                    if all(isinstance(x, (str, int, float, bool)) for x in value):
                        material_node[key] = "；".join(str(x) for x in value)
                    else:
                        # 对于包含复杂对象的列表，尝试提取关键信息
                        extracted_values = []
                        for item_in_list in value:
                            if isinstance(item_in_list, dict):
                                # 尝试提取字典中的名称或标识符
                                for name_key in ["名称", "name", "标识", "id", "ID"]:
                                    if name_key in item_in_list:
                                        extracted_values.append(str(item_in_list[name_key]))
                                        break
                                else:
                                    # 如果没有找到名称键，使用第一个键值对
                                    if item_in_list:
                                        first_key = next(iter(item_in_list))
                                        extracted_values.append(f"{first_key}:{item_in_list[first_key]}")
                            else:
                                extracted_values.append(str(item_in_list))
                        material_node[key] = "；".join(extracted_values)
                except Exception as e:
                    logger.warning(f"无法将列表属性 {key} 转换为字符串: {e}")
            # 如果是字典，尝试转换为字符串
            elif isinstance(value, dict):
                try:
                    # 提取字典中的关键信息
                    extracted_info = []
                    for dict_key, dict_value in value.items():
                        if isinstance(dict_value, (str, int, float, bool)):
                            extracted_info.append(f"{dict_key}:{dict_value}")
                    material_node[key] = "；".join(extracted_info)
                except Exception as e:
                    logger.warning(f"无法将字典属性 {key} 转换为字符串: {e}")

    graph.create(material_node)
    graph.push(material_node)  # 保存属性
    # logger.info(f"创建MaterialOrEquipment节点: {material_name} (id: {material_id}, 类型: {material_type})")

    # 创建与父节点的关系
    # 从本体层获取关系类型
    relationship_type = get_relationship_type("ProjectQuantity", "MaterialOrEquipment", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, material_node))
    #     logger.info(f"创建关系: {parent_node['name']} {relationship_type} {material_name}")
    # else:
    #     logger.info(f"不创建关系: {parent_node['name']} 到 {material_name}")


# --------------------------*费用预览*--------------------------
# 处理CostSet
def process_cost_set(data, root_node, entity_relationships):
    """处理费用预览，为每个具有GUID的节点创建CostSet节点"""
    # 根据JSON结构，访问expensePreview数据
    if "projectData" in data and "expensePreview" in data["projectData"]:
        expense_preview = data["projectData"]["expensePreview"]
    elif "expensePreview" in data:
        expense_preview = data["expensePreview"]
    else:
        logger.warning("JSON中未找到expensePreview数据")
        logger.info(f"JSON顶层键: {list(data.keys())}")
        return

    # logger.info("开始处理expensePreview")

    # 用于跟踪已处理的GUID，避免重复处理
    processed_guids = set()

    # 用于跟踪费用预览节点的序号
    cost_set_counter = 1

    # 递归函数，用于遍历expensePreview中的所有节点
    def traverse_expense_preview(node, path=""):
        nonlocal cost_set_counter

        if isinstance(node, dict):
            # 如果节点包含GUID，则创建CostSet节点
            if "GUID" in node:
                guid = node["GUID"]
                # 标准化GUID格式
                guid = guid.strip("{}")
                guid = "{" + guid.upper() + "}"

                if guid not in processed_guids:
                    processed_guids.add(guid)
                    # 创建CostSet节点
                    cost_set_node = Node("CostSet", GUID=guid, name="费用预览集")

                    # 设置sortid属性，每个CostSet都从1开始
                    cost_set_node["sortid"] = "1"

                    try:
                        graph.create(cost_set_node)
                        graph.push(cost_set_node)  # 保存属性
                        # logger.info(f"创建CostSet节点: 费用预览集 (GUID: {guid}, 路径: {path})")
                    except Exception as e:
                        logger.error(f"创建CostSet节点失败: {e}")
                        return

                    # 创建与根节点的关系
                    relationship_type = get_relationship_type("EngineeringData", "CostSet", entity_relationships)
                    if relationship_type:
                        try:
                            graph.create(Relationship(root_node, relationship_type, cost_set_node))
                            # logger.info(f"创建关系: {root_node['name']} {relationship_type} 费用预览集")
                        except Exception as e:
                            logger.error(f"创建关系失败: {e}")

                    # 处理sum数组中的CostItem
                    if "sum" in node and isinstance(node["sum"], list) and node["sum"]:
                        for item_idx, item in enumerate(node["sum"], start=1):
                            try:
                                # 为每个CostItem创建独立的排序路径，从1.1开始
                                item_sort_path = [1, item_idx]
                                process_cost_item(item, cost_set_node, entity_relationships, item_sort_path)
                            except Exception as e:
                                logger.error(f"处理CostItem时出错: {e}")

                    # 处理rcj数组中的MaterialandmachineCostItem
                    if "rcj" in node and isinstance(node["rcj"], list) and node["rcj"]:
                        for item_idx, item in enumerate(node["rcj"], start=1):
                            try:
                                # 为每个MaterialandmachineCostItem创建独立的排序路径，从2.1开始
                                item_sort_path = [2, item_idx]
                                process_material_machine_cost_item(
                                    item, cost_set_node, entity_relationships, item_sort_path
                                )
                            except Exception as e:
                                logger.error(f"处理MaterialandmachineCostItem时出错: {e}")

                    # 增加计数器，为下一个CostSet准备
                    cost_set_counter += 1

            # 递归处理子节点
            for key, value in node.items():
                if key == "children" and isinstance(value, list):
                    for i, child in enumerate(value, start=1):
                        traverse_expense_preview(child, f"{path}.children[{i}]")
        elif isinstance(node, list):
            for i, item in enumerate(node, start=1):
                traverse_expense_preview(item, f"{path}[{i}]")

    # 遍历expensePreview中的所有类别
    for category_idx, (category_name, category) in enumerate(expense_preview.items(), start=1):
        # 处理category是字典的情况
        if isinstance(category, dict):
            for subcategory_idx, (subcategory_name, subcategory) in enumerate(category.items(), start=1):
                if isinstance(subcategory, list):
                    for i, item in enumerate(subcategory, start=1):
                        traverse_expense_preview(item, f"expensePreview.{category_name}.{subcategory_name}[{i}]")
        # 处理category是列表的情况
        elif isinstance(category, list):
            for i, item in enumerate(category, start=1):
                traverse_expense_preview(item, f"expensePreview.{category_name}[{i}]")

    # logger.info(f"共处理了 {len(processed_guids)} 个CostSet节点")


# 处理CostItem - 简化处理逻辑，确保正确创建节点
def process_cost_item(item, parent_node, entity_relationships, current_sort_path=None):
    # 确保item是字典
    if not isinstance(item, dict):
        logger.error(f"CostItem不是字典类型: {item}")
        return

    item_id = item.get("id", "")
    cost = item.get("cost", "")

    if not item_id:
        logger.warning(f"跳过没有id的CostItem: {item}")
        return

    # 使用id作为名称
    name = item_id

    # 创建唯一标识，结合父节点的GUID和当前项的id
    parent_guid = parent_node.get("GUID", "")
    unique_id = f"{parent_guid}_{item_id}" if parent_guid else item_id

    # 创建CostItem节点 - 简化属性，只保留关键属性
    item_node = Node("CostItem", id=item_id, unique_id=unique_id, cost=cost, name=name)

    # 设置sortid属性
    if current_sort_path:
        item_node["sortid"] = ".".join(map(str, current_sort_path))

    # 添加其他属性 - 简化属性处理
    for key, value in item.items():
        if key not in ["id", "cost"] and value is not None:
            if isinstance(value, (str, int, float, bool)):
                item_node[key] = value

    graph.create(item_node)
    graph.push(item_node)  # 保存属性
    # logger.info(f"创建CostItem节点: {name} (id: {item_id}, cost: {cost})")

    # 创建与父节点的关系
    relationship_type = get_relationship_type("CostSet", "CostItem", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, item_node))
    #     logger.info(f"创建关系: {parent_node['name']} {relationship_type} {name}")
    # else:
    #     logger.info(f"不创建关系: {parent_node['name']} 到 {name}")


# 处理人材机合价项(MaterialandmachineCostItem)
def process_material_machine_cost_item(item, parent_node, entity_relationships, current_sort_path=None):
    """处理人材机合价项"""
    # 提取必要属性
    item_type = item.get("type", "")
    name = item.get("名称", "")
    code = item.get("编码", "")
    unit = item.get("单位", "")
    supplier = item.get("供货方", "")
    budget_price = item.get("预算价不含税", "")
    market_price = item.get("市场价不含税", "")
    budget_total = item.get("预算价合价", "")
    market_total = item.get("市场价合价", "")
    price_diff = item.get("价差", "")
    quantity = item.get("数量", "")

    if not name:
        logger.warning("MaterialandmachineCostItem缺少名称")
        return

    # 创建唯一标识，结合父节点的GUID和当前项的编码
    parent_guid = parent_node.get("GUID", "")
    unique_id = f"{parent_guid}_{code}" if parent_guid and code else (parent_guid or code)

    # 创建人材机合价项节点，确保所有属性值都是原始类型
    properties = {"type": str(item_type), "name": str(name), "unique_id": str(unique_id)}

    # 设置sortid属性
    if current_sort_path:
        properties["sortid"] = ".".join(map(str, current_sort_path))

    # 添加其他属性，确保都是字符串类型
    if supplier:
        properties["供货方"] = str(supplier)
    if code:
        properties["编码"] = str(code)
    if unit:
        properties["单位"] = str(unit)
    if budget_price:
        properties["预算价不含税"] = str(budget_price)
    if market_price:
        properties["市场价不含税"] = str(market_price)
    if budget_total:
        properties["预算价合价"] = str(budget_total)
    if market_total:
        properties["市场价合价"] = str(market_total)
    if price_diff:
        properties["价差"] = str(price_diff)
    if quantity:
        properties["数量"] = str(quantity)

    # 创建节点
    item_node = Node("MaterialandmachineCostItem", **properties)

    try:
        graph.create(item_node)
        graph.push(item_node)  # 保存属性
        # logger.info(f"创建MaterialandmachineCostItem节点: {name} (类型: {item_type}, 编码: {code})")
    except Exception as e:
        logger.error(f"创建MaterialandmachineCostItem节点失败: {e}")
        # 打印详细的节点属性，帮助调试
        for key, value in properties.items():
            logger.error(f"属性 {key}: {type(value)} = {value}")
        return

    # 创建与父节点的关系
    relationship_type = get_relationship_type("CostSet", "MaterialandmachineCostItem", entity_relationships)
    if relationship_type:
        try:
            graph.create(Relationship(parent_node, relationship_type, item_node))
            # logger.info(f"创建关系: {parent_node['name']} {relationship_type} {name}")
        except Exception as e:
            logger.error(f"创建关系失败: {e}")
    else:
        logger.info(f"不创建关系: {parent_node['name']} 到 {name}")


# 修改establish_relationships函数，添加项目划分与人材机合价集合的关系
def establish_relationships(entity_relationships):

    # 首先检查数据库中的节点情况
    logger.info("检查数据库中的节点情况...")

    # 检查ProjectDivisionItem节点
    pdi_query = """
    MATCH (pdi:ProjectDivisionItem)
    RETURN count(pdi) as count, collect(distinct pdi.GUID)[..10] as sample_guids
    """
    pdi_result = graph.run(pdi_query).data()[0]
    logger.info(f"数据库中有 {pdi_result['count']} 个ProjectDivisionItem节点")
    logger.info(f"ProjectDivisionItem节点GUID样本: {pdi_result['sample_guids']}")

    # 检查ProjectQuantity节点
    pq_query = """
    MATCH (pq:ProjectQuantity)
    RETURN count(pq) as count, collect(distinct pq.id)[..10] as sample_ids
    """
    pq_result = graph.run(pq_query).data()[0]
    logger.info(f"数据库中有 {pq_result['count']} 个ProjectQuantity节点")
    logger.info(f"ProjectQuantity节点ID样本: {pq_result['sample_ids']}")

    # 检查CostSet节点
    cs_query = """
    MATCH (cs:CostSet)
    RETURN count(cs) as count, collect(distinct cs.GUID)[..10] as sample_guids
    """
    cs_result = graph.run(cs_query).data()[0]
    logger.info(f"数据库中有 {cs_result['count']} 个CostSet节点")
    logger.info(f"CostSet节点GUID样本: {cs_result['sample_guids']}")

    # 获取从ProjectDivisionItem到CostSet的关系类型
    relationship_type = get_relationship_type("ProjectDivisionItem", "CostSet", entity_relationships)

    if relationship_type:  # 只有当关系类型不为None时才创建关系
        # 建立ProjectDivisionItem与CostSet的关系，使用更宽松的匹配条件，并避免重复创建
        query_division_item = f"""
        MATCH (pdi:ProjectDivisionItem), (cs:CostSet)
        WHERE toUpper(replace(replace(pdi.GUID, '{{', ''), '}}', '')) = toUpper(replace(replace(cs.GUID, '{{', ''), '}}', ''))
        AND pdi.GUID <> ""
        AND NOT EXISTS((pdi)-[:{relationship_type}]->(cs))
        CREATE (pdi)-[:{relationship_type}]->(cs)
        RETURN count(*) as count
        """
        try:
            result = graph.run(query_division_item)
            count = result.data()[0]["count"]
            logger.info(f"创建了 {count} 个 ProjectDivisionItem {relationship_type} CostSet 关系")
        except Exception as e:
            logger.error(f"创建ProjectDivisionItem与CostSet关系失败: {e}")
    else:
        logger.info("本体层中未定义ProjectDivisionItem到CostSet的关系，跳过创建")

    # 获取从ProjectQuantity到CostSet的关系类型
    relationship_type = get_relationship_type("ProjectQuantity", "CostSet", entity_relationships)

    if relationship_type:  # 只有当关系类型不为None时才创建关系
        # 建立ProjectQuantity与CostSet的关系，使用更宽松的匹配条件，并避免重复创建
        query_quantity = f"""
            MATCH (pq:ProjectQuantity), (cs:CostSet)
            WHERE
                toUpper(replace(replace(coalesce(pq.GUID, pq.guid, ''), '{{', ''), '}}', '')) =
                toUpper(replace(replace(coalesce(cs.GUID, cs.guid, ''), '{{', ''), '}}', ''))
                AND coalesce(pq.GUID, pq.guid, '') <> ''
                AND coalesce(cs.GUID, cs.guid, '') <> ''
                AND NOT EXISTS((pq)-[:{relationship_type}]->(cs))
            CREATE (pq)-[:{relationship_type}]->(cs)
            RETURN count(*) as count
            """
        try:
            result = graph.run(query_quantity)
            count = result.data()[0]["count"]
            logger.info(f"创建了 {count} 个 ProjectQuantity {relationship_type} CostSet 关系")
        except Exception as e:
            logger.error(f"创建ProjectQuantity与CostSet关系失败: {e}")
    else:
        logger.info("本体层中未定义ProjectQuantity到CostSet的关系，跳过创建")

    # 检查最终的关系数量
    # 检查ProjectDivisionItem到CostSet的关系
    pdi_cs_query = """
    MATCH (pdi:ProjectDivisionItem)-[r]->(cs:CostSet)
    RETURN count(r) as count
    """
    pdi_cs_result = graph.run(pdi_cs_query).data()[0]
    logger.info(f"数据库中最终有 {pdi_cs_result['count']} 个ProjectDivisionItem到CostSet的关系")

    # 检查ProjectQuantity到CostSet的关系
    pq_cs_query = """
    MATCH (pq:ProjectQuantity)-[r]->(cs:CostSet)
    RETURN count(r) as count
    """
    pq_cs_result = graph.run(pq_cs_query).data()[0]
    logger.info(f"数据库中最终有 {pq_cs_result['count']} 个ProjectQuantity到CostSet的关系")


# --------------------------*费用表*--------------------------
# 处理取费表模板集(FeeTableTemplateSet)
def process_fee_table_template_set(data, parent_node, entity_relationships):
    if "projectData" in data and "costSetting" in data["projectData"]:
        cost_setting = data["projectData"]["costSetting"]
    elif "costSetting" in data:
        cost_setting = data["costSetting"]
    else:
        logger.warning("JSON中未找到costSetting数据")
        logger.info(f"JSON顶层键: {list(data.keys())}")
        return

    # 创建取费表模板集节点
    fee_template_set_node = Node("FeeTableTemplateSet", name="取费表模板集")
    graph.create(fee_template_set_node)
    fee_template_set_node["sortid"] = "1"  # 固定为1
    graph.push(fee_template_set_node)  # 更新节点属性

    relationship_type = get_relationship_type("EngineeringData", "FeeTableTemplateSet", entity_relationships)
    graph.create(Relationship(parent_node, relationship_type, fee_template_set_node))

    # 开始处理子节点，从编号 [1] 开始
    idx = 1
    for template_set_name, template_set_content in cost_setting.items():
        if "tables" in template_set_content and isinstance(template_set_content["tables"], list):
            for template_item in template_set_content["tables"]:
                process_fee_table_template_item(template_item, fee_template_set_node, entity_relationships, [1, idx])
                idx += 1  # 同级模板项顺序递增


# 处理取费表模板项(FeeTableTemplateItem)
def process_fee_table_template_item(template_item, parent_node, entity_relationships, current_sort_path):
    # 只获取必须的name属性
    name = template_item.get("name", "")
    if not name:
        logger.warning("FeeTableTemplateItem缺少name")
        return

    # 创建基础节点（只设置name）
    template_item_node = Node("FeeTableTemplateItem", name=name)

    # 动态添加所有其他属性（排除name和children）
    for key, value in template_item.items():
        if key not in ["name", "children"] and value is not None:
            if isinstance(value, (str, int, float, bool)):
                template_item_node[key] = value  # 完全原样存储
            else:
                logger.warning(f"跳过非基本类型属性: {key}={value}(类型:{type(value)})")

    # 设置系统属性
    template_item_node["sortid"] = ".".join(map(str, current_sort_path))

    # 保存节点
    try:
        graph.create(template_item_node)
        graph.push(template_item_node)
    except Exception as e:
        logger.error(f"创建模板项节点失败: {str(e)}")
        return

    # 创建关系
    relationship_type = get_relationship_type("FeeTableTemplateSet", "FeeTableTemplateItem", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, template_item_node))

    # 处理子费用项（保持原有逻辑）
    if "children" in template_item and isinstance(template_item["children"], list):
        for i, fee_item in enumerate(template_item["children"], start=1):
            process_fee(fee_item, template_item_node, entity_relationships, current_sort_path + [i])


# 处理取费(FeeCollection)
def process_fee(fee_item, parent_node, entity_relationships, current_sort_path):
    # 只获取费用名称（必须字段）
    fee_name = fee_item.get("费用名称", "")
    if not fee_name:
        logger.warning("Fee缺少费用名称")
        return

    # 创建节点（只设置name属性）
    fee_node = Node("FeeCollection", name=fee_name)

    # 动态添加所有其他属性（排除children和费用名称）
    for key, value in fee_item.items():
        if key not in ["费用名称", "children"] and value is not None:
            # 只允许基本数据类型（Neo4j不支持复杂类型）
            if isinstance(value, (str, int, float, bool)):
                fee_node[key] = value  # 完全原样存储，不做任何映射
            else:
                logger.warning(f"跳过非基本类型属性: {key}={value}(类型:{type(value)})")

    # 设置系统属性
    fee_node["sortid"] = ".".join(map(str, current_sort_path))  # 排序路径

    # 设置path（保持原有逻辑）
    if "FeeCollection" in parent_node.labels:
        parent_path = parent_node.get("path", "")
        fee_node["path"] = f"{parent_path}/{fee_name}"
    else:
        parent_name = parent_node.get("name", "")
        fee_node["path"] = f"{parent_name}/{fee_name}"

    # 保存节点
    try:
        graph.create(fee_node)
        graph.push(fee_node)
    except Exception as e:
        logger.error(f"创建节点失败: {str(e)}")
        return

    # 创建关系
    parent_label = list(parent_node.labels)[0]
    relationship_type = get_relationship_type(parent_label, "FeeCollection", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, fee_node))

    # 递归处理子费用
    if "children" in fee_item and isinstance(fee_item["children"], list):
        for i, child_fee in enumerate(fee_item["children"], start=1):
            process_fee(child_fee, fee_node, entity_relationships, current_sort_path + [i])


# --------------------------*工程费用*--------------------------
# 处理费用表集(FeeScheduleSet)
def process_fee_schedule_set(data, parent_node, entity_relationships):
    """处理费用表集、费用表项和费用"""
    if "projectData" in data and "projectCost" in data["projectData"]:
        project_cost = data["projectData"]["projectCost"]
    elif "projectCost" in data:
        project_cost = data["projectCost"]
    else:
        logger.warning("JSON中未找到projectCost数据")
        logger.info(f"JSON顶层键: {list(data.keys())}")
        return

    # 创建 FeeScheduleSet 节点：工程费用
    fee_schedule_set = Node("FeeScheduleSet", name="工程费用")
    graph.create(fee_schedule_set)

    # 设置 sortid = "1"
    fee_schedule_set["sortid"] = "1"
    graph.push(fee_schedule_set)  # 保存属性

    # 创建关系
    relationship_type = get_relationship_type("EngineeringData", "FeeScheduleSet", entity_relationships)
    graph.create(Relationship(parent_node, relationship_type, fee_schedule_set))

    # 开始处理子项，从编号 1 开始
    for idx, (fee_table_name, fee_table_content) in enumerate(project_cost.items(), start=1):
        # 传入当前路径 [1, idx]，因为 FeeScheduleSet 是 1
        process_fee_schedule_item(fee_table_content, fee_table_name, fee_schedule_set, entity_relationships, [1, idx])


# 处理费用表项(FeeScheduleItem)
def process_fee_schedule_item(fee_table_content, item_name, parent_node, entity_relationships, current_sort_path):
    """处理费用表项（FeeScheduleItem）"""
    # 创建 FeeScheduleItem 节点
    fee_schedule_item = Node("FeeScheduleItem", name=item_name)
    fee_schedule_item["sortid"] = ".".join(map(str, current_sort_path))
    graph.create(fee_schedule_item)
    graph.push(fee_schedule_item)  # 保存 sortid

    # 创建与父节点的关系
    relationship_type = get_relationship_type("FeeScheduleSet", "FeeScheduleItem", entity_relationships)
    graph.create(Relationship(parent_node, relationship_type, fee_schedule_item))

    # 处理费用项列表
    if isinstance(fee_table_content, list):
        # 原有格式：直接是费用项列表
        for i, fee_item in enumerate(fee_table_content, start=1):
            process_fee_item(fee_item, fee_schedule_item, entity_relationships, current_sort_path + [i])
    elif isinstance(fee_table_content, dict) and "children" in fee_table_content:
        # 新格式：包含children字段的字典
        if isinstance(fee_table_content["children"], list):
            for i, fee_item in enumerate(fee_table_content["children"], start=1):
                process_fee_item(fee_item, fee_schedule_item, entity_relationships, current_sort_path + [i])
        else:
            logger.warning(
                f"FeeScheduleItem {item_name} 的children不是列表，类型: {type(fee_table_content['children'])}"
            )
    else:
        logger.warning(f"FeeScheduleItem {item_name} 的内容类型未知: {type(fee_table_content)}")


# 处理费用项(Fee)
def process_fee_item(fee, parent_node, entity_relationships, current_sort_path=None):
    """
    处理费用项（Fee）
    current_sort_path: 当前层级编号路径，如 [1, 2, 1]
    """
    if current_sort_path is None:
        current_sort_path = [1]

    # 尝试获取"费用名称"，如果不存在，则尝试获取"项目名称"
    name = fee.get("费用名称", fee.get("项目名称", fee.get("项目划分名称", fee.get("名称", ""))))
    if not name:
        logger.warning(f"Fee缺少费用名称或项目名称，当前对象: {fee}")
        return
    # 创建基础节点，只设置name属性
    fee_node = Node("Fee", name=name)

    # 动态添加所有其他属性（排除"费用名称"、"项目名称"和"children"）
    for key, value in fee.items():
        # 只处理基本数据类型，跳过children和复杂对象
        if key not in ["费用名称", "项目名称", "children"] and value is not None:
            if isinstance(value, (str, int, float, bool)):
                fee_node[key] = value
            elif isinstance(value, (list, dict)):
                logger.warning(f"跳过复杂属性: {key} (类型: {type(value)})")

    # 设置 sortid
    fee_node["sortid"] = ".".join(map(str, current_sort_path))

    # 设置 path 属性
    if "Fee" in parent_node.labels:
        parent_path = parent_node.get("path", "")
        fee_node["path"] = f"{parent_path}/{name}"
    else:
        parent_name = parent_node.get("name", "")
        fee_node["path"] = f"{parent_name}/{name}"

    graph.create(fee_node)
    graph.push(fee_node)

    # 创建关系
    parent_label = list(parent_node.labels)[0]
    relationship_type = get_relationship_type(parent_label, "Fee", entity_relationships)
    if relationship_type:
        graph.create(Relationship(parent_node, relationship_type, fee_node))

    # 递归处理子费用（从children获取）
    if "children" in fee and isinstance(fee["children"], list):
        for i, child in enumerate(fee["children"], start=1):
            process_fee_item(child, fee_node, entity_relationships, current_sort_path + [i])


# --------------------------*工程属性*--------------------------
# 处理工程属性集(ProjectPropertySet)
def process_project_property_set(data, parent_node, entity_relationships):
    # 检查projectInfo是否存在
    if "projectData" in data and "projectInfo" in data["projectData"]:
        project_info = data["projectData"]["projectInfo"]
    elif "projectInfo" in data:
        project_info = data["projectInfo"]
    else:
        logger.warning("JSON中未找到projectInfo数据")
        logger.info(f"JSON顶层键: {list(data.keys())}")
        return

    # 创建工程属性集节点
    property_set_node = Node("ProjectPropertySet", name="工程属性")
    property_set_node["sortid"] = "1"  # 固定根编号
    graph.create(property_set_node)
    graph.push(property_set_node)  # 保存属性

    # 创建关系
    relationship_type = get_relationship_type("EngineeringData", "ProjectPropertySet", entity_relationships)
    graph.create(Relationship(parent_node, relationship_type, property_set_node))

    # 添加上传时间属性
    upload_date = time.strftime("%Y/%m/%d")
    current_sort_path = [1, 0]  # 确保排在最前面
    create_property_node("上传时间", upload_date, property_set_node, entity_relationships, current_sort_path)

    # 处理每个属性，从编号 1 开始
    for idx, (key, value) in enumerate(project_info.items(), start=1):
        if value is not None:
            current_sort_path = [1, idx]  # 从 1.x 开始
            create_property_node(key, value, property_set_node, entity_relationships, current_sort_path)


# 处理工程属性（ProjectProperty）
def create_property_node(key, value, parent_node, entity_relationships, current_sort_path):
    """递归创建 ProjectProperty 节点，支持嵌套结构"""
    # 创建当前节点
    property_node = Node("ProjectProperty", name=key)
    property_node["sortid"] = ".".join(map(str, current_sort_path))

    # 设置 value 属性（基本类型或摘要）
    if isinstance(value, (dict, list)):
        type_name = "字典" if isinstance(value, dict) else "列表"
        count = len(value)
        property_node["value"] = f"{type_name}({count}项)"
    else:
        property_node["value"] = str(value)

    graph.create(property_node)
    graph.push(property_node)

    # 创建与父节点的关系
    rel_type = (
        get_relationship_type("ProjectPropertySet", "ProjectProperty", entity_relationships)
        if "ProjectPropertySet" in parent_node.labels
        else get_relationship_type("ProjectProperty", "ProjectProperty", entity_relationships)
    )

    graph.create(Relationship(parent_node, rel_type, property_node))

    # 递归处理嵌套结构
    if isinstance(value, dict):
        for sub_key, sub_value in value.items():
            if sub_value is not None:
                sub_full_name = f"{key}.{sub_key}"
                create_property_node(
                    sub_full_name,
                    sub_value,
                    property_node,
                    entity_relationships,
                    current_sort_path + [1],  # 暂用 1，后面可优化为真实顺序
                )
    elif isinstance(value, list):
        for i, item in enumerate(value):
            list_node_name = f"{key}[{i}]"
            list_node = Node("ProjectProperty", name=list_node_name, value="列表项")
            list_sort_path = current_sort_path + [i + 1]
            list_node["sortid"] = ".".join(map(str, list_sort_path))
            graph.create(list_node)
            graph.push(list_node)

            rel_type = get_relationship_type("ProjectProperty", "ProjectProperty", entity_relationships)
            graph.create(Relationship(property_node, rel_type, list_node))

            if isinstance(item, dict):
                for sub_key, sub_value in item.items():
                    if sub_value is not None:
                        sub_full_name = f"{key}[{i}].{sub_key}"
                        create_property_node(
                            sub_full_name, sub_value, list_node, entity_relationships, list_sort_path + [1]
                        )
            else:
                # 基本类型列表项
                item_node = Node("ProjectProperty", name=f"{key}[{i}]", value=str(item))
                item_node["sortid"] = ".".join(map(str, list_sort_path))
                graph.create(item_node)
                graph.push(item_node)
                graph.create(Relationship(list_node, rel_type, item_node))


# 将创建知识图谱的功能封装为函数
def create_KG(json_file_path, ontology_file_path="Ontology_Layer.txt"):
    """
    创建知识图谱

    Args:
        json_file_path: JSON文件路径
        ontology_file_path: 本体层文件路径

    Returns:
        success: 是否成功创建知识图谱
    """
    try:
        # 解析本体层文件
        entity_types, entity_relationships = parse_ontology_file(ontology_file_path)
        if not entity_types or not entity_relationships:
            logger.error("解析本体层文件失败")
            return False

        # 读取JSON文件
        try:
            with open(json_file_path, "r", encoding="utf-8") as f:
                data = json.load(f)
        except Exception as e:
            logger.error(f"读取JSON文件失败: {e}")
            return False

        # 获取文件名作为工程名称
        file_name = os.path.basename(json_file_path)
        project_name = os.path.splitext(file_name)[0]

        # 为每个JSON文件创建一个新的EngineeringData节点
        root_node = Node("EngineeringData", name=project_name)
        graph.create(root_node)
        logger.info(f"创建根节点: {project_name}")

        # 处理费用预览，创建CostSet节点及其子节点
        process_cost_set(data, root_node, entity_relationships)

        # 处理项目划分，创建ProjectDivisionSet和ProjectDivisionItem节点
        process_project_division_set(data, root_node, entity_relationships)

        # 处理取费表模板集
        process_fee_table_template_set(data, root_node, entity_relationships)

        # 处理费用表集
        process_fee_schedule_set(data, root_node, entity_relationships)

        # 处理工程属性集
        process_project_property_set(data, root_node, entity_relationships)

        # 建立实体间的关系
        establish_relationships(entity_relationships)

        logger.info(f"成功创建知识图谱: {json_file_path}")
        return True

    except Exception as e:
        logger.error(f"创建知识图谱失败: {e}")
        import traceback

        traceback.print_exc()
        return False


# 添加一个新的函数，用于处理文件夹中的多个JSON文件
def create_KGs_from_folder(input_folder, ontology_file_path="Ontology_Layer.txt"):
    """
    从文件夹中创建多个知识图谱

    Args:
        input_folder: 输入文件夹路径，包含多个JSON文件
        ontology_file_path: 本体层文件路径

    Returns:
        success_count: 成功处理的文件数量
        total_count: 总文件数量
        deleted_projects: 被删除的同名工程列表
    """
    # 获取输入文件夹中的所有JSON文件
    json_files = []
    deleted_projects = []

    # 如果输入是文件夹，则获取文件夹中的所有JSON文件
    if os.path.isdir(input_folder):
        json_files = glob.glob(os.path.join(input_folder, "*.json"))
    # 如果输入是文件，则直接使用该文件
    elif os.path.isfile(input_folder) and input_folder.endswith(".json"):
        json_files = [input_folder]
    else:
        logger.error(f"输入路径无效: {input_folder}")
        return 0, 0, []

    if not json_files:
        logger.error(f"未找到JSON文件: {input_folder}")
        return 0, 0, []

    # 连接到Neo4j数据库
    config = read_config()
    if not connect_to_neo4j(config["neo4j"]["uri"], config["neo4j"]["user"], config["neo4j"]["password"]):
        return 0, 0, []

    # 处理每个JSON文件
    success_count = 0
    for json_file in json_files:
        logger.info(f"处理文件: {json_file}")

        # 获取文件名作为工程名称
        file_name = os.path.basename(json_file)
        project_name = os.path.splitext(file_name)[0]

        # 读取JSON数据用于比较
        try:
            with open(json_file, "r", encoding="utf-8") as f:
                project_data = json.load(f)
        except Exception as e:
            logger.error(f"读取JSON文件失败: {e}")
            continue

        # 检查是否存在同名工程，如果存在则删除
        if check_and_delete_duplicate_project(project_name, project_data):
            deleted_projects.append(project_name)
            logger.info(f"删除同名工程: {project_name}")

        # 创建知识图谱
        if create_KG(json_file, ontology_file_path):
            success_count += 1

    logger.info(f"知识图谱构建完成，成功处理 {success_count}/{len(json_files)} 个文件")
    return success_count, len(json_files), deleted_projects


# 检查是否存在同名工程，如果存在则删除
def check_and_delete_duplicate_project(project_name, project_data):
    """
    检查是否存在同名工程，如果存在则删除其整个连通子图（包括所有出边/入边连接的节点）

    Args:
        project_name: 工程名称（对应 EngineeringData 的 name 属性）
        project_data: 工程数据，用于进一步比较内部的“工程名称”

    Returns:
        deleted: 是否删除了至少一个工程
    """
    try:
        # 查询是否存在同名的 EngineeringData 节点
        query = """
        MATCH (ed:EngineeringData {name: $name})
        RETURN id(ed) AS node_id
        """

        config = read_config()
        with GraphDatabase.driver(
            config["neo4j"]["uri"], auth=(config["neo4j"]["user"], config["neo4j"]["password"])
        ) as driver:
            deleted_count = 0

            with driver.session() as session:
                result = session.run(query, name=project_name)
                records = list(result)

                if not records:
                    return False  # 没有找到同名工程

                logger.info(f"找到 {len(records)} 个同名工程节点: {project_name}")

                # ====== 是否需要根据内部“工程名称”进行精确匹配？======
                need_exact_match = (
                    project_data and "projectData" in project_data and "projectInfo" in project_data["projectData"]
                )
                new_project_name = (
                    project_data["projectData"]["projectInfo"].get("工程名称", "") if need_exact_match else ""
                )

                # 遍历每个同名的 EngineeringData 节点
                for record in records:
                    node_id = record["node_id"]
                    should_delete = False

                    if need_exact_match and new_project_name:
                        # 需要精确匹配内部“工程名称”
                        matched = False
                        for rel_type in ["HAS_CHILD", "USE"]:
                            prop_query = f"""
                            MATCH (ed:EngineeringData)-[:{rel_type}*1..3]->(pp:ProjectProperty {{name: "工程名称"}})
                            WHERE id(ed) = $node_id
                            RETURN pp.value AS project_name
                            """
                            prop_result = session.run(prop_query, node_id=node_id)
                            prop_record = prop_result.single()

                            if prop_record and prop_record["project_name"] == new_project_name:
                                matched = True
                                break

                        should_delete = matched
                    else:
                        # 不需要精确匹配，直接删除所有同名工程
                        should_delete = True

                    # ====== 执行删除：删除从该 EngineeringData 可达的所有节点（任意关系、任意方向）======
                    if should_delete:
                        delete_query = """
                        MATCH (ed:EngineeringData)
                        WHERE id(ed) = $node_id

                        // 从 ed 出发，匹配所有通过任意关系、任意方向、任意深度连接的节点（包括 ed 自己）
                        MATCH (ed)-[*0..]-(n)

                        // 删除所有匹配到的节点及其关系
                        DETACH DELETE n
                        """
                        session.run(delete_query, node_id=node_id)
                        logger.info(f"已递归删除工程及其所有关联节点: {project_name} (Node ID: {node_id})")
                        deleted_count += 1

            return deleted_count > 0

    except Exception as e:
        logger.error(f"检查并删除同名工程失败: {e}")
        import traceback

        traceback.print_exc()
        return False


# if __name__ == "__main__":
#     input_folder = "project2json/outputs/GPRB1MJL/final"
#     create_KGs_from_folder(input_folder)