# KG_generation/unified_structure.py

"""
第三步：将bcl计算结果补充到json文件中
"""

import json
import os
from copy import deepcopy
import uuid


class ProjectExpenseProcessor:
    def __init__(self) -> None:
        """Create a processor that holds no project data yet."""
        # Stays None until initialize_project_data() stores a working copy here.
        self.project_data = None

    def initialize_project_data(self, project_data):
        """Store a deep copy of ``project_data`` and print a diagnostic summary.

        The copy guarantees that later edits never touch the caller's object.
        The summary lists the top-level keys, the keys of ``projectData`` and,
        for ``projectData.expensePreview``, each category, each group and the
        GUID of each item. The dump is informational only: any part that does
        not have the expected shape (dict of categories -> dict of groups ->
        list of item dicts) is reported by type and skipped instead of raising.

        :param project_data: parsed project JSON, expected to be a dict.
        :return: ``self``, so calls can be chained.
        """
        self.project_data = deepcopy(project_data)

        if not isinstance(self.project_data, dict):
            print(f"警告: 项目数据不是字典类型: {type(self.project_data)}")
            return self

        print(f"项目数据结构: {list(self.project_data.keys())}")

        if "projectData" not in self.project_data:
            print("警告: projectData 结构不存在!")
            return self

        print("projectData 结构存在")
        inner = self.project_data["projectData"]
        if not isinstance(inner, dict):
            # Nothing further can be listed for a non-dict projectData.
            return self
        print(f"projectData 子结构: {list(inner.keys())}")

        if "expensePreview" not in inner:
            print("警告: expensePreview 结构不存在于 projectData 中!")
            return self

        print("expensePreview 结构存在于 projectData 中")
        expense_preview = inner["expensePreview"]
        if not isinstance(expense_preview, dict):
            return self
        print(f"expensePreview 类别: {list(expense_preview.keys())}")

        for category_name, category in expense_preview.items():
            print(f"类别: {category_name}, 类型: {type(category)}")
            if not isinstance(category, dict):
                continue
            for group_name, group in category.items():
                print(f"  组: {group_name}, 类型: {type(group)}")
                if not isinstance(group, list):
                    continue
                for i, item in enumerate(group):
                    if isinstance(item, dict) and "GUID" in item:
                        print(f"    项目 {i} GUID: {item['GUID']}")

        return self

    def check_guids_in_division(self, node=None, path=""):
        """Print every project-division GUID and whether expensePreview has it.

        Called without ``node``, the walk starts from each category of
        ``projectData.projectDivision`` (the location the other lookups in this
        class read). A top-level ``projectDivision`` key is used only as a
        fallback when the nested one is absent. Only lists and the ``children``
        key of dicts are descended into.

        :param node: sub-tree to inspect; ``None`` means "start from the root".
        :param path: human-readable location of ``node``, used in the printout.
        :return: ``None``; the findings are printed.
        """

        def walk(current, where):
            # A dedicated walker keeps ``None`` entries (JSON null) from being
            # mistaken for the "start from the root" sentinel of the public API.
            if isinstance(current, list):
                for index, item in enumerate(current):
                    walk(item, f"{where}[{index}]")
            elif isinstance(current, dict):
                if "GUID" in current:
                    guid = current["GUID"]
                    print(f"项目划分GUID: {guid} 在路径: {where}")
                    # The lookup normalises with str methods, so only a string can match.
                    found = self.find_expense_preview_node(guid) if isinstance(guid, str) else None
                    if found:
                        print("  ✓ 在费用预览中找到对应节点")
                    else:
                        print("  ✗ 在费用预览中未找到对应节点")

                if "children" in current:
                    walk(current["children"], f"{where}.children")

        if node is not None:
            walk(node, path)
            return

        data = self.project_data if isinstance(self.project_data, dict) else {}
        project = data.get("projectData")
        division = project.get("projectDivision") if isinstance(project, dict) else None
        if division is None:
            # Backward compatibility with data that keeps the division at top level.
            division = data.get("projectDivision")

        if isinstance(division, dict):
            for category_name, category in division.items():
                walk(category, category_name)

    def find_project_division_node(self, target_guid, node=None, path=""):
        """Return the first project-division node whose GUID equals ``target_guid``.

        GUIDs are compared after stripping surrounding braces and upper-casing.
        The search is depth-first in document order and descends into every
        list and every dict/list value of a dict (not only ``children``), so
        the first match in that order is returned.

        An empty or non-string ``target_guid`` never matches: without this
        guard an empty target would equal the (empty) GUID of any node that has
        no ``GUID`` key and the first such node would be returned.

        :param target_guid: GUID to look for, with or without braces.
        :param node: sub-tree to search; ``None`` starts from every category of
            ``projectData.projectDivision``.
        :param path: accepted for backward compatibility; not used by the search.
        :return: the matching dict (the live object inside ``project_data``)
            or ``None``.
        """
        if not isinstance(target_guid, str):
            return None
        wanted = target_guid.strip("{}").upper()
        if not wanted:
            return None

        def search(current):
            # Separate walker: a ``None`` entry inside the tree must not be
            # confused with the "start from the root" sentinel of the public API.
            if isinstance(current, list):
                for item in current:
                    found = search(item)
                    if found is not None:
                        return found
            elif isinstance(current, dict):
                guid = current.get("GUID")
                if isinstance(guid, str) and guid.strip("{}").upper() == wanted:
                    return current
                for value in current.values():
                    if isinstance(value, (dict, list)):
                        found = search(value)
                        if found is not None:
                            return found
            return None

        if node is not None:
            return search(node)

        data = self.project_data
        project = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project, dict) or "projectDivision" not in project:
            print("警告: project_data 为空或不包含 projectData.projectDivision")
            return None

        division = project["projectDivision"]
        if not isinstance(division, dict):
            # Tolerate a list-shaped division instead of failing on .items().
            return search(division)

        for category in division.values():
            found = search(category)
            if found is not None:
                return found
        return None

    def find_expense_preview_node(self, target_guid, node=None, path=""):
        """Return the first expensePreview node whose GUID equals ``target_guid``.

        GUIDs are compared after stripping surrounding braces and upper-casing.
        From the root the search visits every item of every group of every
        category in ``projectData.expensePreview`` and then descends through
        ``children`` lists only.

        An empty or non-string ``target_guid`` never matches: without this
        guard an empty target would equal the (empty) GUID of any node lacking
        a ``GUID`` key. Entries that do not have the expected shape are skipped.

        :param target_guid: GUID to look for, with or without braces.
        :param node: node to start from; ``None`` starts from the root.
        :param path: accepted for backward compatibility; not used by the search.
        :return: the matching dict (the live object inside ``project_data``)
            or ``None``.
        """
        data = self.project_data
        project = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project, dict) or "expensePreview" not in project:
            print("警告: project_data 为空或不包含 projectData.expensePreview")
            return None

        if not isinstance(target_guid, str):
            return None
        wanted = target_guid.strip("{}").upper()
        if not wanted:
            return None

        def search(current):
            if not isinstance(current, dict):
                return None
            guid = current.get("GUID")
            if isinstance(guid, str) and guid.strip("{}").upper() == wanted:
                return current
            children = current.get("children")
            if isinstance(children, list):
                for child in children:
                    found = search(child)
                    if found is not None:
                        return found
            return None

        if node is not None:
            return search(node)

        expense_preview = project["expensePreview"]
        if not isinstance(expense_preview, dict):
            return None
        for category in expense_preview.values():
            if not isinstance(category, dict):
                continue
            for group in category.values():
                if not isinstance(group, list):
                    continue
                for item in group:
                    found = search(item)
                    if found is not None:
                        return found
        return None

    @staticmethod
    def generate_id(text):
        """生成简单的ID（只保留字母数字和中文）"""
        return "".join(c for c in text if c.isalnum() or "\u4e00" <= c <= "\u9fa5").upper()

    def convert_calculation_results_to_children(self, calculation_results):
        """Turn ``{node name: {cost type: amount}}`` into a list of node dicts.

        Each node becomes ``{"name", "type", "children"}`` where ``children``
        holds one ``{"id", "cost"}`` entry per cost type. The id is the cost
        type followed by ``_`` and its ``generate_id`` form; the cost is the
        amount converted with ``str``.
        """
        return [
            {
                "name": node_name,
                "type": "工程量节点",
                "children": [
                    {
                        "id": f"{cost_type}_{self.generate_id(cost_type)}",
                        "cost": str(amount),
                    }
                    for cost_type, amount in costs.items()
                ],
            }
            for node_name, costs in calculation_results.items()
        ]

    def generate_guid(self):
        """Return a new ``uuid.uuid4()`` value, upper-cased and wrapped in braces."""
        fresh = uuid.uuid4()
        return f"{{{str(fresh).upper()}}}"

    def add_quantity_node_expense_data(self, project_guid, calculation_results):
        """Replace an expense node's children with per-quantity cost summaries.

        For every ``node name -> {cost type: amount}`` entry the child of the
        project-division node with that ``项目名称`` or ``name`` is looked up;
        entries without a matching child are reported and skipped. Each match
        yields ``{"GUID", "sum", "children", "rcj"}`` where ``sum`` holds one
        ``{"id", "cost"}`` per cost type. A quantity node without a GUID gets a
        freshly generated one.

        All input is validated before the tree is modified: the existing
        ``children`` list is replaced only once every entry has been converted,
        so a failure part-way leaves the previous children intact.

        :param project_guid: GUID of the division / expense node to update.
        :param calculation_results: mapping of quantity node name to a mapping
            of cost type to amount.
        :return: ``True`` on success, ``False`` when a node is missing or the
            input cannot be processed (the reason is printed; nothing is raised).
        """
        try:
            # 1. Locate the project-division node.
            division_node = self.find_project_division_node(project_guid)
            if not division_node:
                print(f"未找到GUID为 {project_guid} 的项目划分节点")
                return False

            # 2. Locate the matching expense-preview node.
            expense_node = self.find_expense_preview_node(project_guid)
            if not expense_node:
                print(f"未找到GUID为 {project_guid} 的费用预览节点")
                return False

            # 3. Make sure sum / children / rcj exist.
            self.ensure_standard_format(expense_node)

            # Only dict children can carry a name; anything else is ignored.
            candidates = [
                child for child in (division_node.get("children") or []) if isinstance(child, dict)
            ]

            # 4. Stage every entry first so bad input cannot leave a half-built list.
            staged = []
            for node_name, costs in calculation_results.items():
                quantity_node = next(
                    (
                        child
                        for child in candidates
                        if child.get("项目名称") == node_name or child.get("name") == node_name
                    ),
                    None,
                )
                if not quantity_node:
                    print(f"未找到名称为 {node_name} 的工程量节点")
                    continue

                cost_items = [
                    {"id": f"{cost_type}", "cost": str(amount)} for cost_type, amount in costs.items()
                ]
                staged.append((quantity_node, cost_items))

            # 5. Commit: assign missing GUIDs and swap in the new children list.
            new_children = []
            for quantity_node, cost_items in staged:
                if not quantity_node.get("GUID"):
                    quantity_node["GUID"] = self.generate_guid()
                new_children.append(
                    {"GUID": quantity_node["GUID"], "sum": cost_items, "children": [], "rcj": []}
                )
            expense_node["children"] = new_children

            print(f"成功添加GUID为 {project_guid} 的工程量节点费用预览数据")
            return True

        except Exception as e:
            # Deliberately broad: callers rely on a boolean result, never an exception.
            print(f"添加工程量节点费用预览数据失败: {str(e)}")
            return False

    def add_labor_material_machine_expense_data(self, project_guid, lmm_data):
        """Append labor / material / machine rows to an expense node's ``rcj`` list.

        ``lmm_data`` may contain the sections ``人工节点``, ``材料节点`` and
        ``机械节点``; each is a list of row dicts. Every row is copied into a
        new dict that starts with ``type`` (``人工`` / ``材料`` / ``机械``)
        followed by a fixed set of fields, with ``""`` for fields the row does
        not have. Material rows additionally carry ``供货方`` right after
        ``type``. Sections are processed in the order listed above.

        All rows are built before ``rcj`` is touched, so input that cannot be
        processed leaves the list unchanged instead of partially extended.

        :param project_guid: GUID of the division / expense node to update.
        :param lmm_data: mapping of section name to list of row dicts.
        :return: ``True`` on success, ``False`` when a node is missing or the
            input cannot be processed (the reason is printed; nothing is raised).
        """
        common_fields = (
            "编码",
            "名称",
            "单位",
            "预算价不含税",
            "市场价不含税",
            "预算价合价",
            "市场价合价",
            "价差",
            "数量",
        )
        # (section key in lmm_data, value written to "type", fields copied in order)
        sections = (
            ("人工节点", "人工", common_fields),
            ("材料节点", "材料", ("供货方",) + common_fields),
            ("机械节点", "机械", common_fields),
        )

        try:
            # 1. The division node must exist for the GUID to be valid.
            division_node = self.find_project_division_node(project_guid)
            if not division_node:
                print(f"未找到GUID为 {project_guid} 的项目划分节点")
                return False

            # 2. Locate the expense-preview node that receives the rows.
            expense_node = self.find_expense_preview_node(project_guid)
            if not expense_node:
                print(f"未找到GUID为 {project_guid} 的费用预览节点")
                return False

            # 3. Make sure sum / children / rcj exist.
            self.ensure_standard_format(expense_node)
            if not isinstance(expense_node.get("rcj"), list):
                # Covers a missing key as well as an explicit JSON null.
                expense_node["rcj"] = []

            # 4. Build every row first; a malformed row aborts before any append.
            staged = []
            for section_key, type_label, fields in sections:
                if section_key not in lmm_data:
                    continue
                for item in lmm_data[section_key] or ():
                    row = {"type": type_label}
                    for field in fields:
                        row[field] = item.get(field, "")
                    staged.append(row)

            expense_node["rcj"].extend(staged)

            print(f"成功添加GUID为 {project_guid} 的人材机节点费用预览数据到rcj数组")
            return True

        except Exception as e:
            # Deliberately broad: callers rely on a boolean result, never an exception.
            print(f"添加人材机节点费用预览数据失败: {str(e)}")
            return False

    def batch_process_expense_data(self, data_list):
        """
        Apply quantity and labor/material/machine data for several projects.

        Each entry is read through ``projectGuid``, ``calculationResults`` and
        ``laborMaterialMachineData``; a data key that is missing or falsy is
        skipped. Every entry is attempted even after an earlier one failed.

        :param data_list: list of dicts carrying the keys described above
        :return: ``True`` when every attempted update succeeded (also for an
            empty list), ``False`` otherwise
        """
        outcomes = []
        for entry in data_list:
            guid = entry.get("projectGuid")
            quantities = entry.get("calculationResults")
            lmm_rows = entry.get("laborMaterialMachineData")

            if quantities:
                outcomes.append(self.add_quantity_node_expense_data(guid, quantities))

            if lmm_rows:
                outcomes.append(self.add_labor_material_machine_expense_data(guid, lmm_rows))

        return all(outcomes)

    def batch_process_from_folder(self, folder_path):
        """
        Merge every BCL result file found in ``folder_path`` into the project data.

        File names are split on ``_``: field 0 is taken as the project name and
        field 1 as the division GUID. A ``.json`` file whose name contains
        ``调差_预算工程`` supplies quantity calculation results; one whose name
        contains ``调差_rcj`` supplies labor/material/machine rows. Other files
        are ignored. Files are visited in sorted order so runs are repeatable.

        For each GUID the division node is looked up by GUID and, failing that,
        by project name. The GUID of the node actually found is the one passed
        on to the add_* methods, so the name fallback takes effect.

        :param folder_path: directory holding the result JSON files
        :return: number of GUID groups that were merged without error
        """
        # List every GUID of the project division first.
        print("\n=== 列出所有项目划分中的GUID ===")
        all_guids = self.list_all_division_guids()
        print("=== 列出结束 ===\n")

        # Bring every expense-preview node that has a division GUID into standard shape.
        print("\n=== 确保所有费用预览节点都有标准格式 ===")
        standardized_count = 0
        for known_guid, _name, _path in all_guids or []:
            if not isinstance(known_guid, str):
                continue
            expense_node = self.find_expense_preview_node(known_guid)
            if expense_node:
                self.ensure_standard_format(expense_node)
                standardized_count += 1
        print(f"已标准化 {standardized_count} 个费用预览节点")
        print("=== 标准化结束 ===\n")

        # Step 1: group the result files by the GUID embedded in their names.
        guid_map = {}
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith(".json"):
                continue

            parts = filename.split("_")
            if len(parts) < 3:
                continue

            if "调差_预算工程" in filename:
                kind = "calc"
            elif "调差_rcj" in filename:
                kind = "rcj"
            else:
                # Not a result file; registering it would count a no-op as success.
                continue

            guid = parts[1].strip("{}")
            entry = guid_map.setdefault(guid, {"calc": None, "rcj": None})
            entry[kind] = os.path.join(folder_path, filename)

        # Step 2: load each group's files and merge them.
        success_count = 0
        for guid, paths in guid_map.items():
            # An empty GUID cannot identify a node, so go straight to the name lookup.
            division_node = self.find_project_division_node(guid) if guid else None

            if not division_node:
                file_path = paths["calc"] or paths["rcj"]
                project_name = os.path.basename(file_path).split("_")[0]
                print(f"通过GUID未找到节点，尝试通过名称 '{project_name}' 查找")
                if project_name:
                    division_node = self.find_division_node_by_name(project_name)

            if not division_node:
                print(f"无法找到对应的项目划分节点，跳过处理 GUID: {guid}")
                continue

            # From here on, address the node by its own GUID rather than the file's.
            actual_guid = division_node.get("GUID", "")
            if not isinstance(actual_guid, str) or not actual_guid.strip("{}"):
                print(f"项目划分节点缺少GUID，跳过处理: {guid}")
                continue

            # NOTE: nothing in this method creates a missing expense node; the
            # add_* calls below report failure when it is absent.
            expense_node = self.find_expense_preview_node(actual_guid)
            if not expense_node:
                print(f"未找到GUID为 {actual_guid} 的费用预览节点，将在处理时创建")

            payloads = {}
            readable = True
            for kind, label in (("calc", "工程量"), ("rcj", "人材机")):
                file_path = paths[kind]
                if not file_path:
                    continue
                try:
                    with open(file_path, "r", encoding="utf-8") as handle:
                        payloads[kind] = json.load(handle)
                except (OSError, ValueError) as exc:
                    # ValueError covers both invalid JSON and undecodable bytes.
                    print(f"读取{label}文件失败 {file_path}: {exc}")
                    readable = False
                    break
            if not readable:
                # One unreadable file skips the whole group.
                continue

            success = True
            if payloads.get("calc"):
                success &= self.add_quantity_node_expense_data(actual_guid, payloads["calc"])
            if payloads.get("rcj"):
                success &= self.add_labor_material_machine_expense_data(actual_guid, payloads["rcj"])

            if success:
                success_count += 1
                print(f"✅ 成功处理 GUID: {guid}")
            else:
                print(f"❌ 处理 GUID: {guid} 时发生错误")

        return success_count

    def get_processed_project_data(self):
        """
        Return the project data held by this processor.

        :return: ``self.project_data`` itself, not a copy, so changes made by
            the caller are visible to the processor
        """
        return self.project_data

    def export_to_json(self, pretty=True):
        """
        Serialize the project data to a JSON string.

        Non-ASCII characters are written as-is (``ensure_ascii=False``).

        :param pretty: ``True`` for a two-space indented layout; ``False`` for
            the compact single-line form
        :return: the JSON text
        """
        # indent=0 would still put every element on its own line; only None
        # selects json's compact representation.
        return json.dumps(self.project_data, ensure_ascii=False, indent=2 if pretty else None)

    def list_all_division_guids(self):
        """Collect every GUID found under ``projectData.projectDivision``.

        The walk is depth-first in document order and descends into every list
        and every dict/list value of a dict. Each dict that has a ``GUID`` key
        contributes one tuple.

        :return: list of ``(guid, name, path)`` tuples, where ``name`` comes
            from ``项目名称`` or ``name`` (``未命名`` when neither exists) and
            ``path`` is the dotted/indexed location of the node. An empty list
            is returned when the division is missing, so the result can always
            be iterated.
        """
        data = self.project_data
        project = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project, dict) or "projectDivision" not in project:
            print("警告: project_data 为空或不包含 projectData.projectDivision")
            return []

        guids = []

        def collect_guids(node, path=""):
            """Append a tuple for ``node`` if it has a GUID, then recurse."""
            if isinstance(node, list):
                for i, item in enumerate(node):
                    collect_guids(item, f"{path}[{i}]")
            elif isinstance(node, dict):
                if "GUID" in node:
                    name = node.get("项目名称", node.get("name", "未命名"))
                    guids.append((node["GUID"], name, path))

                # Descend into every container value, not only "children".
                for key, value in node.items():
                    if isinstance(value, (dict, list)):
                        collect_guids(value, f"{path}.{key}")

        collect_guids(project["projectDivision"], "projectData.projectDivision")
        return guids

    def find_division_node_by_name(self, name):
        """Return the first project-division node named ``name``.

        A node's name is its ``项目名称`` value or, when that key is absent,
        its ``name`` value. The search is depth-first in document order through
        lists and the ``children`` key of dicts, category by category.

        An empty or ``None`` ``name`` never matches: without this guard it
        would equal the (empty) default name of the first node that has
        neither name key.

        :param name: exact name to look for.
        :return: the matching dict (the live object inside ``project_data``)
            or ``None``.
        """
        data = self.project_data
        project = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project, dict) or "projectDivision" not in project:
            print("警告: project_data 为空或不包含 projectData.projectDivision")
            return None

        if name is None or name == "":
            return None

        def search_by_name(node):
            if isinstance(node, list):
                for item in node:
                    result = search_by_name(item)
                    if result:
                        return result
            elif isinstance(node, dict):
                node_name = node.get("项目名称", node.get("name", ""))
                if node_name == name:
                    return node

                if "children" in node:
                    result = search_by_name(node["children"])
                    if result:
                        return result
            return None

        division = project["projectDivision"]
        # Tolerate a list-shaped division instead of failing on .values().
        categories = division.values() if isinstance(division, dict) else [division]
        for category in categories:
            result = search_by_name(category)
            if result:
                return result

        return None

    def ensure_standard_format(self, expense_node):
        """Make sure an expense node has ``sum``, ``children`` and ``rcj`` lists.

        The node is modified in place:

        * ``sum``, ``children`` and ``rcj`` are created as empty lists when the
          key is missing or holds ``None`` (JSON null).
        * Children that are direct cost items (they have ``id`` and ``cost``
          but no ``GUID``) are copied into ``sum`` as ``{"id", "cost"}`` unless
          an entry with the same id is already there.
        * ``children`` is emptied only when at least one direct cost item was
          found and no child has a ``GUID``. When GUID children are present the
          list is left untouched, so the copied cost items remain in it too.
        * Every ``sum`` entry that has both ``id`` and ``cost`` is reduced to
          exactly those two keys.

        Entries that are not dicts are left alone.

        :param expense_node: expense-preview node dict to normalise.
        :return: the same ``expense_node`` object.
        """
        if expense_node.get("sum") is None:
            expense_node["sum"] = []
        totals = expense_node["sum"]

        children = expense_node.get("children")
        if children is None:
            expense_node["children"] = []
        else:
            direct_items = [
                child
                for child in children
                if isinstance(child, dict) and "cost" in child and "id" in child and "GUID" not in child
            ]
            for child in direct_items:
                already_listed = any(
                    isinstance(entry, dict) and entry.get("id") == child["id"] for entry in totals
                )
                if not already_listed:
                    # Keep only id and cost when moving an item into sum.
                    totals.append({"id": child["id"], "cost": child["cost"]})

            has_guid_child = any(isinstance(child, dict) and "GUID" in child for child in children)
            if direct_items and not has_guid_child:
                expense_node["children"] = []

        if expense_node.get("rcj") is None:
            expense_node["rcj"] = []

        for i, item in enumerate(totals):
            if isinstance(item, dict) and "id" in item and "cost" in item:
                totals[i] = {"id": item["id"], "cost": item["cost"]}

        return expense_node


def write_BCLresult_into_json(original_json_path, bcl_result_folder, output_json_path):
    """
    Merge the BCL calculation results into one project JSON file.

    The original file is loaded, the results found in ``bcl_result_folder``
    are merged through ``ProjectExpenseProcessor.batch_process_from_folder``
    and the outcome is written, pretty-printed, to ``output_json_path``. The
    output directory is created when the path has one.

    :param original_json_path: path of the original project JSON file
    :param bcl_result_folder: directory holding the BCL result files
    :param output_json_path: path of the merged JSON file to write
    :return: ``True`` when the merged file was written, ``False`` otherwise
        (the error and its traceback are printed; nothing is raised)
    """
    try:
        # 1. Load the original project data.
        print(f"加载原始项目数据: {original_json_path}")
        with open(original_json_path, "r", encoding="utf-8") as f:
            project_data = json.load(f)

        # 2. Set up the processor on a private copy of the data.
        processor = ProjectExpenseProcessor()
        processor.initialize_project_data(project_data)

        # 3. The results location must be an existing directory.
        if not os.path.isdir(bcl_result_folder):
            print(f"错误: BCL计算结果文件夹不存在: {bcl_result_folder}")
            return False

        print(f"使用BCL计算结果文件夹: {bcl_result_folder}")

        # 4. Merge every result file.
        count = processor.batch_process_from_folder(bcl_result_folder)
        print(f"共成功处理了 {count} 个项目节点。")

        # 5. Write the merged data.
        print(f"保存更新后的数据: {output_json_path}")
        output_dir = os.path.dirname(output_json_path)
        if output_dir:
            # A bare file name has an empty dirname, and os.makedirs("") raises.
            os.makedirs(output_dir, exist_ok=True)
        with open(output_json_path, "w", encoding="utf-8") as f:
            f.write(processor.export_to_json(pretty=True))

        print(f"✅ 数据已保存至 {output_json_path}")
        return True

    except Exception as e:
        # Top-level boundary for one file: report and let a batch run continue.
        print(f"❌ 处理失败: {str(e)}")
        import traceback

        traceback.print_exc()
        return False


def batch_write_BCLresult_into_json(original_folder, bcl_result_folder, output_folder):
    """
    Merge BCL results into every JSON file directly inside ``original_folder``.

    For each file the results are taken from the sub-folder of
    ``bcl_result_folder`` named after the file (without extension) when that
    sub-folder exists, otherwise from ``bcl_result_folder`` itself. The merged
    file is written to ``output_folder`` under the same file name. Files are
    processed in sorted order so runs are repeatable.

    :param original_folder: directory holding the original JSON files
    :param bcl_result_folder: directory holding the BCL result files
    :param output_folder: directory receiving the merged JSON files
    :return: number of files that were merged and written successfully; ``0``
        when the input directory is missing or holds no JSON file
    """
    # Make sure the output directory exists.
    os.makedirs(output_folder, exist_ok=True)

    if not os.path.isdir(original_folder):
        print(f"警告: 原始JSON文件夹不存在: {original_folder}")
        return 0

    # Only regular files count; a directory that happens to end in .json is skipped.
    json_files = [
        os.path.join(original_folder, entry)
        for entry in sorted(os.listdir(original_folder))
        if entry.lower().endswith(".json") and os.path.isfile(os.path.join(original_folder, entry))
    ]

    if not json_files:
        print(f"警告: 在目录 {original_folder} 中没有找到JSON文件")
        return 0

    success_count = 0
    for original_file in json_files:
        file_name = os.path.basename(original_file)
        output_file = os.path.join(output_folder, file_name)

        # Prefer a results sub-folder named after the file, else the shared folder.
        base_filename = os.path.splitext(file_name)[0]
        file_bcl_result_folder = os.path.join(bcl_result_folder, base_filename)
        if not os.path.exists(file_bcl_result_folder):
            file_bcl_result_folder = bcl_result_folder

        print(f"\n处理文件: {original_file}")
        print(f"BCL计算结果文件夹: {file_bcl_result_folder}")
        print(f"输出文件: {output_file}")

        if write_BCLresult_into_json(original_file, file_bcl_result_folder, output_file):
            success_count += 1

    return success_count


if __name__ == "__main__":
    # The folder layout is hard-coded; the script takes no command-line arguments.
    original_folder, bcl_result_folder, output_folder = (
        "project2json/outputs",  # original JSON files
        "outputs-2",  # BCL calculation results
        "final_outputs",  # merged JSON output
    )

    print(f"原始JSON文件夹: {original_folder}")
    print(f"BCL计算结果文件夹: {bcl_result_folder}")
    print(f"输出文件夹: {output_folder}")

    count = batch_write_BCLresult_into_json(
        original_folder,
        bcl_result_folder,
        output_folder,
    )
    print(f"\n批量处理完成: 共成功处理了 {count} 个文件")