import json
import os
import re
import uuid


def _determine_project_type(data):
    """Classify a project by the leading part of its ``division`` field.

    :param data: parsed project JSON (expected to be a dict)
    :return: "主网", "配网" or "技改"; ``None`` when the field is missing,
        empty, not a string, or not one of the recognised values
    """
    if not isinstance(data, dict):
        return None

    division = data.get("division")
    if not division or not isinstance(division, str):
        return None

    # Only the text before the first "-" decides the type ("主网-xxx" -> "主网").
    division_type = division.split("-")[0]

    if division_type in ("主网", "主网线路"):
        return "主网"
    if division_type in ("配网", "配网造价", "配网清单"):
        return "配网"
    if division_type in ("技改", "技改造价", "技改清单"):
        return "技改"
    return None


def _is_brace_key(key):
    """Return True when *key* is a string of the form ``"{...}"``."""
    return isinstance(key, str) and key.startswith("{") and key.endswith("}")


def _copy_node_attributes(target, source):
    """Copy the attributes of an original expensePreview entry onto *target*.

    ``guid`` is skipped (the node already carries ``GUID``), ``{...}``-shaped
    keys are skipped, and a ``children`` list is copied element by element,
    keeping only dict children that have an ``id`` or a ``cost`` key.
    """
    if not isinstance(source, dict):
        return

    for key, value in source.items():
        if key == "guid":
            continue
        if key == "children" and isinstance(value, list):
            kept = target.setdefault("children", [])
            kept.extend(
                child
                for child in value
                if isinstance(child, dict) and ("id" in child or "cost" in child)
            )
        elif not _is_brace_key(key):
            target[key] = value


def _index_expense_preview(expense_preview):
    """Index the original expensePreview by GUID.

    :param expense_preview: the original ``expensePreview`` mapping
    :return: ``(guid_to_data, guid_to_nested_guids)`` where the first maps a
        GUID to the dict that carries it and the second maps a parent GUID to
        the list of GUIDs found beneath it
    """
    guid_to_data = {}
    guid_to_nested_guids = {}

    def extract_guid_data(obj, parent_guid=None):
        if isinstance(obj, dict):
            guid = obj.get("guid")
            if guid:
                guid_to_data[guid] = obj
                # Also index the upper-case spelling to tolerate case mismatches.
                guid_to_data[guid.upper()] = obj
                if parent_guid:
                    guid_to_nested_guids.setdefault(parent_guid, []).append(guid)

            # Descendants hang off the nearest ancestor that has a guid.
            current_guid = guid if guid else parent_guid
            for value in obj.values():
                extract_guid_data(value, current_guid)
        elif isinstance(obj, list):
            for entry in obj:
                extract_guid_data(entry, parent_guid)

    for category_data in expense_preview.values():
        if not isinstance(category_data, dict):
            extract_guid_data(category_data)
            continue

        for key, item_data in category_data.items():
            if key.startswith("{") and key.endswith("}"):
                # Structure with bill items: the key itself is the GUID.
                parent_guid = key.strip("{}")
                extract_guid_data(item_data, parent_guid)
                # Only dict entries can be tagged and indexed; anything else
                # would raise on item assignment.
                if isinstance(item_data, dict):
                    if "guid" not in item_data:
                        item_data["guid"] = key
                    # NOTE(review): the source indentation was lost; this
                    # assignment is assumed to be unconditional - confirm.
                    guid_to_data[parent_guid] = item_data
            elif isinstance(item_data, dict) and "guid" in item_data:
                # Structure without bill items: descriptive key, guid inside.
                guid = item_data["guid"]
                guid_stripped = guid.strip("{}")
                guid_to_data[guid] = item_data
                guid_to_data[guid_stripped] = item_data
                guid_to_data[guid.upper()] = item_data
                guid_to_data[guid_stripped.upper()] = item_data
            else:
                extract_guid_data(item_data)

    return guid_to_data, guid_to_nested_guids


def _as_guid_node(source):
    """Return a copy of *source* with its ``guid`` key replaced by ``GUID``."""
    node = {"GUID": source["guid"]}
    for key, value in source.items():
        if key != "guid":
            node[key] = value
    return node


def _flatten_expense_preview(expense_preview):
    """Convert the original dict-shaped expensePreview into per-category lists.

    Used when projectDivision yields nothing. Entries that carry a ``guid``
    become nodes directly; otherwise their dict values that carry a ``guid``
    do. A category whose data is not a dict is kept as it is.
    """
    flattened = {}
    for category, category_data in expense_preview.items():
        if not isinstance(category_data, dict):
            flattened[category] = category_data
            continue

        entries = flattened.setdefault(category, [])
        for item_data in category_data.values():
            if not isinstance(item_data, dict):
                continue
            if "guid" in item_data:
                entries.append(_as_guid_node(item_data))
                continue
            for nested_data in item_data.values():
                if isinstance(nested_data, dict) and "guid" in nested_data:
                    entries.append(_as_guid_node(nested_data))
    return flattened


def transform_expense_preview(input_file, output_file):
    """Rebuild ``projectData.expensePreview`` from ``projectData.projectDivision``.

    Steps:
      1. Index the original expensePreview by GUID.
      2. Walk ``projectDivision["工程量"]`` and build one node per
         "项目划分" item, attaching the indexed data to the matching GUID.
      3. If that produces nothing, fall back to flattening the original
         expensePreview.
      4. Drop self-referencing children and write the result.

    Any exception is caught and printed; in that case nothing is returned
    and the output file may not have been written.

    :param input_file: path of the JSON file to read
    :param output_file: path of the JSON file to write (may equal input_file)
    """
    print(f"正在读取文件: {input_file}")

    try:
        with open(input_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        print("JSON文件加载成功")

        project_data = data.get("projectData", {})
        original_expense_preview = project_data.get("expensePreview", {})
        project_division = project_data.get("projectDivision", {})

        print(f"原始expensePreview中的顶级分类: {list(original_expense_preview.keys())}")
        print(f"projectDivision中的顶级分类: {list(project_division.keys())}")

        guid_to_data, guid_to_nested_guids = _index_expense_preview(original_expense_preview)

        print(f"找到 {len(guid_to_data)} 个GUID映射")
        print(f"找到 {len(guid_to_nested_guids)} 个嵌套GUID关系")

        new_expense_preview = {}
        # Shared across all items so that a GUID is emitted only once.
        processed_guids = set()

        specialties = project_division.get("工程量")
        if isinstance(specialties, dict):
            for specialty_type, specialty_items in specialties.items():
                if not (isinstance(specialty_items, list) and specialty_items):
                    continue

                print(f"处理专业类型: {specialty_type}")
                bucket = new_expense_preview.setdefault(specialty_type, [])

                for item in specialty_items:
                    if not isinstance(item, dict) or item.get("type") != "项目划分":
                        continue
                    project_hierarchy = build_project_hierarchy(
                        item, guid_to_data, guid_to_nested_guids, processed_guids
                    )
                    if project_hierarchy:
                        bucket.append(project_hierarchy)

        if not new_expense_preview:
            print("未从projectDivision中找到数据,保留原始结构")
            new_expense_preview = _flatten_expense_preview(original_expense_preview)

        # Post-processing: drop children that point back at their own parent.
        remove_self_references(new_expense_preview)

        print(f"新expensePreview中的顶级分类: {list(new_expense_preview.keys())}")

        data["projectData"]["expensePreview"] = new_expense_preview

        print(f"正在保存文件: {output_file}")
        with open(output_file, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

        print("转换完成!")

    except Exception as e:
        print(f"处理过程中出错: {str(e)}")


def build_project_hierarchy(item, guid_to_data, guid_to_nested_guids, processed_guids=None):
    """Build the output node for one projectDivision item.

    :param item: projectDivision item; must carry a ``GUID``
    :param guid_to_data: GUID -> original expensePreview dict
    :param guid_to_nested_guids: parent GUID -> list of nested GUIDs
    :param processed_guids: optional set of GUIDs already emitted; it is
        updated in place and used to skip duplicates
    :return: the node dict, or ``None`` when the item is not a dict, has no
        GUID, or was already processed
    """
    if not isinstance(item, dict):
        return None

    guid = item.get("GUID")
    if not guid:
        return None

    if processed_guids is not None:
        if guid in processed_guids:
            return None
        processed_guids.add(guid)

    project_node = {"GUID": guid}

    # The index is keyed by the GUID without its surrounding braces.
    guid_stripped = guid.strip("{}")
    if guid_stripped in guid_to_data:
        _copy_node_attributes(project_node, guid_to_data[guid_stripped])

    # NOTE(review): the source indentation was lost; the nested-GUID lookup is
    # assumed to be independent of the guid_to_data lookup above - confirm.
    build_nested_hierarchy(
        project_node, guid_stripped, guid_to_data, guid_to_nested_guids, processed_guids
    )

    children = item.get("children", [])
    if children:
        child_nodes = project_node.setdefault("children", [])
        for child in children:
            if not isinstance(child, dict) or child.get("type") != "项目划分":
                continue
            child_node = build_project_hierarchy(
                child, guid_to_data, guid_to_nested_guids, processed_guids
            )
            # Never attach a node underneath itself.
            if child_node and child_node.get("GUID") != guid:
                child_nodes.append(child_node)

    return project_node


def build_nested_hierarchy(node, guid, guid_to_data, guid_to_nested_guids, processed_guids=None):
    """Attach the GUIDs nested under *guid* to ``node["children"]``, recursively.

    GUIDs are compared with their braces normalised, so ``ABC`` and ``{ABC}``
    count as the same identifier for both the self-reference check and the
    already-processed check.

    Without *processed_guids* nothing stops a cyclic mapping from recursing
    without bound.

    :param node: node dict to extend in place
    :param guid: key to look up in *guid_to_nested_guids*
    :param guid_to_data: GUID -> original expensePreview dict
    :param guid_to_nested_guids: parent GUID -> list of nested GUIDs
    :param processed_guids: optional set of emitted GUIDs, updated in place
    """
    if guid not in guid_to_nested_guids:
        return

    children = node.setdefault("children", [])
    parent_bare = guid.strip("{}")

    for nested_guid in guid_to_nested_guids[guid]:
        bare = nested_guid.strip("{}")
        if bare == parent_bare:
            continue  # self-reference

        # Canonical output form: exactly one pair of braces.
        guid_with_braces = "{" + bare + "}"

        if processed_guids is not None:
            if guid_with_braces in processed_guids:
                continue
            processed_guids.add(guid_with_braces)

        nested_node = {"GUID": guid_with_braces}

        if nested_guid in guid_to_data:
            _copy_node_attributes(nested_node, guid_to_data[nested_guid])

        # NOTE(review): assumed to run whether or not the GUID has data - confirm.
        build_nested_hierarchy(
            nested_node, nested_guid, guid_to_data, guid_to_nested_guids, processed_guids
        )

        children.append(nested_node)


def remove_self_references(expense_preview):
    """Remove self-referencing children from every node of every list category."""
    for items in expense_preview.values():
        if isinstance(items, list):
            for item in items:
                remove_self_references_from_node(item)


def remove_self_references_from_node(node):
    """Recursively drop children whose GUID equals the parent's, ignoring braces.

    Nodes that are not dicts or have no ``GUID`` are left untouched. Children
    that are kept and carry a ``GUID`` are processed recursively. The children
    list is modified in place.
    """
    if not isinstance(node, dict):
        return

    guid = node.get("GUID")
    if not guid:
        return

    children = node.get("children")
    if not isinstance(children, list):
        return

    own_bare = guid.strip("{}")
    kept = []
    for child in children:
        child_guid = child.get("GUID") if isinstance(child, dict) else None
        if child_guid:
            if child_guid.strip("{}") == own_bare:
                continue
            remove_self_references_from_node(child)
        kept.append(child)

    children[:] = kept


def find_node_in_expense_preview(expense_preview, target_guid):
    """Return the first node with ``GUID == target_guid`` in any list category.

    :return: the node dict, or ``None`` when no node matches
    """
    for items in expense_preview.values():
        if not isinstance(items, list):
            continue
        for item in items:
            result = find_node(item, target_guid)
            if result is not None:
                return result
    return None


def find_node(node, target_guid):
    """Depth-first search for the node whose ``GUID`` equals *target_guid*.

    Non-dict nodes and non-list ``children`` values are skipped instead of
    raising.

    :return: the matching node dict, or ``None``
    """
    if not isinstance(node, dict):
        return None

    if node.get("GUID") == target_guid:
        return node

    children = node.get("children", [])
    if not isinstance(children, list):
        return None

    for child in children:
        result = find_node(child, target_guid)
        if result is not None:
            return result
    return None


def transform_json_types(input_file_path, output_file_path=None):
    """Translate coded field values of a 主网 JSON file into their labels.

    For every dict that has a "类型" key:
      * "类型" is replaced by its label when the code is known;
      * for codes 0/1/5, "id" is renamed to "GUID" and "费用类型" is mapped;
      * for codes 1/5, "供货方" is mapped;
      * for code 5, "设备类型" is mapped.

    :param input_file_path: path of the JSON file to read
    :param output_file_path: path to write; ``None`` overwrites the input file
    :return: the transformed JSON data
    """
    type_mapping = {
        "8": "清单",
        "0": "定额",
        "1": "主材",
        "5": "设备",
        "2": "人工",
        "3": "材料",
        "4": "机械",
    }
    device_type_mapping = {"0": "普通设备"}
    supplier_mapping = {"1": "甲供", "2": "乙供"}
    fee_type_mapping = {"0": "取费", "1": "不取费"}

    with open(input_file_path, "r", encoding="utf-8") as f:
        data = json.load(f)

    def remap(obj, field, mapping):
        # Replace obj[field] by its label when the field exists and is known.
        if field in obj:
            code = str(obj[field])
            if code in mapping:
                obj[field] = mapping[code]

    def traverse(obj):
        if isinstance(obj, dict):
            if "类型" in obj:
                # Decisions below use the code as it was before relabelling.
                current_type = str(obj["类型"])
                if current_type in type_mapping:
                    obj["类型"] = type_mapping[current_type]

                if current_type in ("0", "1", "5"):
                    if "id" in obj:
                        obj["GUID"] = obj.pop("id")
                    remap(obj, "费用类型", fee_type_mapping)

                if current_type in ("1", "5"):
                    remap(obj, "供货方", supplier_mapping)

                if current_type == "5":
                    remap(obj, "设备类型", device_type_mapping)

            for value in obj.values():
                traverse(value)
        elif isinstance(obj, list):
            for entry in obj:
                traverse(entry)

    traverse(data)

    if output_file_path is None:
        output_file_path = input_file_path

    with open(output_file_path, "w", encoding="utf-8") as f:
        json.dump(data, f, ensure_ascii=False, indent=2)

    return data


def add_missing_guids_to_nodes(file_path):
    """Give every 定额/主材/设备 node under ``projectData`` a GUID if it has none.

    A node qualifies when its ``type`` is one of those three values and it has
    neither a ``guid`` nor a ``GUID`` key. The new value is an upper-case
    UUID4 in braces, stored under ``guid``. The file is rewritten in place.
    Exceptions are caught, printed with a traceback, and not re-raised.

    :param file_path: path of the JSON file to update
    """
    try:
        print(f"正在为缺少GUID的节点生成GUID: {file_path}")

        with open(file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        generated_count = 0

        def process_node(node):
            nonlocal generated_count

            if isinstance(node, dict):
                node_type = node.get("type", "")
                if (
                    node_type in ["定额", "主材", "设备"]
                    and "guid" not in node
                    and "GUID" not in node
                ):
                    new_guid = "{" + str(uuid.uuid4()).upper() + "}"
                    node["guid"] = new_guid
                    generated_count += 1
                    print(f"为{node_type}节点生成GUID: {new_guid}")

                for value in node.values():
                    if isinstance(value, (dict, list)):
                        process_node(value)
            elif isinstance(node, list):
                for entry in node:
                    process_node(entry)

        if "projectData" in data:
            process_node(data["projectData"])

        with open(file_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

        print(f"✅ 成功为 {generated_count} 个节点生成了GUID")

    except Exception as e:
        print(f"❌ 为节点生成GUID时出错: {str(e)}")
        import traceback

        traceback.print_exc()


def process_directory(directory_path):
    """Convert every ``*.json`` file directly inside *directory_path* in place.

    The project type of each file decides the conversion: 主网 files go
    through ``transform_json_types``; 配网 and 技改 files go through
    ``transform_expense_preview`` followed by ``add_missing_guids_to_nodes``.
    Files whose type cannot be determined are skipped. A failure on one file
    is printed and does not stop the batch.

    :param directory_path: directory that contains the JSON files
    """
    print(f"开始处理目录: {directory_path}")

    # isdir also rejects a path that exists but is a regular file, which
    # os.listdir could not handle.
    if not os.path.isdir(directory_path):
        print(f"错误: 目录 {directory_path} 不存在")
        return

    json_files = [f for f in os.listdir(directory_path) if f.lower().endswith(".json")]

    if not json_files:
        print(f"警告: 目录 {directory_path} 中没有找到JSON文件")
        return

    print(f"找到 {len(json_files)} 个JSON文件")

    for json_file in json_files:
        file_path = os.path.join(directory_path, json_file)
        print(f"\n处理文件: {file_path}")

        try:
            with open(file_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            project_type = _determine_project_type(data)

            if project_type:
                print(f"检测到项目类型: {project_type}")

                if project_type == "主网":
                    print("应用主网转换...")
                    transform_json_types(file_path)  # overwrites the file
                elif project_type in ["配网", "技改"]:
                    print(f"应用{project_type}转换...")
                    transform_expense_preview(file_path, file_path)  # overwrites the file
                    add_missing_guids_to_nodes(file_path)
                else:
                    print(f"未知项目类型: {project_type},跳过处理")
            else:
                print("无法确定项目类型,跳过处理")

        except Exception as e:
            print(f"处理文件 {file_path} 时出错: {str(e)}")

    print("\n批量处理完成!")


if __name__ == "__main__":
    # Batch-convert the default output directory. To convert a single file,
    # call transform_expense_preview(input_file, output_file) directly.
    json_directory = "project2json/outputs/json"
    process_directory(json_directory)