import csv
import json
import os
import traceback
from collections import defaultdict


def _collect_attributes(node, type_attributes):
    """Record the attribute names of ``node`` and of everything nested in it.

    Lists are walked element by element, dicts are treated as nodes, and
    any other value (string, number, bool, None) is ignored.

    Args:
        node: A value decoded from JSON.
        type_attributes: Mapping of node type -> set of attribute names;
            updated in place.
    """
    if isinstance(node, list):
        for item in node:
            _collect_attributes(item, type_attributes)
        return
    if not isinstance(node, dict):
        return

    # The node type comes from "type", falling back to "类型", and finally
    # to a fixed placeholder when neither key is present.
    node_type = node.get("type", node.get("类型", "未知类型"))
    if isinstance(node_type, (dict, list)):
        # A container cannot be used as a dict key; use its JSON text so
        # one odd node does not abort the analysis of the whole file.
        node_type = json.dumps(node_type, ensure_ascii=False, sort_keys=True)

    if node:
        type_attributes[node_type].update(node)

    # "children" is visited first so node types are discovered in
    # depth-first order. It may be a list of nodes or a single node.
    _collect_attributes(node.get("children"), type_attributes)

    # Any other nested dict or list may contain nodes as well.
    for key, value in node.items():
        if key != "children":
            _collect_attributes(value, type_attributes)


def analyze_project_division(json_file_path):
    """Collect the attribute names used by each node type in a JSON file.

    The file is expected to hold ``projectData.projectDivision``. Every
    dict found beneath it is treated as a node whose type is the value of
    its ``type`` key (or ``类型``, or ``未知类型`` when both are absent).

    Args:
        json_file_path: Path of the JSON file to analyze.

    Returns:
        A ``defaultdict(set)`` mapping node type -> set of attribute names.
        An empty dict is returned when the file lacks
        ``projectData.projectDivision`` or cannot be processed; in both
        cases a message is printed instead of raising.
    """
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        project_data = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project_data, dict) or "projectDivision" not in project_data:
            print(f"文件 {json_file_path} 中不包含projectData.projectDivision数据")
            return {}

        project_division = project_data["projectDivision"]
        type_attributes = defaultdict(set)

        if isinstance(project_division, dict):
            # The top-level mapping is only a container: its own keys are
            # not recorded, only the nodes stored under them.
            for value in project_division.values():
                _collect_attributes(value, type_attributes)
        else:
            # Also accept a projectDivision that is directly a list of nodes.
            _collect_attributes(project_division, type_attributes)

        return type_attributes

    except Exception as e:
        # Deliberately broad: this is the per-file boundary of a batch run,
        # so one unreadable or malformed file must not stop the others.
        print(f"处理文件 {json_file_path} 时出错: {e}")
        traceback.print_exc()
        return {}


def save_to_csv(type_attributes, output_file="node_attributes.csv"):
    """Write the collected statistics to a CSV file.

    The file starts with a header row, followed by one row per
    (node type, attribute name) pair. Attribute names are sorted within
    each node type; node types keep the mapping's iteration order.

    Args:
        type_attributes: Mapping of node type -> iterable of attribute names.
        output_file: Path of the CSV file to write (overwritten if present).
    """
    with open(output_file, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        writer.writerow(["节点类型", "属性名"])
        for node_type, attributes in type_attributes.items():
            writer.writerows([node_type, attr] for attr in sorted(attributes))


def main(
    json_path="dataset/json/配网清单/2022行业招标3.1.12_readable.json",
    output_file="node_attributes.csv",
):
    """Analyze one JSON file, or every ``.json`` file under a directory.

    Args:
        json_path: A JSON file, or a directory that is searched recursively
            for files whose name ends with ``.json``.
        output_file: Path of the CSV file that receives the merged result.
    """
    all_type_attributes = defaultdict(set)

    if os.path.isdir(json_path):
        for root, dirs, files in os.walk(json_path):
            # Sort in place so the traversal order (and therefore the row
            # order of the CSV) does not depend on the file system.
            dirs.sort()
            for file in sorted(files):
                if not file.endswith(".json"):
                    continue
                file_path = os.path.join(root, file)
                print(f"正在处理: {file_path}")
                current_type_attrs = analyze_project_division(file_path)
                for node_type, attrs in current_type_attrs.items():
                    all_type_attributes[node_type].update(attrs)
    else:
        print(f"正在处理: {json_path}")
        all_type_attributes = analyze_project_division(json_path)

    save_to_csv(all_type_attributes, output_file)
    print(f"统计结果已保存到 {output_file}")


if __name__ == "__main__":
    main()