132 lines
4.3 KiB
Python
132 lines
4.3 KiB
Python
import json
|
|
import os
|
|
import csv
|
|
from collections import defaultdict
|
|
|
|
|
|
def analyze_project_division(json_file_path):
    """Collect the attribute names used by each node type in a JSON file.

    Loads ``json_file_path``, looks up ``projectData.projectDivision`` and
    walks everything nested beneath it. Every dict found below
    ``projectDivision`` counts as a node: its type is the value of its
    ``"type"`` key, otherwise of its ``"类型"`` key, otherwise the
    placeholder ``"未知类型"``. All keys of the dict are recorded under that
    type. When ``projectDivision`` is a dict, the dict itself is only a
    container: its values are traversed but its own keys are not recorded.

    Args:
        json_file_path: Path to a UTF-8 encoded JSON file.

    Returns:
        A ``defaultdict(set)`` mapping node type to the set of attribute
        names seen on nodes of that type. An empty plain ``dict`` is
        returned when the file has no ``projectData.projectDivision`` or
        when reading/parsing fails; in the failure case the error and its
        traceback are printed instead of being raised.
    """
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Guard against a non-dict root or a non-dict projectData so that
        # such files get the "missing data" message rather than a traceback.
        project_data = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project_data, dict) or "projectDivision" not in project_data:
            print(f"文件 {json_file_path} 中不包含projectData.projectDivision数据")
            return {}

        project_division = project_data["projectDivision"]

        # node type -> set of attribute names
        type_attributes = defaultdict(set)

        def resolve_node_type(node):
            """Return a hashable type label for a dict node."""
            node_type = node.get("type", node.get("类型", "未知类型"))
            try:
                hash(node_type)
            except TypeError:
                # A list/dict used as the type cannot be a dict key; use its
                # JSON text so the node is still counted instead of aborting
                # the whole file.
                node_type = json.dumps(node_type, ensure_ascii=False, sort_keys=True)
            return node_type

        def traverse_node(node):
            """Record the keys of every dict reachable from ``node``."""
            if isinstance(node, list):
                for item in node:
                    traverse_node(item)
                return
            if not isinstance(node, dict):
                return  # scalars carry no attributes

            if node:
                type_attributes[resolve_node_type(node)].update(node)

            # Children are visited first (whether given as a list or as a
            # single dict), then every other nested container in key order.
            traverse_node(node.get("children"))
            for key, value in node.items():
                if key != "children":
                    traverse_node(value)

        # A dict projectDivision is a container of sub-trees; a list (or a
        # single nested value) is traversed directly.
        if isinstance(project_division, dict):
            roots = project_division.values()
        else:
            roots = [project_division]
        for root in roots:
            traverse_node(root)

        return type_attributes

    except Exception as e:
        # Deliberate best-effort boundary: callers processing many files
        # rely on getting an empty result instead of an exception.
        print(f"处理文件 {json_file_path} 时出错: {e}")
        import traceback

        traceback.print_exc()
        return {}
|
|
|
|
|
|
def save_to_csv(type_attributes, output_file="node_attributes.csv"):
    """Write the per-type attribute names to a CSV file.

    The file starts with the header row ``节点类型,属性名`` and then holds one
    row per (node type, attribute name) pair. Node types appear in the
    mapping's iteration order; within a type the attribute names are sorted
    in ascending order.

    Args:
        type_attributes: Mapping from node type to a collection of
            attribute names.
        output_file: Path of the CSV file to create or overwrite (UTF-8).
    """
    with open(output_file, "w", encoding="utf-8", newline="") as csv_file:
        out = csv.writer(csv_file)
        out.writerow(["节点类型", "属性名"])
        # Rows are produced lazily, so they are written in the same order
        # as a nested loop would emit them.
        out.writerows(
            [kind, name]
            for kind, names in type_attributes.items()
            for name in sorted(names)
        )
|
|
|
|
|
|
def main(json_path=None, output_file="node_attributes.csv"):
    """Analyze one JSON file, or every ``.json`` file under a directory.

    The per-file results of ``analyze_project_division`` are merged into a
    single mapping of node type to attribute names, which is then written
    with ``save_to_csv``.

    Args:
        json_path: File or directory to analyze. When omitted, the path
            that was previously hard-coded in this script is used.
        output_file: Path of the CSV file to write.
    """
    if json_path is None:
        # Default input; may be a single file or a directory.
        json_path = "dataset/json/配网清单/2022行业招标3.1.12_readable.json"

    if os.path.isdir(json_path):
        json_files = []
        for root, dirs, files in os.walk(json_path):
            # Sorting in place makes the walk order (and therefore the CSV
            # row order) reproducible across filesystems.
            dirs.sort()
            json_files.extend(
                os.path.join(root, name)
                for name in sorted(files)
                if name.endswith(".json")
            )
    else:
        json_files = [json_path]

    # Merged results across all processed files.
    all_type_attributes = defaultdict(set)
    for file_path in json_files:
        print(f"正在处理: {file_path}")
        current_type_attrs = analyze_project_division(file_path)
        for node_type, attrs in current_type_attrs.items():
            all_type_attributes[node_type].update(attrs)

    save_to_csv(all_type_attributes, output_file)
    print(f"统计结果已保存到 {output_file}")
|
|
|
|
|
|
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|