上传文件
This commit is contained in:
@@ -0,0 +1,131 @@
|
||||
import json
|
||||
import os
|
||||
import csv
|
||||
from collections import defaultdict
|
||||
|
||||
|
||||
def analyze_project_division(json_file_path):
    """Collect the attribute names used by each node type in a project tree.

    The file is expected to hold a JSON object whose tree data lives under
    ``projectData.projectDivision``. Every dict reached while walking that
    value is treated as a node. A node's type is read from its ``"type"``
    key, then from ``"类型"``, and falls back to ``"未知类型"``.

    Args:
        json_file_path: Path of the JSON file to analyze.

    Returns:
        A ``defaultdict(set)`` mapping each node type to the set of key names
        seen on nodes of that type. An empty dict is returned when the file
        has no ``projectData.projectDivision`` entry, or when it cannot be
        read or parsed; the problem is printed instead of being raised.
    """
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Validate the shape explicitly so that a non-object "projectData"
        # is reported as missing data instead of surfacing as a TypeError.
        project_data = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project_data, dict) or "projectDivision" not in project_data:
            print(f"文件 {json_file_path} 中不包含projectData.projectDivision数据")
            return {}

        project_division = project_data["projectDivision"]

        # Attribute names grouped by node type.
        type_attributes = defaultdict(set)

        def resolve_node_type(node):
            """Return a hashable type label for a node dict."""
            node_type = node.get("type", node.get("类型", "未知类型"))
            if isinstance(node_type, (dict, list)):
                # JSON arrays/objects are unhashable and cannot be dict keys;
                # use their JSON text so one odd node does not abort the file.
                node_type = json.dumps(node_type, ensure_ascii=False, sort_keys=True)
            return node_type

        def traverse_node(node):
            """Record a node's keys, then descend into nested dicts/lists."""
            if isinstance(node, dict):
                type_attributes[resolve_node_type(node)].update(node)

                # Children are visited first, whether stored as a list or as
                # a single dict; scalars fall through as a no-op.
                traverse_node(node.get("children"))

                # Then every other nested structure, in key order.
                for key, value in node.items():
                    if key != "children":
                        traverse_node(value)
            elif isinstance(node, list):
                for item in node:
                    traverse_node(item)

        if isinstance(project_division, dict):
            # The top-level keys only name the trees; the container itself is
            # not a node, so its own keys are not recorded.
            for value in project_division.values():
                traverse_node(value)
        else:
            # A list of root nodes (or a scalar, which is a no-op).
            traverse_node(project_division)

        return type_attributes

    except Exception as exc:
        # Deliberate best-effort boundary: one unreadable file must not stop
        # a batch run, so report the failure and return an empty result.
        print(f"处理文件 {json_file_path} 时出错: {exc}")
        import traceback

        traceback.print_exc()
        return {}
|
||||
|
||||
|
||||
def save_to_csv(type_attributes, output_file="node_attributes.csv"):
    """Write the per-type attribute names to a two-column CSV file.

    The file starts with a header row, followed by one row per
    (node type, attribute name) pair. Node types keep the mapping's
    iteration order; attribute names are sorted within each type.

    Args:
        type_attributes: Mapping of node type to a collection of attribute
            names.
        output_file: Path of the CSV file to create or overwrite.
    """
    header = ["节点类型", "属性名"]

    # Rows are generated lazily, so they reach the file in the same order a
    # nested loop of writerow() calls would produce.
    rows = (
        [kind, attr_name]
        for kind, attr_names in type_attributes.items()
        for attr_name in sorted(attr_names)
    )

    with open(output_file, "w", encoding="utf-8", newline="") as csv_handle:
        out = csv.writer(csv_handle)
        out.writerow(header)
        out.writerows(rows)
|
||||
|
||||
|
||||
def main(
    json_dir="dataset/json/配网清单/2022行业招标3.1.12_readable.json",
    output_file="node_attributes.csv",
):
    """Analyze one JSON file or a directory of them and export the result.

    Args:
        json_dir: Path of a single JSON file, or of a directory that is
            searched recursively for files ending in ``.json``. Defaults to
            the path this script was originally written for.
        output_file: Path of the CSV file that receives the merged result.
    """
    # Merged result across all analyzed files.
    all_type_attributes = defaultdict(set)

    if os.path.isdir(json_dir):
        for root, dirs, files in os.walk(json_dir):
            # os.walk yields entries in arbitrary order; sorting in place
            # (top-down walk) makes the traversal, and therefore the CSV row
            # order, reproducible across runs and machines.
            dirs.sort()
            for file_name in sorted(files):
                if not file_name.endswith(".json"):
                    continue

                file_path = os.path.join(root, file_name)
                print(f"正在处理: {file_path}")

                # Merge this file's attribute sets into the overall result.
                for node_type, attrs in analyze_project_division(file_path).items():
                    all_type_attributes[node_type].update(attrs)
    else:
        # Anything that is not a directory is treated as a single file.
        print(f"正在处理: {json_dir}")
        all_type_attributes = analyze_project_division(json_dir)

    save_to_csv(all_type_attributes, output_file)
    # Report the file that was actually written rather than a fixed name.
    print(f"统计结果已保存到 {output_file}")
|
||||
|
||||
|
||||
# Script entry point: run the analysis only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user