上传文件

2025-08-01 15:31:56 +08:00
commit 9609bb67b4
805 changed files with 982256 additions and 0 deletions
@@ -0,0 +1,131 @@
+import json
+import os
+import csv
+from collections import defaultdict
+
+
def analyze_project_division(json_file_path):
    """Collect the attribute names used by each node type in a project file.

    The JSON document at ``json_file_path`` is loaded and every dict nested
    anywhere below ``projectData.projectDivision`` is treated as a node.  A
    node's type is its ``"type"`` value, falling back to ``"类型"`` and then
    to the literal ``"未知类型"``.  The ``projectDivision`` container itself
    is not counted as a node; only the values it holds are.

    Args:
        json_file_path: Path of the UTF-8 encoded JSON file to analyse.

    Returns:
        A mapping from node type to the set of attribute (key) names seen on
        nodes of that type.  An empty dict is returned when the file has no
        ``projectData.projectDivision`` entry or when reading/parsing fails;
        in both cases a message is printed instead of an exception being
        raised.
    """
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        # Guard against documents whose top level (or projectData) is not an
        # object, so they are reported as "missing data" rather than crashing.
        project_data = data.get("projectData") if isinstance(data, dict) else None
        if not isinstance(project_data, dict) or "projectDivision" not in project_data:
            print(f"文件 {json_file_path} 中不包含projectData.projectDivision数据")
            return {}

        project_division = project_data["projectDivision"]
        type_attributes = defaultdict(set)

        # Seed the work stack with the container's contents.  Both object and
        # array containers are accepted.
        if isinstance(project_division, dict):
            pending = list(project_division.values())
        elif isinstance(project_division, list):
            pending = list(project_division)
        else:
            pending = []

        # Iterative depth-first walk: an explicit stack keeps the traversal
        # independent of the interpreter's recursion limit.  Visit order does
        # not matter because results are accumulated into sets.
        while pending:
            node = pending.pop()
            if isinstance(node, list):
                pending.extend(node)
            elif isinstance(node, dict):
                node_type = node.get("type", node.get("类型", "未知类型"))
                # A "type" value that is itself an object/array is unhashable
                # and cannot be a dict key; use its JSON text as the label.
                if isinstance(node_type, (dict, list)):
                    node_type = json.dumps(
                        node_type, ensure_ascii=False, sort_keys=True
                    )

                # Updating a set from a dict adds the dict's keys.
                type_attributes[node_type].update(node)

                # Descend into every nested container, whatever its key is
                # called, so a dict-valued "children" is visited as well.
                pending.extend(
                    value
                    for value in node.values()
                    if isinstance(value, (dict, list))
                )

        return type_attributes

    except Exception as e:
        # Deliberately broad: this is the per-file boundary, and one bad file
        # must not abort a batch run.  The traceback is still printed.
        print(f"处理文件 {json_file_path} 时出错: {e}")
        import traceback

        traceback.print_exc()
        return {}
+
+
def save_to_csv(type_attributes, output_file="node_attributes.csv"):
    """Write the per-type attribute names to a two-column CSV file.

    The file starts with a header row, followed by one row per
    (node type, attribute name) pair.  Node types appear in the mapping's
    iteration order; within a type the attribute names are sorted.

    Args:
        type_attributes: Mapping from node type to a collection of attribute
            names.
        output_file: Path of the CSV file to create or overwrite.
    """
    header = ["节点类型", "属性名"]

    # newline="" leaves line-ending handling to the csv module.
    with open(output_file, "w", encoding="utf-8", newline="") as csv_handle:
        csv_writer = csv.writer(csv_handle)
        csv_writer.writerow(header)
        csv_writer.writerows(
            [kind, attr_name]
            for kind, attr_names in type_attributes.items()
            for attr_name in sorted(attr_names)
        )
+
+
def main(
    json_path="dataset/json/配网清单/2022行业招标3.1.12_readable.json",
    output_file="node_attributes.csv",
):
    """Analyse one JSON file or a directory tree of them and export a CSV.

    Args:
        json_path: Either a single JSON file or a directory.  A directory is
            walked recursively and every file whose name ends in ``.json`` is
            analysed.  Any other path is handed to the analyser as a single
            file.
        output_file: Path of the CSV report to write.
    """
    # Resolve the list of input files first so that both modes share the
    # same merge loop below.
    if os.path.isdir(json_path):
        json_files = []
        for root, _, files in os.walk(json_path):
            json_files.extend(
                os.path.join(root, name)
                for name in files
                if name.endswith(".json")
            )
    else:
        json_files = [json_path]

    # Union the per-file results.  A file that could not be analysed yields
    # an empty mapping and therefore contributes nothing.
    all_type_attributes = defaultdict(set)
    for file_path in json_files:
        print(f"正在处理: {file_path}")
        for node_type, attrs in analyze_project_division(file_path).items():
            all_type_attributes[node_type].update(attrs)

    save_to_csv(all_type_attributes, output_file)
    # Report the path that was actually written, not a fixed name.
    print(f"统计结果已保存到 {output_file}")


# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()