增加了知识图谱导出excel

2025-08-18 15:14:37 +08:00
parent ce2986fbe2
commit 3fd0b2af0c
610 changed files with 6062 additions and 4932473 deletions
@@ -1,3 +1,7 @@
+"""
+批量对比多个 calculation_results.json 与 project_data.json 中的费用
+"""
+
 import json
 import os
 import re
@@ -6,19 +10,15 @@ from difflib import SequenceMatcher

 def extract_guid_from_filename(filename):
     """Return the first GUID found in ``filename``, upper-cased.

     The GUID must use the 8-4-4-4-12 hexadecimal layout; hex digits are
     matched case-insensitively. Returns ``None`` when the name contains
     no such GUID.
     """
     guid_regex = r"([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})"
     found = re.search(guid_regex, filename, re.IGNORECASE)
     if found is None:
         return None
     return found.group(1).upper()


 def normalize_cost_name(name):
     """Normalize a cost name so that similar names can be matched.

     Steps, in order:
       1. Drop a trailing ``_suffix`` (e.g. ``_GJJ``). Because ``\\w`` also
          matches ``_``, the cut starts at the earliest underscore that is
          followed only by word characters up to the end of the string.
       2. Remove every character that is not a word character.
       3. Lower-case the result.
     """
     without_suffix = re.sub(r"_\w+$", "", name)
     word_chars_only = re.sub(r"[^\w]", "", without_suffix)
     return word_chars_only.lower()


@@ -40,13 +40,9 @@ def load_calculation_results(json_file_path):


 def find_node_by_guid(expense_preview, target_guid):
-    """
-    在 expensePreview 中递归查找 GUID 对应的节点
-
-    """
+    """在 expensePreview 中递归查找 GUID 对应的节点"""

    def search_recursive(items):
-        """在列表中递归查找 GUID"""
        for item in items:
            current_guid = item.get("GUID", "").strip("{}").upper()
            if current_guid == target_guid:
@@ -57,16 +53,11 @@ def find_node_by_guid(expense_preview, target_guid):
                    return result
        return None

-    # 遍历每个大类（如 "建筑工程"）
-    for category_name, category_data in expense_preview.items():
-        if not isinstance(category_data, dict):
-            continue
-        # 遍历每个子类（如 "建筑"、"安装"）
-        for subcategory_name, items in category_data.items():
-            if isinstance(items, list):
-                result = search_recursive(items)
-                if result is not None:
-                    return result
+    for category_name, items in expense_preview.items():
+        if isinstance(items, list):
+            result = search_recursive(items)
+            if result is not None:
+                return result
    return None


@@ -84,7 +75,6 @@ def load_project_data_and_find_costs(project_json_path, target_guid):

    external_costs = {}
    for item in children:
-        # 有些节点只有 cost，没有 id（可能是合计）
        item_id = item.get("id", "")
        cost_str = item.get("cost", "0")
        try:
@@ -93,10 +83,8 @@ def load_project_data_and_find_costs(project_json_path, target_guid):
            cost_val = 0.0

        if item_id:
-            # 只提取有 id 的项
-            clean_id = re.sub(r"_\w+$", "", item_id)  # 去掉 _GJJ 等后缀
+            clean_id = re.sub(r"_\w+$", "", item_id)
            external_costs[clean_id] = cost_val
-        # 如果没有 id，可以考虑用其他方式标记，这里先忽略
    return external_costs


@@ -117,17 +105,16 @@ def match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6):
                best_score = score
                best_match = ext_name

-        # 判断是否足够相似
        if best_match and best_score >= similarity_threshold:
            ext_value = ext_costs[best_match]
            difference = calc_value - ext_value
            comparison.append(
                {
                    "项目": calc_name,
-                    "计算值": calc_value,
                    "参考值": ext_value,
+                    "计算值": calc_value,
                    "差异": difference,
-                    "匹配项": best_match,
+                    "原数据项": best_match,
                    "相似度": best_score,
                }
            )
@@ -136,10 +123,10 @@ def match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6):
            comparison.append(
                {
                    "项目": calc_name,
-                    "计算值": calc_value,
                    "参考值": None,
+                    "计算值": calc_value,
                    "差异": None,
-                    "匹配项": None,
+                    "原数据项": None,
                    "相似度": best_score,
                }
            )
@@ -148,71 +135,99 @@ def match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6):
    for ext_name, ext_value in ext_costs.items():
        if ext_name not in matched_ext:
            comparison.append(
-                {"项目": None, "计算值": None, "参考值": ext_value, "差异": None, "匹配项": ext_name, "相似度": None}
+                {
+                    "项目": None,
+                    "参考值": ext_value,
+                    "计算值": None,
+                    "差异": None,
+                    "原数据项": ext_name,
+                    "相似度": None,
+                }
            )

    return comparison


 def save_comparison_to_txt(comparison, output_txt_path):
     """Write the comparison rows to a fixed-width UTF-8 text table.

     Args:
         comparison: iterable of dicts carrying the keys "项目", "参考值",
             "计算值", "差异" and "原数据项". The three numeric fields may be
             ``None`` (rendered as blank); the text fields may be ``None``
             or empty.
         output_txt_path: destination path; an existing file is overwritten.

     The header and every data row are built from the same column spec and
     joined with a single space, so each value starts at the same offset as
     its column title. Cells are padded and truncated to the column width.
     Widths are counted in characters, not display cells, so wide CJK text
     may still look uneven in a terminal.
     """
     # (dict key / column title, width in characters)
     columns = (
         ("项目", 20),
         ("参考值", 25),
         ("计算值", 25),
         ("差异", 25),
         ("原数据项", 30),
     )
     numeric_keys = ("参考值", "计算值", "差异")

     def format_cell(key, value, width):
         # Numbers get two decimals; 0.0 is a real value, so only None is blank.
         if key in numeric_keys:
             text = "" if value is None else f"{value:.2f}"
         else:
             text = value or ""
         return text.ljust(width)[:width]

     header = " ".join(title.ljust(width) for title, width in columns)
     lines = [header + "\n", "-" * 120 + "\n"]
     for item in comparison:
         cells = [format_cell(key, item[key], width) for key, width in columns]
         lines.append(" ".join(cells) + "\n")

     with open(output_txt_path, "w", encoding="utf-8") as f:
         f.writelines(lines)
     print(f"✅ 对比结果已保存至: {output_txt_path}")


 def main():
     """Compare every calculation-result JSON in a folder with the project JSON.

     For each file in ``calc_results_folder`` whose name ends with ".json" and
     contains ``result_file_keyword``, the GUID is taken from the file name,
     the calculated costs and the reference costs for that GUID are loaded,
     and the comparison is written to ``output_folder`` as a ".txt" file with
     the same base name.

     A failure on one file is reported and does not stop the batch. If the
     input folder does not exist, a message is printed and nothing is
     created or processed.
     """
     # ---- configuration -------------------------------------------------
     # Folder holding all *_calculation_results.json files.
     calc_results_folder = "project2json/outputs/bclresults/变电技改国网"
     # Main project JSON used as the reference data source.
     project_data_json_path = "project2json/outputs/json/变电技改国网.json"
     # Folder that receives the comparison reports.
     output_folder = "project2json/outputs/comparison_results"
     # File-name keyword identifying result files (adjust to the naming used).
     result_file_keyword = "_calculation_results.json"
     # ---------------------------------------------------------------------

     # Validate the input before creating anything, so a wrong path gives a
     # clear message instead of an uncaught FileNotFoundError from listdir.
     if not os.path.isdir(calc_results_folder):
         print(f"❌ 计算结果文件夹不存在: {calc_results_folder}")
         return

     os.makedirs(output_folder, exist_ok=True)

     print(f"开始批量处理文件夹: {calc_results_folder}")
     processed_count = 0

     # os.listdir order is arbitrary; sort so logs and runs are reproducible.
     for filename in sorted(os.listdir(calc_results_folder)):
         if not filename.endswith(".json") or result_file_keyword not in filename:
             continue

         calc_json_path = os.path.join(calc_results_folder, filename)

         # Top-level batch boundary: report the failure and move on to the
         # next file rather than aborting the whole run.
         try:
             guid = extract_guid_from_filename(filename)
             if not guid:
                 print(f"⚠️ 无法从文件名提取 GUID: {filename}")
                 continue

             print(f"\n📄 处理文件: {filename}")
             print(f"   提取 GUID: {guid}")

             calc_costs = load_calculation_results(calc_json_path)
             print(f"   加载 {len(calc_costs)} 个计算费用项")

             ext_costs = load_project_data_and_find_costs(project_data_json_path, guid)
             print(f"   找到 {len(ext_costs)} 个参考费用项")

             comparison = match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6)

             # Same base name as the source JSON, written to the output folder.
             base_name = os.path.splitext(filename)[0]
             output_txt_path = os.path.join(output_folder, base_name + ".txt")
             save_comparison_to_txt(comparison, output_txt_path)

             processed_count += 1

         except Exception as e:
             print(f"❌ 处理文件 {filename} 时出错: {e}")

     print(f"\n✅ 批量处理完成！共处理 {processed_count} 个文件。")
     print(f"📊 所有对比结果已保存至: {output_folder}")


 if __name__ == "__main__":