Files
KG_generation/unified_structure.py
T
chentianrui 9609bb67b4 上传文件
2025-08-01 15:31:56 +08:00

706 lines
29 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
"""
第三步:将bcl计算结果补充到json文件中
"""
import json
import os
from copy import deepcopy
import uuid
class ProjectExpenseProcessor:
    """Merge BCL calculation results into a project's expense-preview tree.

    The processor works on a deep copy of the project dictionary and looks
    nodes up by GUID under ``projectData.projectDivision`` and
    ``projectData.expensePreview``.  GUIDs are compared case-insensitively
    with surrounding braces stripped.
    """

    # Fields copied from every labour/material/machine record, in output order.
    _RCJ_COMMON_FIELDS = (
        "编码",
        "名称",
        "单位",
        "预算价不含税",
        "市场价不含税",
        "预算价合价",
        "市场价合价",
        "价差",
        "数量",
    )
    # (key in the input data, value written to "type", extra leading fields)
    _RCJ_SECTIONS = (
        ("人工节点", "人工", ()),
        ("材料节点", "材料", ("供货方",)),
        ("机械节点", "机械", ()),
    )

    def __init__(self):
        # Set by initialize_project_data(); None until then.
        self.project_data = None

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _normalize_guid(value):
        """Return *value* upper-cased without braces, or "" if it is not a string."""
        if not isinstance(value, str):
            return ""
        return value.strip("{}").upper()

    def _project_section(self, name):
        """Return ``project_data["projectData"][name]``, or None when unavailable."""
        if not isinstance(self.project_data, dict):
            return None
        inner = self.project_data.get("projectData")
        if not isinstance(inner, dict):
            return None
        return inner.get(name)

    def _search_division(self, wanted, node):
        """Depth-first search of a division subtree for the normalized GUID *wanted*.

        ``None`` and scalar nodes are leaves, so a JSON ``null`` inside the
        tree can never restart the search from the root.
        """
        if isinstance(node, list):
            for item in node:
                found = self._search_division(wanted, item)
                if found is not None:
                    return found
        elif isinstance(node, dict):
            if self._normalize_guid(node.get("GUID")) == wanted:
                return node
            # Descend into every container value, not only "children".
            for value in node.values():
                if isinstance(value, (dict, list)):
                    found = self._search_division(wanted, value)
                    if found is not None:
                        return found
        return None

    def _search_expense(self, wanted, node):
        """Search an expense-preview node and its ``children`` for *wanted*."""
        if not isinstance(node, dict):
            return None
        if self._normalize_guid(node.get("GUID")) == wanted:
            return node
        children = node.get("children")
        if isinstance(children, list):
            for child in children:
                found = self._search_expense(wanted, child)
                if found is not None:
                    return found
        return None

    def _report_division_guids(self, node, path):
        """Print every GUID below *node* and whether the expense preview has it."""
        if isinstance(node, list):
            for index, item in enumerate(node):
                self._report_division_guids(item, f"{path}[{index}]")
        elif isinstance(node, dict):
            if "GUID" in node:
                guid = node["GUID"]
                print(f"项目划分GUID: {guid} 在路径: {path}")
                if self.find_expense_preview_node(guid):
                    print(" ✓ 在费用预览中找到对应节点")
                else:
                    print(" ✗ 在费用预览中未找到对应节点")
            if "children" in node:
                self._report_division_guids(node["children"], f"{path}.children")

    @staticmethod
    def _read_json(path):
        """Load and return the JSON document stored at *path* (UTF-8)."""
        with open(path, "r", encoding="utf-8") as handle:
            return json.load(handle)

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def initialize_project_data(self, project_data):
        """Store a deep copy of *project_data* and print a structural summary.

        :param project_data: project dictionary; it is never modified
        :return: this processor, so calls can be chained
        """
        self.project_data = deepcopy(project_data)
        print(f"项目数据结构: {list(self.project_data.keys())}")
        if "projectData" not in self.project_data:
            print("警告: projectData 结构不存在!")
            return self
        print("projectData 结构存在")
        print(f"projectData 子结构: {list(self.project_data['projectData'].keys())}")
        if "expensePreview" not in self.project_data["projectData"]:
            print("警告: expensePreview 结构不存在于 projectData 中!")
            return self
        print("expensePreview 结构存在于 projectData 中")
        expense_preview = self.project_data["projectData"]["expensePreview"]
        print(f"expensePreview 类别: {list(expense_preview.keys())}")
        for category_name, category in expense_preview.items():
            print(f"类别: {category_name}, 类型: {type(category)}")
            if not isinstance(category, dict):
                continue
            for group_name, group in category.items():
                print(f" 组: {group_name}, 类型: {type(group)}")
                if not isinstance(group, list):
                    continue
                for index, item in enumerate(group):
                    if isinstance(item, dict) and "GUID" in item:
                        print(f" 项目 {index} GUID: {item['GUID']}")
        return self

    def check_guids_in_division(self, node=None, path=""):
        """Print every division GUID and whether the expense preview contains it.

        :param node: subtree to inspect; ``None`` means the whole division tree
        :param path: textual location of *node*, used in the printed report
        """
        if node is not None:
            self._report_division_guids(node, path)
            return
        root = self._project_section("projectDivision")
        if root is None and isinstance(self.project_data, dict):
            # Older callers kept projectDivision at the top level.
            root = self.project_data.get("projectDivision")
        if isinstance(root, dict):
            for category_name, category in root.items():
                self._report_division_guids(category, category_name)

    def find_project_division_node(self, target_guid, node=None, path=""):
        """Return the first division node whose GUID equals *target_guid*.

        :param target_guid: GUID to look for, with or without braces, any case
        :param node: subtree to search; ``None`` searches projectData.projectDivision
        :param path: accepted for backward compatibility (debugging aid only)
        :return: the matching dict, or None
        """
        wanted = self._normalize_guid(target_guid)
        if node is None:
            root = self._project_section("projectDivision")
            if root is None:
                print("警告: project_data 为空或不包含 projectData.projectDivision")
                return None
            if not wanted:
                return None
            categories = root.values() if isinstance(root, dict) else [root]
            for category in categories:
                found = self._search_division(wanted, category)
                if found is not None:
                    return found
            return None
        # An empty GUID must never match nodes that simply lack a GUID.
        if not wanted:
            return None
        return self._search_division(wanted, node)

    def find_expense_preview_node(self, target_guid, node=None, path=""):
        """Return the expense-preview node whose GUID equals *target_guid*.

        :param target_guid: GUID to look for, with or without braces, any case
        :param node: subtree to search; ``None`` searches every group of
            projectData.expensePreview
        :param path: accepted for backward compatibility (debugging aid only)
        :return: the matching dict, or None
        """
        wanted = self._normalize_guid(target_guid)
        expense_preview = self._project_section("expensePreview")
        if not self.project_data or expense_preview is None:
            print("警告: project_data 为空或不包含 projectData.expensePreview")
            return None
        if not wanted:
            return None
        if node is not None:
            return self._search_expense(wanted, node)
        if not isinstance(expense_preview, dict):
            return None
        for category in expense_preview.values():
            if not isinstance(category, dict):
                continue
            for group in category.values():
                if not isinstance(group, list):
                    continue
                for item in group:
                    found = self._search_expense(wanted, item)
                    if found is not None:
                        return found
        return None

    @staticmethod
    def generate_id(text):
        """Return *text* reduced to alphanumeric/CJK characters, upper-cased."""
        return "".join(
            ch for ch in text if ch.isalnum() or "\u4e00" <= ch <= "\u9fa5"
        ).upper()

    def convert_calculation_results_to_children(self, calculation_results):
        """Convert ``{node name: {cost type: amount}}`` into a list of child nodes.

        :param calculation_results: mapping of quantity-node name to its costs
        :return: list of ``{"name", "type", "children"}`` dictionaries
        """
        return [
            {
                "name": node_name,
                "type": "工程量节点",
                "children": [
                    {
                        "id": f"{cost_type}_{self.generate_id(cost_type)}",
                        "cost": str(amount),
                    }
                    for cost_type, amount in costs.items()
                ],
            }
            for node_name, costs in calculation_results.items()
        ]

    def generate_guid(self):
        """Return a new random GUID in upper case, wrapped in braces."""
        return "{" + str(uuid.uuid4()).upper() + "}"

    def add_quantity_node_expense_data(self, project_guid, calculation_results):
        """Replace an expense node's children with per-quantity-node cost summaries.

        :param project_guid: GUID of the division / expense-preview node
        :param calculation_results: ``{quantity node name: {cost type: amount}}``
        :return: True on success, False if a node is missing or an error occurs
        """
        try:
            division_node = self.find_project_division_node(project_guid)
            if not division_node:
                print(f"未找到GUID为 {project_guid} 的项目划分节点")
                return False
            expense_node = self.find_expense_preview_node(project_guid)
            if not expense_node:
                print(f"未找到GUID为 {project_guid} 的费用预览节点")
                return False
            self.ensure_standard_format(expense_node)
            # Existing children are discarded; they are rebuilt from the results.
            expense_node["children"] = []
            division_children = division_node.get("children")
            if not isinstance(division_children, list):
                division_children = []
            for node_name, costs in calculation_results.items():
                quantity_node = next(
                    (
                        child
                        for child in division_children
                        if isinstance(child, dict)
                        and node_name in (child.get("项目名称"), child.get("name"))
                    ),
                    None,
                )
                if quantity_node is None:
                    print(f"未找到名称为 {node_name} 的工程量节点")
                    continue
                # A missing or empty GUID is replaced so both trees can be linked.
                if not quantity_node.get("GUID"):
                    quantity_node["GUID"] = self.generate_guid()
                expense_node["children"].append(
                    {
                        "GUID": quantity_node["GUID"],
                        "sum": [
                            {"id": f"{cost_type}", "cost": str(amount)}
                            for cost_type, amount in costs.items()
                        ],
                        "children": [],
                        "rcj": [],
                    }
                )
            print(f"成功添加GUID为 {project_guid} 的工程量节点费用预览数据")
            return True
        except Exception as e:  # boundary: report and signal failure to the caller
            print(f"添加工程量节点费用预览数据失败: {str(e)}")
            return False

    def add_labor_material_machine_expense_data(self, project_guid, lmm_data):
        """Append labour, material and machine records to an expense node's ``rcj``.

        :param project_guid: GUID of the division / expense-preview node
        :param lmm_data: mapping that may contain the keys ``人工节点``,
            ``材料节点`` and ``机械节点``, each a list of record dictionaries
        :return: True on success, False if a node is missing or an error occurs
        """
        try:
            division_node = self.find_project_division_node(project_guid)
            if not division_node:
                print(f"未找到GUID为 {project_guid} 的项目划分节点")
                return False
            expense_node = self.find_expense_preview_node(project_guid)
            if not expense_node:
                print(f"未找到GUID为 {project_guid} 的费用预览节点")
                return False
            self.ensure_standard_format(expense_node)
            # Build every entry first so a malformed record cannot leave the
            # node half-updated.
            entries = []
            for section_key, type_label, extra_fields in self._RCJ_SECTIONS:
                for item in lmm_data.get(section_key) or ():
                    entry = {"type": type_label}
                    for field in extra_fields + self._RCJ_COMMON_FIELDS:
                        entry[field] = item.get(field, "")
                    entries.append(entry)
            expense_node["rcj"].extend(entries)
            print(f"成功添加GUID为 {project_guid} 的人材机节点费用预览数据到rcj数组")
            return True
        except Exception as e:  # boundary: report and signal failure to the caller
            print(f"添加人材机节点费用预览数据失败: {str(e)}")
            return False

    def batch_process_expense_data(self, data_list):
        """Apply quantity and labour/material/machine data for several projects.

        :param data_list: list of dicts with the keys ``projectGuid``,
            ``calculationResults`` and ``laborMaterialMachineData``
        :return: True only if every attempted update succeeded
        """
        all_success = True
        for data in data_list:
            project_guid = data.get("projectGuid")
            calculation_results = data.get("calculationResults")
            lmm_data = data.get("laborMaterialMachineData")
            if calculation_results:
                ok = self.add_quantity_node_expense_data(project_guid, calculation_results)
                all_success = all_success and ok
            if lmm_data:
                ok = self.add_labor_material_machine_expense_data(project_guid, lmm_data)
                all_success = all_success and ok
        return all_success

    def batch_process_from_folder(self, folder_path):
        """Process every quantity / rcj JSON result file found in *folder_path*.

        File names are expected to look like ``<project name>_<GUID>_...json``;
        a name containing ``调差_预算工程`` holds quantity results and one
        containing ``调差_rcj`` holds labour/material/machine data.

        :param folder_path: folder containing the result files
        :return: number of GUIDs processed without error
        """
        print("\n=== 列出所有项目划分中的GUID ===")
        all_guids = self.list_all_division_guids()
        print("=== 列出结束 ===\n")

        print("\n=== 确保所有费用预览节点都有标准格式 ===")
        standardized_count = 0
        for guid, _name, _path in all_guids or []:
            expense_node = self.find_expense_preview_node(guid)
            if expense_node:
                self.ensure_standard_format(expense_node)
                standardized_count += 1
        print(f"已标准化 {standardized_count} 个费用预览节点")
        print("=== 标准化结束 ===\n")

        # Step 1: group the result files by the GUID embedded in their names.
        guid_map = {}
        for filename in sorted(os.listdir(folder_path)):
            if not filename.endswith(".json"):
                continue
            parts = filename.split("_")
            if len(parts) < 3:
                continue
            if "调差_预算工程" in filename:
                slot = "calc"
            elif "调差_rcj" in filename:
                slot = "rcj"
            else:
                # Unrelated JSON file: nothing to process, so do not count it.
                continue
            guid = parts[1].strip("{}")
            entry = guid_map.setdefault(guid, {"calc": None, "rcj": None})
            entry[slot] = os.path.join(folder_path, filename)

        # Step 2: load the data of each GUID and apply it.
        success_count = 0
        for guid, paths in guid_map.items():
            lookup_guid = guid
            division_node = self.find_project_division_node(guid)
            if not division_node:
                file_path = paths["calc"] or paths["rcj"]
                # The first file-name field is assumed to be the project name.
                project_name = os.path.basename(file_path).split("_")[0]
                print(f"通过GUID未找到节点,尝试通过名称 '{project_name}' 查找")
                division_node = self.find_division_node_by_name(project_name)
                if division_node:
                    # Use the located node's own GUID for the updates; the
                    # file-name GUID is already known not to resolve.
                    lookup_guid = division_node.get("GUID") or guid
            if not division_node:
                print(f"无法找到对应的项目划分节点,跳过处理 GUID: {guid}")
                continue

            actual_guid = division_node.get("GUID", "")
            if not self.find_expense_preview_node(actual_guid):
                # NOTE(review): nothing below creates the node; the update
                # methods return False when it is missing.
                print(f"未找到GUID为 {actual_guid} 的费用预览节点,将在处理时创建")

            calc_data = None
            rcj_data = None
            if paths["calc"]:
                try:
                    calc_data = self._read_json(paths["calc"])
                except (OSError, ValueError) as e:
                    print(f"读取工程量文件失败 {paths['calc']}: {e}")
                    continue
            if paths["rcj"]:
                try:
                    rcj_data = self._read_json(paths["rcj"])
                except (OSError, ValueError) as e:
                    print(f"读取人材机文件失败 {paths['rcj']}: {e}")
                    continue

            success = True
            if calc_data:
                success = self.add_quantity_node_expense_data(lookup_guid, calc_data) and success
            if rcj_data:
                success = (
                    self.add_labor_material_machine_expense_data(lookup_guid, rcj_data) and success
                )
            if success:
                success_count += 1
                print(f"✅ 成功处理 GUID: {guid}")
            else:
                print(f"❌ 处理 GUID: {guid} 时发生错误")
        return success_count

    def get_processed_project_data(self):
        """Return the (possibly updated) project dictionary held by the processor."""
        return self.project_data

    def export_to_json(self, pretty=True):
        """Serialize the project data to a JSON string.

        :param pretty: indent with two spaces when True; emit the compact
            single-line form when False
        :return: JSON text with non-ASCII characters kept as-is
        """
        # indent=0 would still insert newlines; None is the compact form.
        return json.dumps(self.project_data, ensure_ascii=False, indent=2 if pretty else None)

    def list_all_division_guids(self):
        """Collect ``(GUID, name, path)`` for every division node that has a GUID.

        :return: list of tuples; empty when the division tree is unavailable
        """
        root = self._project_section("projectDivision")
        if root is None:
            print("警告: project_data 为空或不包含 projectData.projectDivision")
            return []

        guids = []

        def collect_guids(node, path=""):
            """Walk *node* recursively, recording each GUID that is found."""
            if isinstance(node, list):
                for index, item in enumerate(node):
                    collect_guids(item, f"{path}[{index}]")
            elif isinstance(node, dict):
                if "GUID" in node:
                    name = node.get("项目名称", node.get("name", "未命名"))
                    guids.append((node["GUID"], name, path))
                # Descend into every container value, not only "children".
                for key, value in node.items():
                    if isinstance(value, (dict, list)):
                        collect_guids(value, f"{path}.{key}")

        collect_guids(root, "projectData.projectDivision")
        return guids

    def find_division_node_by_name(self, name):
        """Return the first division node whose name equals *name*.

        The name is read from ``项目名称`` or, if absent, ``name``.  Only
        ``children`` lists are followed.

        :param name: project name to look for; an empty name matches nothing
        :return: the matching dict, or None
        """
        root = self._project_section("projectDivision")
        if root is None:
            print("警告: project_data 为空或不包含 projectData.projectDivision")
            return None
        # An empty name would otherwise match every node that has no name.
        if not name:
            return None

        def search_by_name(node):
            if isinstance(node, list):
                for item in node:
                    found = search_by_name(item)
                    if found is not None:
                        return found
            elif isinstance(node, dict):
                if node.get("项目名称", node.get("name", "")) == name:
                    return node
                if "children" in node:
                    return search_by_name(node["children"])
            return None

        categories = root.values() if isinstance(root, dict) else [root]
        for category in categories:
            found = search_by_name(category)
            if found is not None:
                return found
        return None

    def ensure_standard_format(self, expense_node):
        """Normalize *expense_node* in place to the ``sum`` / ``children`` / ``rcj`` layout.

        Direct cost items (dicts with ``id`` and ``cost`` but no ``GUID``)
        found in ``children`` are moved into ``sum``; entries of ``sum`` are
        reduced to their ``id`` and ``cost`` keys.

        :param expense_node: expense-preview node dictionary
        :return: the same dictionary, for convenience
        """

        def is_direct_cost(child):
            return (
                isinstance(child, dict)
                and "cost" in child
                and "id" in child
                and "GUID" not in child
            )

        def has_guid(child):
            return isinstance(child, dict) and "GUID" in child

        if expense_node.get("sum") is None:
            expense_node["sum"] = []
        totals = expense_node["sum"]

        children = expense_node.get("children")
        if not isinstance(children, list):
            expense_node["children"] = []
        else:
            moved_any = False
            for child in children:
                if not is_direct_cost(child):
                    continue
                moved_any = True
                already_present = any(
                    isinstance(item, dict) and item.get("id") == child["id"]
                    for item in totals
                )
                if not already_present:
                    totals.append({"id": child["id"], "cost": child["cost"]})
            if moved_any:
                if not any(has_guid(child) for child in children):
                    # Only cost items (and GUID-less leftovers): drop them all.
                    expense_node["children"] = []
                else:
                    # Mixed list: keep the real child nodes, drop the moved items.
                    expense_node["children"] = [
                        child for child in children if not is_direct_cost(child)
                    ]

        if "rcj" not in expense_node:
            expense_node["rcj"] = []

        for index, item in enumerate(totals):
            if isinstance(item, dict) and "id" in item and "cost" in item:
                totals[index] = {"id": item["id"], "cost": item["cost"]}
        return expense_node
def write_BCLresult_into_json(original_json_path, bcl_result_folder, output_json_path):
    """Merge the BCL results of one folder into a project JSON file.

    :param original_json_path: path of the original project JSON file
    :param bcl_result_folder: folder holding the BCL calculation result files
    :param output_json_path: path the merged JSON is written to
    :return: True when the merged file was written, False on any failure
    """
    try:
        # 1. Load the original project data.
        print(f"加载原始项目数据: {original_json_path}")
        with open(original_json_path, "r", encoding="utf-8") as f:
            project_data = json.load(f)

        # 2. Set up the processor on a copy of the data.
        processor = ProjectExpenseProcessor()
        processor.initialize_project_data(project_data)

        # 3. The result folder must be an existing directory.
        if not os.path.isdir(bcl_result_folder):
            print(f"错误: BCL计算结果文件夹不存在: {bcl_result_folder}")
            return False
        print(f"使用BCL计算结果文件夹: {bcl_result_folder}")

        # 4. Apply every result file found in the folder.
        count = processor.batch_process_from_folder(bcl_result_folder)
        print(f"共成功处理了 {count} 个项目节点。")

        # 5. Write the merged data.
        print(f"保存更新后的数据: {output_json_path}")
        output_dir = os.path.dirname(output_json_path)
        # A bare file name has no directory part; makedirs("") would raise.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)
        with open(output_json_path, "w", encoding="utf-8") as f:
            f.write(processor.export_to_json(pretty=True))
        print(f"✅ 数据已保存至 {output_json_path}")
        return True
    except Exception as e:  # boundary: report the failure with a traceback
        print(f"❌ 处理失败: {str(e)}")
        import traceback

        traceback.print_exc()
        return False
def batch_write_BCLresult_into_json(original_folder, bcl_result_folder, output_folder):
    """Merge BCL results into every JSON file of *original_folder*.

    For ``<name>.json`` the results are read from ``bcl_result_folder/<name>``
    when that sub-folder exists, otherwise from ``bcl_result_folder`` itself.
    The merged file is written to ``output_folder`` under the same file name.

    :param original_folder: folder containing the original project JSON files
    :param bcl_result_folder: folder containing the BCL calculation results
    :param output_folder: folder receiving the merged JSON files
    :return: number of files merged successfully
    """
    os.makedirs(output_folder, exist_ok=True)

    if not os.path.isdir(original_folder):
        # Report instead of raising, like the single-file function does.
        print(f"错误: 原始JSON文件夹不存在: {original_folder}")
        return 0

    # Sorted for a reproducible processing order; directories are skipped.
    json_files = [
        os.path.join(original_folder, entry)
        for entry in sorted(os.listdir(original_folder))
        if entry.lower().endswith(".json")
        and os.path.isfile(os.path.join(original_folder, entry))
    ]
    if not json_files:
        print(f"警告: 在目录 {original_folder} 中没有找到JSON文件")
        return 0

    success_count = 0
    for original_file in json_files:
        file_name = os.path.basename(original_file)
        output_file = os.path.join(output_folder, file_name)

        # Prefer a result sub-folder named after the file (without extension).
        base_filename = os.path.splitext(file_name)[0]
        file_bcl_result_folder = os.path.join(bcl_result_folder, base_filename)
        if not os.path.exists(file_bcl_result_folder):
            file_bcl_result_folder = bcl_result_folder

        print(f"\n处理文件: {original_file}")
        print(f"BCL计算结果文件夹: {file_bcl_result_folder}")
        print(f"输出文件: {output_file}")

        if write_BCLresult_into_json(original_file, file_bcl_result_folder, output_file):
            success_count += 1
    return success_count
if __name__ == "__main__":
    # Folder locations are hard-coded; no command-line arguments are read.
    source_dir = "project2json/outputs"  # original project JSON files
    bcl_dir = "outputs-2"  # BCL calculation results
    merged_dir = "final_outputs"  # merged JSON output

    for label, folder in (
        ("原始JSON文件夹", source_dir),
        ("BCL计算结果文件夹", bcl_dir),
        ("输出文件夹", merged_dir),
    ):
        print(f"{label}: {folder}")

    processed = batch_write_BCLresult_into_json(source_dir, bcl_dir, merged_dir)
    print(f"\n批量处理完成: 共成功处理了 {processed} 个文件")