"""Step 3: roll the expense-preview costs up the tree.

Every node of ``projectData.expensePreview`` receives a ``sum`` list of
``{"id": ..., "cost": ...}`` entries holding the total of its own costs and
the costs of all of its descendants.
"""

import copy
import json
import os
import re
from typing import Any, Dict, List, Optional, Tuple

# Keywords that, when found in the type part of ``division``, select the
# "inventory" (清单) project type.
_INVENTORY_KEYWORDS = ("清单", "结算", "招标控制价", "招投标工程", "清单计价")

# Keywords that, when found in the type part of ``division``, select the
# "budget" (预算) project type.
_BUDGET_KEYWORDS = ("概预算", "定额", "定额计价", "概算", "概预算工程")

# GUID-shaped text; a top-level key containing one marks an inventory project.
_GUID_KEY_PATTERN = re.compile(
    r"[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}",
    re.IGNORECASE,
)


class ExpenseProcessor:
    """Namespace of static/class helpers that aggregate expense-preview costs."""

    def __init__(self):
        pass

    @staticmethod
    def normalize_guid(guid: str) -> str:
        """Return ``guid`` wrapped in exactly one pair of curly braces.

        :param guid: raw GUID string, with or without (possibly repeated)
            leading/trailing braces
        :return: the normalized GUID; a falsy ``guid`` is returned unchanged
        """
        if not guid:
            return guid
        # Strip every leading/trailing brace, then add a single pair back.
        return "{" + guid.strip("{}") + "}"

    @staticmethod
    def is_cost_item(obj: Any) -> bool:
        """Tell whether ``obj`` is a cost item.

        A cost item is a dict holding ``id`` and ``cost``. Because both keys
        are required and the size is capped at two, no other key may be
        present.
        """
        return (
            isinstance(obj, dict)
            and "id" in obj
            and "cost" in obj
            and len(obj) <= 2
        )

    @staticmethod
    def extract_costs_from_children(node: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Collect deep copies of the cost items stored directly in ``children``.

        :param node: expense-preview node
        :return: copied cost items; empty when ``children`` is missing or is
            not a list
        """
        children = node.get("children")
        if not isinstance(children, list):
            return []
        return [
            copy.deepcopy(child)
            for child in children
            if ExpenseProcessor.is_cost_item(child)
        ]

    @staticmethod
    def _accumulate_costs(totals: Dict[Any, float], cost_items: Any) -> None:
        """Add every ``{"id", "cost"}`` entry of ``cost_items`` into ``totals``.

        Anything that is not a list, and list entries that are not dicts with
        both keys, are skipped. An id whose cost cannot be converted to float
        is still registered (with no contribution) so it shows up in the
        result.
        """
        if not isinstance(cost_items, list):
            return
        for item in cost_items:
            if not (isinstance(item, dict) and "id" in item and "cost" in item):
                continue
            item_id = item["id"]
            totals.setdefault(item_id, 0.0)
            try:
                totals[item_id] += float(item["cost"])
            except (ValueError, TypeError):
                pass  # invalid cost values are deliberately ignored

    @staticmethod
    def calculate_parent_costs(node: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Compute the aggregated cost list of ``node``.

        The total per id is the sum of, in this order: the node's own ``sum``
        entries, cost items placed directly in ``children``, and the ``sum``
        entries of every structured (non-cost-item) child. Children are not
        recursed into: their ``sum`` is expected to be computed already, as
        ``process_node`` does.

        :param node: expense-preview node
        :return: list of ``{"id": ..., "cost": str(total)}`` in first-seen
            id order
        """
        totals: Dict[Any, float] = {}

        # 1. The node's own costs.
        ExpenseProcessor._accumulate_costs(totals, node.get("sum"))

        # 2. Cost items that sit directly inside children (leaf node).
        ExpenseProcessor._accumulate_costs(
            totals, ExpenseProcessor.extract_costs_from_children(node)
        )

        # 3. Already-computed sums of the structured children.
        children = node.get("children")
        if isinstance(children, list):
            for child in children:
                if isinstance(child, dict) and not ExpenseProcessor.is_cost_item(child):
                    ExpenseProcessor._accumulate_costs(totals, child.get("sum"))

        return [
            {"id": item_id, "cost": str(total)}
            for item_id, total in totals.items()
        ]

    @staticmethod
    def find_guid_quantity(project_data: Optional[Dict[str, Any]], guid: str) -> float:
        """Look up the ``数量`` (quantity) of the node with the given GUID.

        The search walks ``project_data["projectDivision"]`` depth-first and
        stops at the first node whose GUID matches (braces ignored) and whose
        quantity is truthy and convertible to float. A quantity of exactly 1
        is a valid hit and ends the search.

        :param project_data: project data containing ``projectDivision``
        :param guid: GUID to look for, with or without curly braces
        :return: the quantity, or 1.0 when ``project_data`` is empty, the GUID
            is empty, or no usable quantity is found
        """
        if not project_data or "projectDivision" not in project_data:
            return 1.0

        target = guid.strip("{}")
        if not target:
            # An empty GUID would otherwise match every node lacking a GUID.
            return 1.0

        def search(node: Any) -> Optional[float]:
            """Return the quantity found under ``node`` or None if absent."""
            if isinstance(node, dict):
                node_guid = node.get("GUID", "")
                if isinstance(node_guid, str) and node_guid.strip("{}") == target:
                    quantity = node.get("数量")
                    if quantity:
                        try:
                            return float(quantity)
                        except (ValueError, TypeError):
                            # Unusable quantity: report "not found" so the
                            # caller keeps looking elsewhere.
                            return None
                candidates = node.values()
            elif isinstance(node, list):
                candidates = node
            else:
                return None

            for candidate in candidates:
                if isinstance(candidate, (dict, list)):
                    found = search(candidate)
                    if found is not None:
                        return found
            return None

        quantity = search(project_data["projectDivision"])
        return 1.0 if quantity is None else quantity

    @staticmethod
    def process_node(
        node: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
        is_bill_engineering: Optional[bool] = None,
    ) -> Dict[str, Any]:
        """Process one node and return a copy with its ``sum`` filled in.

        The input is never mutated. In the copy, ``GUID`` is normalized and
        ``sum``, ``rcj`` and ``children`` are guaranteed to exist.

        * Leaf node (``children`` holds cost items): those items become
          ``sum`` and ``children`` is emptied. When ``is_bill_engineering``
          is true, ``project_data`` is given and the node has a GUID, each
          cost is first multiplied by the quantity found for that GUID.
        * Any other node: children are processed recursively, then ``sum`` is
          computed by ``calculate_parent_costs``.

        :param node: expense-preview node
        :param project_data: project data used to look up quantities by GUID
        :param is_bill_engineering: whether this is an inventory (清单)
            project; ``None`` is treated as ``False``
        :return: the processed copy of ``node``
        """
        result = copy.deepcopy(node)

        if "GUID" in result:
            result["GUID"] = ExpenseProcessor.normalize_guid(result["GUID"])

        # Make sure the key fields exist.
        for key in ("sum", "rcj", "children"):
            if key not in result:
                result[key] = []

        if is_bill_engineering is None:
            is_bill_engineering = False

        # --- Leaf node: children are cost items -------------------------
        direct_costs = ExpenseProcessor.extract_costs_from_children(result)
        if direct_costs:
            if is_bill_engineering and project_data and "GUID" in result:
                quantity = ExpenseProcessor.find_guid_quantity(
                    project_data, result["GUID"]
                )
                for cost_item in direct_costs:
                    try:
                        cost_item["cost"] = str(float(cost_item["cost"]) * quantity)
                    except (ValueError, TypeError):
                        pass  # invalid cost values are left untouched

            # The cost items move into sum; children is emptied accordingly.
            result["sum"] = direct_costs
            result["children"] = []
            return result

        # --- Regular node: children are sub-nodes -----------------------
        children = result["children"]
        processed_children = []
        if children:
            processed_children = [
                ExpenseProcessor.process_node(child, project_data, is_bill_engineering)
                for child in children
                if not ExpenseProcessor.is_cost_item(child)
            ]
        result["children"] = processed_children

        # Aggregate from the processed copy so the children's fresh sums are
        # the ones being added up.
        result["sum"] = ExpenseProcessor.calculate_parent_costs(result)
        return result

    @staticmethod
    def process_expense_preview(
        expense_preview: Dict[str, Any],
        project_data: Optional[Dict[str, Any]] = None,
        is_bill_engineering: Optional[bool] = None,
    ) -> Dict[str, Any]:
        """Process the whole expense-preview structure.

        A top-level value that is a list has each item run through
        ``process_node``. A top-level value that is a dict has the same done
        for each of its list values. Everything else is deep-copied as is.

        :param expense_preview: expense-preview data
        :param project_data: project data used to look up quantities by GUID
        :param is_bill_engineering: whether this is an inventory (清单)
            project; ``None`` is treated as ``False``
        :return: processed copy of ``expense_preview``
        """
        if is_bill_engineering is None:
            is_bill_engineering = False

        def process_all(nodes: List[Any]) -> List[Dict[str, Any]]:
            return [
                ExpenseProcessor.process_node(item, project_data, is_bill_engineering)
                for item in nodes
            ]

        result = copy.deepcopy(expense_preview)
        for category_key, category_value in expense_preview.items():
            if isinstance(category_value, dict):
                for subcategory_key, subcategory_value in category_value.items():
                    if isinstance(subcategory_value, list):
                        result[category_key][subcategory_key] = process_all(
                            subcategory_value
                        )
            elif isinstance(category_value, list):
                result[category_key] = process_all(category_value)
        return result

    @classmethod
    def load_and_process_from_file(
        cls,
        input_path: str,
        output_path: Optional[str] = None,
        is_bill_engineering: Optional[bool] = None,
    ) -> Optional[Dict[str, Any]]:
        """Load a JSON file, process it and optionally write the result.

        :param input_path: path of the JSON file to read (UTF-8)
        :param output_path: where to write the processed JSON; nothing is
            written when falsy
        :param is_bill_engineering: project type; detected automatically
            when ``None``
        :return: the processed data, or ``None`` when the file has no
            ``projectData.expensePreview`` or any error occurred (the error
            is printed, not raised)
        """
        try:
            with open(input_path, "r", encoding="utf-8") as f:
                data = json.load(f)

            if not ("projectData" in data and "expensePreview" in data["projectData"]):
                print(f"警告: 文件 {input_path} 中未找到 projectData.expensePreview 路径")
                return None

            processed_data = cls.process_raw_data(data, is_bill_engineering)

            if output_path:
                with open(output_path, "w", encoding="utf-8") as f:
                    json.dump(processed_data, f, ensure_ascii=False, indent=4)
                print(f"处理完成,结果已保存到 {output_path}")

            return processed_data
        except Exception as e:
            # Batch boundary: report the failure and let the caller continue
            # with the next file.
            print(f"处理文件 {input_path} 时出错: {str(e)}")
            return None

    @classmethod
    def process_raw_data(
        cls, raw_data: Dict[str, Any], is_bill_engineering: Optional[bool] = None
    ) -> Dict[str, Any]:
        """Process already-loaded data and return a processed deep copy.

        :param raw_data: data containing ``projectData.expensePreview``
        :param is_bill_engineering: project type; detected automatically
            when ``None``
        :return: deep copy of ``raw_data`` with the expense preview processed
        :raises ValueError: when ``projectData.expensePreview`` is missing
        """
        if not ("projectData" in raw_data and "expensePreview" in raw_data["projectData"]):
            raise ValueError("未找到 projectData.expensePreview 路径")

        if is_bill_engineering is None:
            is_bill_engineering = _determine_project_type(raw_data) == "inventory"
            print(f"自动判断工程类型: {'清单工程' if is_bill_engineering else '预算工程'}")

        project = raw_data["projectData"]
        processed_data = copy.deepcopy(raw_data)
        processed_data["projectData"]["expensePreview"] = cls.process_expense_preview(
            project["expensePreview"],
            # Quantities are only applied for inventory projects.
            project if is_bill_engineering else None,
            is_bill_engineering,
        )
        return processed_data

    @classmethod
    def process_directory(
        cls, input_dir: str, output_dir: str, is_bill_engineering: Optional[bool] = None
    ) -> List[Tuple[str, str]]:
        """Process every ``.json`` file of ``input_dir`` into ``output_dir``.

        ``output_dir`` is created when missing; output files keep their input
        file name.

        :param input_dir: directory scanned (non-recursively) for JSON files
        :param output_dir: directory receiving the processed files
        :param is_bill_engineering: project type; detected per file when
            ``None``
        :return: ``(input_file, output_file)`` for each file processed
            successfully
        """
        os.makedirs(output_dir, exist_ok=True)

        json_files = [
            name for name in os.listdir(input_dir) if name.lower().endswith(".json")
        ]
        if not json_files:
            print(f"警告: 在目录 {input_dir} 中没有找到JSON文件")
            return []

        successful_files = []
        for name in json_files:
            input_file = os.path.join(input_dir, name)
            output_file = os.path.join(output_dir, name)

            print(f"处理文件: {input_file}")
            processed_data = cls.load_and_process_from_file(
                input_file, output_file, is_bill_engineering
            )

            if processed_data:
                successful_files.append((input_file, output_file))
                print(f"✅ 成功处理: {name}")
            else:
                print(f"❌ 处理失败: {name}")

        return successful_files


def _determine_project_type(data):
    """Decide the project type from ``division`` or, failing that, from keys.

    ``division`` is split on ``-``: the second part is used when there are
    exactly two parts, the third part when there are three or more, and the
    whole string when there is no ``-``. Inventory keywords are tested before
    budget keywords. When ``division`` is missing, empty, not a string, or
    matches no keyword, the project is "inventory" if any top-level key
    contains GUID-shaped text, else "budget".

    :param data: project data
    :return: ``"inventory"`` or ``"budget"``
    """
    division = data["division"] if "division" in data else None

    if isinstance(division, str) and division:
        parts = division.split("-")
        # Drop the leading prefix part(s); never index past the end.
        division_type = parts[min(len(parts) - 1, 2)].strip()

        if any(keyword in division_type for keyword in _INVENTORY_KEYWORDS):
            print(f"根据division字段 '{division}' 判断为清单工程")
            return "inventory"

        if any(keyword in division_type for keyword in _BUDGET_KEYWORDS):
            print(f"根据division字段 '{division}' 判断为预算工程")
            return "budget"

    # Fall back to the data structure: look for GUID-shaped top-level keys.
    if any(_GUID_KEY_PATTERN.search(key) for key in data):
        print("通过数据结构判断为清单工程")
        return "inventory"
    return "budget"


def costsummary_upwards(
    input_dir: str, output_dir: str, is_bill_engineering: Optional[bool] = None
) -> List[Tuple[str, str]]:
    """Module-level shortcut for ``ExpenseProcessor.process_directory``."""
    return ExpenseProcessor.process_directory(input_dir, output_dir, is_bill_engineering)


if __name__ == "__main__":
    input_directory = "project2json/outputs/json"
    output_directory = "project2json/outputs/merged"

    # The project type is detected automatically for each file.
    result = costsummary_upwards(input_directory, output_directory)

    if result:
        print(f"\n成功处理了 {len(result)} 个文件:")
        for src, dst in result:
            print(f"  {os.path.basename(src)} -> {os.path.basename(dst)}")
    else:
        print("\n没有文件被成功处理")