import csv
import logging
import math
import os
from typing import Dict, List, Optional, Tuple

logger = logging.getLogger(__name__)

# Fixed-width column layout (kept in sync with
# cost_comparison.save_comparison_to_txt).
# Widths: project 20, reference value 25, computed value 25, difference 25,
# original data item 30. Each constant is a half-open (start, end) span.
COL_PROJECT = (0, 20)
COL_REF = (20, 45)
COL_CALC = (45, 70)
COL_DIFF = (70, 95)
COL_ORIG = (95, 125)


def _slice(line: str, span: Tuple[int, int]) -> str:
    """Return the stripped text of ``line`` inside the half-open column span."""
    start, end = span
    # Slicing beyond the end of a short line already yields "", so no
    # explicit length check is required.
    return line[start:end].strip()


def parse_txt_line(line: str) -> Tuple[Optional[str], Optional[float]]:
    """Parse one aligned text line and extract its project name and difference.

    Args:
        line: A single line of a fixed-width comparison table.

    Returns:
        ``(project, diff)`` when both columns are present and the difference
        is numeric; ``(project, None)`` when the difference column is empty;
        ``(None, None)`` when the project column is empty or the difference
        column cannot be converted to ``float``.
    """
    project = _slice(line, COL_PROJECT)
    if not project:
        return None, None

    diff_str = _slice(line, COL_DIFF)
    if not diff_str:
        return project, None

    try:
        return project, float(diff_str)
    except ValueError:
        # Non-numeric difference column (e.g. a repeated header or separator).
        return None, None


def scan_comparison_results_folder(comp_dir: str) -> Dict[str, Tuple[float, str]]:
    """Collect, per project, the difference with the largest absolute value.

    Every ``.txt`` file directly inside ``comp_dir`` is read as UTF-8; the
    first two lines (header and separator) are skipped and the remaining
    lines are parsed with :func:`parse_txt_line`.

    Args:
        comp_dir: Path of one ``comparison_results`` directory.

    Returns:
        ``{project: (signed difference, source txt file name)}``. Only
        projects with a non-zero difference are included. When several files
        share the same absolute difference, the file whose name sorts first
        is reported.

    Raises:
        OSError: If ``comp_dir`` itself cannot be listed.
    """
    max_diff_by_project: Dict[str, Tuple[float, str]] = {}

    # Sort so that ties are resolved deterministically; os.listdir returns
    # entries in arbitrary order.
    txt_files = sorted(
        name for name in os.listdir(comp_dir) if name.lower().endswith(".txt")
    )

    for fname in txt_files:
        fpath = os.path.join(comp_dir, fname)
        try:
            with open(fpath, "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (OSError, UnicodeError) as exc:
            # Best effort: one unreadable file must not abort the whole scan,
            # but make the skip visible instead of swallowing it silently.
            logger.warning("Skipping unreadable comparison file %s: %s", fpath, exc)
            continue

        # Skip the first two lines (header and separator).
        for line in lines[2:]:
            project, diff_val = parse_txt_line(line)
            if project is None or diff_val is None:
                continue
            # Drop 0 / -0. NaN is dropped as well: it has no magnitude, and
            # once stored it could never be displaced because every
            # comparison against NaN is False.
            if diff_val == 0.0 or math.isnan(diff_val):
                continue

            best = max_diff_by_project.get(project)
            if best is None or abs(diff_val) > abs(best[0]):
                max_diff_by_project[project] = (diff_val, fname)

    return max_diff_by_project


def find_all_projects_with_comparison_results(root: str) -> List[Tuple[str, str]]:
    """Recursively find every ``comparison_results`` directory below ``root``.

    For the layout ``<project dir>/bclresults/comparison_results`` the project
    name is the name of ``<project dir>``, i.e. the parent of the directory
    that contains ``comparison_results``.

    NOTE(review): other layouts are named by the same rule (parent of the
    containing directory); the original code had two identical branches
    here - confirm that this is the intended behaviour for those layouts.

    Args:
        root: Directory to search; may be relative.

    Returns:
        ``[(project name, absolute path of comparison_results)]`` in a
        deterministic (sorted, top-down) traversal order.
    """
    results: List[Tuple[str, str]] = []
    for current, dirs, _files in os.walk(root):
        # In-place sort makes os.walk descend in a stable order.
        dirs.sort()
        if "comparison_results" not in dirs:
            continue

        # Resolve to an absolute path first: with a relative root such as
        # "bclresults", dirname() would otherwise be "" and the project name
        # would come out empty.
        container = os.path.abspath(current)
        comp_dir = os.path.join(container, "comparison_results")
        project_name = os.path.basename(os.path.dirname(container))
        results.append((project_name, comp_dir))
    return results


def aggregate_to_csv(root_of_projects: str, output_csv_path: str) -> None:
    """Write the largest difference of every fee item of every project to CSV.

    All ``comparison_results`` directories below ``root_of_projects`` are
    scanned. One row is written per project, without a header row:

        [project name, fee 1, diff 1, file 1, fee 2, diff 2, file 2, ...]

    Rows are not padded to a common width; each project only lists its own
    fee items that have a non-zero difference, sorted by fee name. A project
    without any difference still gets a row containing only its name.

    Args:
        root_of_projects: Directory searched recursively for projects.
        output_csv_path: Destination CSV file (UTF-8); missing parent
            directories are created.
    """
    os.makedirs(os.path.dirname(output_csv_path) or ".", exist_ok=True)

    projects = find_all_projects_with_comparison_results(root_of_projects)

    with open(output_csv_path, "w", encoding="utf-8", newline="") as f:
        writer = csv.writer(f)
        for project_name, comp_dir in projects:
            diff_map = scan_comparison_results_folder(comp_dir)
            row: List[str] = [project_name]
            for fee in sorted(diff_map):
                val, fname = diff_map[fee]
                row.extend([fee, str(val), fname])
            # A project without differences still yields a name-only row.
            writer.writerow(row)


def main():
    """Aggregate the default output tree into the default summary CSV."""
    root_of_projects = r"data/output"
    output_csv_path = r"data/output/engineering_diffs_summary.csv"
    aggregate_to_csv(root_of_projects, output_csv_path)
    print(f"✅ 汇总完成,输出 CSV: {output_csv_path}")


if __name__ == "__main__":
    main()