"""Export the current engineering project from a Neo4j knowledge graph to Excel.

Every ``fetch_*`` function first runs a query scoped to the
``EngineeringData {current: true}`` node and, when that yields nothing, falls
back to an unscoped legacy query for backward compatibility.
"""

import os
import re
from configparser import ConfigParser
from datetime import datetime
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple

import pandas as pd
from neo4j import GraphDatabase

# Sort key that places a missing sortid after every numeric sortid.
_SORT_LAST = (float("inf"),)

# Characters openpyxl rejects in a worksheet title.
_INVALID_SHEET_CHARS = re.compile(r"[\\/*?:\[\]]")

# Excel limits worksheet titles to 31 characters.
_MAX_SHEET_NAME_LEN = 31

# Project type value that enables the extra List-node handling.
_LIST_PROJECT = "清单工程"

# Parent-node columns copied into the cost preview, per supported parent label:
# (output column / node property name, Cypher alias).
_COST_PARENT_FIELDS = {
    "ProjectDivisionItem": (
        ("专业类型", "parent_specialty"),
        ("取费表", "parent_fee_table"),
        ("序号", "parent_xuhao"),
    ),
    "ProjectQuantity": (("编码", "parent_code"),),
}


# ------------------------ Shared helpers ------------------------
def load_neo4j_config(config_file: str = "config.ini") -> Dict[str, str]:
    """Read the Neo4j connection settings from the ``[neo4j]`` section.

    Args:
        config_file: Path of the INI file to read.

    Returns:
        A dict with the keys ``uri``, ``user`` and ``password``.
    """
    config = ConfigParser()
    config.read(config_file)
    return {key: config.get("neo4j", key) for key in ("uri", "user", "password")}


def _collect_rows(
    driver,
    queries: Sequence[str],
    build_row: Callable[[Any], Optional[Dict[str, Any]]],
    **params: Any,
) -> List[Dict[str, Any]]:
    """Run ``queries`` in order and return the rows of the first one that yields any.

    ``build_row`` converts a record into a row dict, or returns ``None`` to
    skip the record. ``params`` are passed to ``session.run`` as query
    parameters.
    """
    rows: List[Dict[str, Any]] = []
    with driver.session() as session:
        for query in queries:
            for record in session.run(query, **params):
                row = build_row(record)
                if row is not None:
                    rows.append(row)
            if rows:
                break
    return rows


def _order_columns(df: pd.DataFrame, fixed_cols: List[str], excluded_cols: set) -> List[str]:
    """Return ``fixed_cols`` followed by the remaining non-excluded columns.

    Only columns that actually exist in ``df`` are kept.
    """
    other_cols = [col for col in df.columns if col not in fixed_cols and col not in excluded_cols]
    return [col for col in fixed_cols + other_cols if col in df.columns]


def _parent_then_sortid_key(row: Dict[str, Any]) -> tuple:
    """Sort key: group rows by parent GUID text, then order by ``sortid``.

    GUIDs are compared as plain text; running them through ``custom_sort_key``
    would collapse every non-numeric GUID to the same key and defeat the
    grouping. Rows without a parent GUID are placed last.
    """
    parent_guid = row["父级GUID"]
    guid_text = str(parent_guid) if parent_guid else ""
    return (guid_text == "", guid_text, custom_sort_key(str(row["sortid"])))


def _safe_sheet_name(name: Any) -> str:
    """Turn ``name`` into a worksheet title accepted by openpyxl.

    Invalid title characters are replaced by ``_`` and titles longer than
    31 characters are cut to 28 characters plus ``...``.
    """
    sheet_name = _INVALID_SHEET_CHARS.sub("_", str(name))
    if len(sheet_name) > _MAX_SHEET_NAME_LEN:
        sheet_name = sheet_name[: _MAX_SHEET_NAME_LEN - 3] + "..."
    return sheet_name


def get_project_name(driver) -> str:
    """Return the value of the '工程名称' project property.

    Returns:
        The property value of the first matching node, or ``""`` (never
        ``None`` for "not found") when no such node exists.
    """
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(ps:ProjectPropertySet)-[:HAS_CHILD]->(n:ProjectProperty {name: '工程名称'})
    RETURN n.value as project_name
    LIMIT 1
    """
    # Legacy graphs have no EngineeringData root; match the property directly.
    legacy_query = """
    MATCH (n:ProjectProperty {name: '工程名称'})
    RETURN n.value as project_name
    LIMIT 1
    """
    with driver.session() as session:
        for query in (scoped_query, legacy_query):
            record = session.run(query).single()
            if record:
                return record["project_name"]
    return ""


def _type_from_standard(standard: str) -> Optional[str]:
    """Map an '执行规范' value to a project type, or ``None`` if it matches neither."""
    if "清单" in standard:
        return "清单工程"
    if "预规" in standard:
        return "预算工程"
    return None


def get_project_type(driver) -> str:
    """Detect whether the project is a '清单工程' or a '预算工程'.

    Resolution order:
      1. The '执行规范' property of the current project: a value containing
         "清单" means '清单工程', one containing "预规" means '预算工程'.
      2. The '工程类型' property, translated through a fixed mapping.
      3. Any '执行规范' property in the graph (legacy, unscoped query).
      4. The default, '预算工程'.
    """
    project_type_mapping = {
        "招投标工程": "清单工程",
        "概预算工程": "预算工程",
        "定额计价": "预算工程",
        "清单计价": "清单工程",
    }
    query_standard = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(ps:ProjectPropertySet)-[:HAS_CHILD]->(n:ProjectProperty {name: '执行规范'})
    RETURN n.value AS standard
    LIMIT 1
    """
    query_type = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(ps:ProjectPropertySet)-[:HAS_CHILD]->(n:ProjectProperty {name: '工程类型'})
    RETURN n.value AS type_value
    LIMIT 1
    """
    legacy_query = """
    MATCH (n:ProjectProperty {name: '执行规范'})
    RETURN n.value AS standard
    LIMIT 1
    """
    with driver.session() as session:
        record = session.run(query_standard).single()
        if record and record["standard"]:
            detected = _type_from_standard(record["standard"])
            if detected:
                return detected

        record = session.run(query_type).single()
        if record and record["type_value"]:
            type_value = record["type_value"]
            if type_value in project_type_mapping:
                return project_type_mapping[type_value]
            # Unknown value: warn, then keep trying the legacy lookup below.
            print(f"Warning: '工程类型' 值 '{type_value}' 未在 project_type_mapping 中定义,使用默认值 '预算工程'")

        record = session.run(legacy_query).single()
        if record and record["standard"]:
            detected = _type_from_standard(record["standard"])
            if detected:
                return detected

    return "预算工程"


def create_excel_writer(project_name: str = "") -> tuple:
    """Create the Excel writer for the export.

    Args:
        project_name: Output file stem (may include a directory). When empty,
            a timestamped default name is used.

    Returns:
        ``(writer, output_file)``: the ``pd.ExcelWriter`` (openpyxl engine) and
        the path it writes to.
    """
    if project_name:
        output_file = f"{project_name}.xlsx"
    else:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        output_file = f"knowledge_graph_export_{timestamp}.xlsx"

    # Only create a directory when the output path actually contains one.
    output_dir = os.path.dirname(output_file)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    writer = pd.ExcelWriter(output_file, engine="openpyxl")
    return writer, output_file


def custom_sort_key(sortid: str) -> tuple:
    """Build a sort key for dotted identifiers such as '1', '1.1', '1.1.1'.

    Args:
        sortid: The identifier. Falsy values and NaN sort last. Other
            non-string values are converted with ``str`` first, so the
            function is safe to use with ``Series.apply``.

    Returns:
        A tuple of ints, one per dotted part; when a part is not an integer,
        a tuple of ``inf`` of the same length so the entry sorts late.
    """
    if not sortid:
        return _SORT_LAST
    if not isinstance(sortid, str):
        if isinstance(sortid, float) and sortid != sortid:  # NaN from pandas
            return _SORT_LAST
        sortid = str(sortid)
    parts = sortid.split(".")
    try:
        return tuple(int(part) for part in parts)
    except ValueError:
        return tuple(float("inf") for _ in parts)


# ------------------------ Project division export ------------------------
def fetch_project_division_items(driver, project_type: str) -> List[Dict[str, Any]]:
    """Fetch the project division nodes of the current project.

    Args:
        driver: Neo4j driver.
        project_type: "清单工程" (fetch ProjectDivisionItem and List nodes) or
            anything else (fetch ProjectDivisionItem nodes only).

    Returns:
        One dict per node with ``sortid``, ``name``, ``节点类型`` (comma-joined
        labels) and every other node property. Nodes without ``sortid`` or
        ``name`` are skipped. For "清单工程" rows are ordered by ``sortid``;
        otherwise they are grouped by '专业类型' and ordered by ``sortid``
        within each group.
    """
    print(f"开始获取项目划分数据,工程类型: {project_type}")
    is_list_project = project_type == _LIST_PROJECT

    if is_list_project:
        query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(pds:ProjectDivisionSet)-[:HAS_CHILD]->(pdt:ProjectDivisionTree)-[:HAS_CHILD*0..]->(n)
    WHERE n:ProjectDivisionItem OR n:List
    RETURN n.sortid as sortid, n.name as name, labels(n) as labels, properties(n) as properties
    """
    else:
        query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(pds:ProjectDivisionSet)-[:HAS_CHILD]->(pdt:ProjectDivisionTree)-[:HAS_CHILD*0..]->(n:ProjectDivisionItem)
    RETURN n.sortid as sortid, n.name as name, labels(n) as labels, properties(n) as properties
    """
    print(f"执行查询: {query}")

    results: List[Dict[str, Any]] = []
    node_count = 0
    list_count = 0
    pdi_count = 0

    with driver.session() as session:
        for record in session.run(query):
            node_count += 1
            props = record["properties"]
            # Drop only the fields returned separately; '序号' is kept on purpose.
            props.pop("sortid", None)
            props.pop("name", None)

            labels = record["labels"]
            label_str = ",".join(labels) if labels else ""
            sortid = record["sortid"]
            name = record["name"]
            if not sortid or not name:
                print(f"警告: 跳过缺少必要字段的节点,标签: {label_str}")
                continue

            if "List" in label_str:
                list_count += 1
            elif "ProjectDivisionItem" in label_str:
                pdi_count += 1

            results.append({"sortid": sortid, "name": name, "节点类型": label_str, **props})

            # Debug output: the first ten nodes, then every hundredth.
            if node_count <= 10 or node_count % 100 == 0:
                print(f"找到节点: 类型={label_str}, name={name}, sortid={sortid}")

    print(f"共查询到 {node_count} 个节点,处理后保留 {len(results)} 个节点")
    print(f"其中 List 类型节点: {list_count} 个, ProjectDivisionItem 类型节点: {pdi_count} 个")

    if is_list_project:
        results.sort(key=lambda item: custom_sort_key(str(item["sortid"])))
        return results

    if not results:
        return []

    group_key = "专业类型"
    unique_groups = sorted({item.get(group_key, "") for item in results})
    print(f"找到的专业类型: {unique_groups}")
    # One stable sort by (group, sortid). Nodes without '专业类型' use the same
    # "" default as the group list above, so they are kept instead of dropped.
    results.sort(key=lambda item: (item.get(group_key, ""), custom_sort_key(str(item["sortid"]))))
    return results


def export_project_division(writer: pd.ExcelWriter, driver, project_type: str) -> None:
    """Write the project division sheet(s); the ``sortid`` column is hidden.

    All nodes go to the sheet "项目划分_清单". For "清单工程" with data, List
    nodes are additionally written to "清单" and ProjectDivisionItem nodes to
    "项目划分".
    """
    print(f"正在导出项目划分数据({project_type})...")
    data = fetch_project_division_items(driver, project_type)
    print(f"从fetch_project_division_items获取到 {len(data)} 条数据")

    fixed_cols = ["序号", "name", "代码", "编码", "节点类型"]
    if not data:
        print("警告:未获取到项目划分数据,将创建空表。")
        df = pd.DataFrame(columns=fixed_cols)
    else:
        # Debug output: node type of the first ten rows.
        for index, item in enumerate(data[:10], start=1):
            print(f"数据{index}: 类型={item.get('节点类型', 'N/A')}, name={item.get('name', 'N/A')}")

        df = pd.DataFrame(data)
        print(f"创建DataFrame后有 {len(df)} 行")

        list_nodes = df[df["节点类型"].str.contains("List", na=False)]
        pdi_nodes = df[df["节点类型"].str.contains("ProjectDivisionItem", na=False)]
        print(f"其中List类型节点有 {len(list_nodes)} 个,ProjectDivisionItem类型节点有 {len(pdi_nodes)} 个")

        df = df[_order_columns(df, fixed_cols, {"sortid"})]
        print(f"最终DataFrame有 {len(df)} 行")

    df.to_excel(writer, sheet_name="项目划分_清单", index=False)
    print(f"✅ 已导出 {len(df)} 条项目划分数据")
    print(f"📊 表头列:{df.columns.tolist()}")

    if project_type == _LIST_PROJECT and len(data) > 0:
        # (label substring to select, target sheet, wording used in the log line)
        extra_sheets = (
            ("List", "清单", "清单节点"),
            ("ProjectDivisionItem", "项目划分", "项目划分节点"),
        )
        for marker, sheet_name, wording in extra_sheets:
            subset = pd.DataFrame([item for item in data if marker in item.get("节点类型", "")])
            if len(subset) > 0:
                if "sortid" in subset.columns:
                    subset = subset.drop(columns=["sortid"])
                subset.to_excel(writer, sheet_name=sheet_name, index=False)
                print(f"✅ 额外导出 {len(subset)} 条{wording}数据到'{wording}'工作簿")


# ------------------------ Project property export ------------------------
def fetch_project_properties(driver) -> List[Dict[str, Any]]:
    """Fetch ProjectProperty nodes as ``{sortid, name, value}`` dicts, ordered by ``sortid``."""
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(ps:ProjectPropertySet)-[:HAS_CHILD*0..]->(n:ProjectProperty)
    RETURN n.sortid as sortid, n.name as name, n.value as value
    ORDER BY n.sortid // 可选:在数据库层初步排序
    """
    legacy_query = """
    MATCH (n:ProjectProperty)
    RETURN n.sortid as sortid, n.name as name, n.value as value
    ORDER BY n.sortid
    """

    def build_row(record) -> Dict[str, Any]:
        return {"sortid": record["sortid"], "name": record["name"], "value": record["value"]}

    results = _collect_rows(driver, (scoped_query, legacy_query), build_row)
    # Re-sort in Python so dotted sortids compare numerically, not as text.
    results.sort(key=lambda row: custom_sort_key(str(row["sortid"])))
    return results


def export_project_properties(writer: pd.ExcelWriter, driver) -> None:
    """Write the "工程属性" sheet with the columns ``name`` and ``value``."""
    print("正在导出工程属性数据...")
    data = fetch_project_properties(driver)

    visible_cols = ["name", "value"]
    if not data:
        print("警告:未获取到工程属性数据,将创建空表。")
        # Same columns as the populated sheet: sortid is never shown.
        df = pd.DataFrame(columns=visible_cols)
    else:
        df = pd.DataFrame(data)[visible_cols]

    df.to_excel(writer, sheet_name="工程属性", index=False)
    print(f"已导出 {len(df)} 条工程属性数据")


# ------------------------ Project quantity export ------------------------
def fetch_project_quantities(driver) -> List[Dict[str, Any]]:
    """Fetch ProjectQuantity nodes with their parent GUID.

    Returns:
        One dict per (node, parent) pair with ``name``, ``sortid``,
        ``父级GUID`` and the remaining node properties, grouped by parent GUID
        and ordered by ``sortid`` within each parent.
    """
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(pds:ProjectDivisionSet)-[:HAS_CHILD]->(pdt:ProjectDivisionTree)-[:HAS_CHILD*0..]->(n:ProjectQuantity)
    OPTIONAL MATCH (parent)-[:HAS_CHILD]->(n)
    RETURN n.name as name, n.sortid as sortid,
           COALESCE(parent.GUID, parent.guid, '') AS parent_id, // 优先取 GUID,其次 guid,都无则为空字符串
           properties(n) as properties
    """
    legacy_query = """
    MATCH (n:ProjectQuantity)
    OPTIONAL MATCH (parent)-[:HAS_CHILD]->(n)
    RETURN n.name as name, n.sortid as sortid,
           COALESCE(parent.GUID, parent.guid, '') AS parent_id,
           properties(n) as properties
    """

    def build_row(record) -> Dict[str, Any]:
        props = record["properties"]
        props.pop("name", None)
        props.pop("sortid", None)
        return {
            "name": record["name"],
            "sortid": record["sortid"],
            "父级GUID": record["parent_id"],
            **props,
        }

    results = _collect_rows(driver, (scoped_query, legacy_query), build_row)
    results.sort(key=_parent_then_sortid_key)
    return results


def export_project_quantities(writer: pd.ExcelWriter, driver) -> None:
    """Write the "工程量" sheet; fixed columns come first and ``sortid`` is hidden."""
    print("正在导出工程量数据...")
    data = fetch_project_quantities(driver)

    fixed_cols = ["父级GUID", "特征段", "name", "规格型号", "单位", "计算式", "数量"]
    if not data:
        print("警告:未获取到工程量数据,将创建空表。")
        df = pd.DataFrame(columns=fixed_cols)
    else:
        df = pd.DataFrame(data)
        df = df[_order_columns(df, fixed_cols, {"sortid"})]

    df.to_excel(writer, sheet_name="工程量", index=False)
    print(f"✅ 已导出 {len(df)} 条工程量数据")
    print(f"📊 表头列:{df.columns.tolist()}")


# ------------------------ Labour / material / machine export ------------------------
def fetch_materials_equipments(driver) -> List[Dict[str, Any]]:
    """Fetch MaterialOrEquipment nodes with their parent GUID.

    Returns:
        One dict per (node, parent) pair with ``id``, ``name``, ``sortid``,
        ``父级GUID`` and the remaining node properties, grouped by parent GUID
        and ordered by ``sortid`` within each parent.
    """
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(pds:ProjectDivisionSet)-[:HAS_CHILD]->(pdt:ProjectDivisionTree)-[:HAS_CHILD*0..]->(pq:ProjectQuantity)-[:HAS_CHILD]->(n:MaterialOrEquipment)
    OPTIONAL MATCH (parent)-[:HAS_CHILD]->(n)
    RETURN n.id as id, n.name as name, n.sortid as sortid,
           COALESCE(parent.GUID, parent.guid, '') AS parent_id,
           properties(n) as properties
    """
    legacy_query = """
    MATCH (n:MaterialOrEquipment)
    OPTIONAL MATCH (parent)-[:HAS_CHILD]->(n)
    RETURN n.id as id, n.name as name, n.sortid as sortid,
           COALESCE(parent.GUID, parent.guid, '') AS parent_id,
           properties(n) as properties
    """

    def build_row(record) -> Dict[str, Any]:
        props = record["properties"]
        for key in ("id", "name", "sortid"):
            props.pop(key, None)
        return {
            "id": record["id"],
            "name": record["name"],
            "sortid": record["sortid"],
            "父级GUID": record["parent_id"] or "",
            **props,
        }

    results = _collect_rows(driver, (scoped_query, legacy_query), build_row)
    results.sort(key=_parent_then_sortid_key)
    return results


def export_materials_equipments(writer: pd.ExcelWriter, driver) -> None:
    """Write the "人材机" sheet: 父级GUID, id, name, then the other properties."""
    print("正在导出入材机数据...")
    data = fetch_materials_equipments(driver)

    front_cols = ["父级GUID", "id", "name"]
    if not data:
        print("警告:未获取到入材机数据,将创建空表。")
        # Same leading columns as the populated sheet: sortid is never shown.
        df = pd.DataFrame(columns=front_cols)
    else:
        df = pd.DataFrame(data)
        hidden_or_front = front_cols + ["sortid"]
        df = df[front_cols + [col for col in df.columns if col not in hidden_or_front]]

    df.to_excel(writer, sheet_name="人材机", index=False)
    print(f"已导出 {len(df)} 条人材机数据")


# ------------------------ Cost preview export ------------------------
def fetch_cost_set_items(driver, parent_label: str, project_type: str) -> tuple:
    """Fetch the CostSet/CostItem data attached to project nodes.

    Args:
        driver: Neo4j driver.
        parent_label: "ProjectDivisionItem" or "ProjectQuantity".
        project_type: "清单工程" additionally includes List nodes (for
            ProjectDivisionItem) and a '节点类型' column.

    Returns:
        ``(rows, columns)``: one row per distinct GUID, ordered by the cost
        set's ``sortid``, and the column order (base columns followed by the
        cost item names in first-seen order). Each row maps a cost item name
        (the part before the first ``_``) to its cost.

    Raises:
        ValueError: If ``parent_label`` is not supported.
    """
    parent_fields = _COST_PARENT_FIELDS.get(parent_label)
    if parent_fields is None:
        raise ValueError(f"不支持的父节点类型: {parent_label}")

    is_list_project = project_type == _LIST_PROJECT
    if is_list_project and parent_label == "ProjectDivisionItem":
        # List projects attach cost sets to both ProjectDivisionItem and List nodes.
        child_node = "(child) WHERE child:ProjectDivisionItem OR child:List"
    else:
        child_node = f"(child:{parent_label})"

    extra_return = ", ".join(f"child.{column} AS {alias}" for column, alias in parent_fields)
    if is_list_project:
        extra_return += ", labels(child) AS child_labels"

    query_tail = f"""
    OPTIONAL MATCH (parent)-[:HAS_CHILD]->(child)
    OPTIONAL MATCH (child)-[:USE]->(costSet:CostSet)
    OPTIONAL MATCH (costSet)-[:HAS_CHILD]->(costItem:CostItem)
    RETURN child.name AS child_name,
           COALESCE(costSet.GUID, toString(id(child))) AS guid,
           COALESCE(costSet.sortid, '999999') AS cost_sortid,
           {extra_return},
           costItem.name AS cost_item_name,
           costItem.cost AS cost_value
    ORDER BY COALESCE(costSet.sortid, '999999')
    """
    scoped_query = f"""
    MATCH (ed:EngineeringData {{current: true}})-[:HAS_CHILD*0..]->{child_node}{query_tail}"""
    legacy_query = f"""
    MATCH {child_node}{query_tail}"""

    rows_by_guid: Dict[Any, Dict[str, Any]] = {}
    cost_names: Dict[str, None] = {}  # insertion-ordered set of cost item names

    with driver.session() as session:
        for query in (scoped_query, legacy_query):
            for record in session.run(query):
                guid = record["guid"]
                row = rows_by_guid.get(guid)
                if row is None:
                    row = {"sortid": record["cost_sortid"], "GUID": guid, "工程名称": record["child_name"]}
                    for column, alias in parent_fields:
                        row[column] = record[alias]
                    if is_list_project:
                        labels = record["child_labels"]
                        row["节点类型"] = ",".join(labels) if labels else ""
                    rows_by_guid[guid] = row

                raw_name = record["cost_item_name"]
                if raw_name is not None:
                    cost_name = raw_name.split("_")[0]
                    cost_names.setdefault(cost_name, None)
                    row[cost_name] = record["cost_value"]
            if rows_by_guid:
                break

    data = list(rows_by_guid.values())
    data.sort(key=lambda row: custom_sort_key(str(row["sortid"])))

    base_cols = ["sortid", "GUID", "工程名称"] + [column for column, _ in parent_fields]
    if is_list_project:
        base_cols.append("节点类型")
    return data, base_cols + list(cost_names)


def export_cost_set_preview(
    writer: pd.ExcelWriter, driver, parent_label: str, sheet_name: str, project_type: str
) -> None:
    """Write a cost preview sheet with dynamic cost columns.

    Rows are ordered by ``sortid`` but the ``sortid`` column is not written.
    When fetching fails the error is printed and no sheet is written.
    """
    print(f"正在导出 {sheet_name} 数据({project_type})...")
    try:
        data, column_order = fetch_cost_set_items(driver, parent_label, project_type)
    except Exception as e:
        # Best effort: one failing sheet must not abort the whole export.
        print(f"获取数据失败: {e}")
        return

    if not data:
        print(f"警告: 没有找到 {sheet_name} 相关数据")
        df = pd.DataFrame(columns=[col for col in column_order if col != "sortid"])
    else:
        df = pd.DataFrame(data)
        for col in column_order:
            if col not in df.columns:
                df[col] = None
        df = df[column_order].drop(columns=["sortid"])

    df.to_excel(writer, sheet_name=sheet_name, index=False)
    print(f"✅ 已导出 {len(df)} 条 {sheet_name} 数据")
    print(f"📊 列顺序: {df.columns.tolist()}")


# ------------------------ Material / machine analysis export ------------------------
def fetch_material_machine_items(driver) -> tuple:
    """Fetch MaterialandmachineCostItem nodes, one row per item (no de-duplication).

    Returns:
        ``(rows, columns)``: rows grouped by cost set GUID and ordered by
        ``sortid`` within each GUID, and the column order (``sortid``,
        ``GUID``, ``项目划分名称`` followed by the item properties in
        first-seen order). Item properties overwrite the leading fields of the
        same name.
    """
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD*0..]->(parent:ProjectDivisionItem)-[:USE]->(costSet:CostSet)-[:HAS_CHILD]->(item:MaterialandmachineCostItem)
    RETURN costSet.GUID AS guid, costSet.sortid AS sortid, parent.name AS parent_name, properties(item) AS properties
    ORDER BY costSet.sortid
    """
    legacy_query = """
    MATCH (parent:ProjectDivisionItem)-[:USE]->(costSet:CostSet)-[:HAS_CHILD]->(item:MaterialandmachineCostItem)
    RETURN costSet.GUID AS guid, costSet.sortid AS sortid, parent.name AS parent_name, properties(item) AS properties
    ORDER BY costSet.sortid
    """
    base_cols = ["sortid", "GUID", "项目划分名称"]
    # Seeded with the base columns so an item property with the same name does
    # not add a duplicate column.
    seen_columns = set(base_cols)
    column_order: List[str] = []

    def build_row(record) -> Dict[str, Any]:
        row = {
            "sortid": record["sortid"],
            "GUID": record["guid"],
            "项目划分名称": record["parent_name"],
        }
        for key, value in record["properties"].items():
            if key not in seen_columns:
                seen_columns.add(key)
                column_order.append(key)
            row[key] = value
        return row

    results = _collect_rows(driver, (scoped_query, legacy_query), build_row)

    def sort_key(row: Dict[str, Any]) -> tuple:
        guid = row["GUID"]
        # A missing GUID compares as "" instead of raising on None-vs-str.
        return ("" if guid is None else str(guid), custom_sort_key(str(row["sortid"])))

    results.sort(key=sort_key)
    return results, base_cols + column_order


def export_material_machine_analysis(writer: pd.ExcelWriter, driver) -> None:
    """Write the "材机分析" sheet; the ``sortid`` column is not written.

    When fetching fails the error is printed and no sheet is written.
    """
    print("正在导出材机分析数据...")
    try:
        data, column_order = fetch_material_machine_items(driver)
    except Exception as e:
        # Best effort: one failing sheet must not abort the whole export.
        print(f"获取材机分析数据失败: {e}")
        return

    cols_to_keep = [col for col in column_order if col != "sortid"]
    if not data:
        print("警告: 没有找到材机分析相关数据")
        df = pd.DataFrame(columns=cols_to_keep)
    else:
        df = pd.DataFrame(data)
        # Not every row carries every property; add the missing columns.
        for col in column_order:
            if col not in df.columns:
                df[col] = None
        df = df[cols_to_keep]

    df.to_excel(writer, sheet_name="材机分析", index=False)
    print(f"✅ 已导出 {len(df)} 条材机分析数据,列顺序: {df.columns.tolist()}")


# ------------------------ Fee table export ------------------------
def fetch_all_fee_collections(driver) -> List[Dict[str, Any]]:
    """Fetch every FeeCollection node below each FeeTableTemplateItem.

    Returns:
        One dict per node with ``template_name``, ``sortid``, ``depth`` (path
        length from the template item) and the node's own properties under
        their original names. Node properties overwrite the three leading
        fields of the same name.
    """
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(templateSet:FeeTableTemplateSet)-[:HAS_CHILD]->(templateItem:FeeTableTemplateItem)
    OPTIONAL MATCH path=(templateItem)-[:HAS_CHILD*]->(feeCollection:FeeCollection)
    WHERE ALL(r in relationships(path) WHERE type(r) = 'HAS_CHILD')
    RETURN templateItem.name as template_name, feeCollection, length(path) as depth
    ORDER BY template_name, depth
    """
    legacy_query = """
    MATCH (templateSet:FeeTableTemplateSet)-[:HAS_CHILD]->(templateItem:FeeTableTemplateItem)
    OPTIONAL MATCH path=(templateItem)-[:HAS_CHILD*]->(feeCollection:FeeCollection)
    WHERE ALL(r in relationships(path) WHERE type(r) = 'HAS_CHILD')
    RETURN templateItem.name as template_name, feeCollection, length(path) as depth
    ORDER BY template_name, depth
    """

    def build_row(record) -> Optional[Dict[str, Any]]:
        fee_collection = record["feeCollection"]
        if fee_collection is None:
            # Template item without any FeeCollection (OPTIONAL MATCH miss).
            return None
        props = dict(fee_collection.items())
        row = {
            "template_name": record["template_name"],
            "sortid": props.get("sortid"),
            "depth": record["depth"],  # dropped again before the export
        }
        row.update(props)
        return row

    return _collect_rows(driver, (scoped_query, legacy_query), build_row)


def export_fee_table(writer, driver) -> None:
    """Write the "取费表" sheet.

    Rows are ordered by template name, then ``sortid``. The columns 序号,
    name and 代码 come first; ``sortid`` and ``depth`` are not written.
    """
    print("正在导出取费表数据...")
    data = fetch_all_fee_collections(driver)

    fixed_front_cols = ["序号", "name", "代码"]
    if not data:
        print("警告: 没有找到取费表相关数据")
        df = pd.DataFrame(columns=fixed_front_cols)
    else:
        df = pd.DataFrame(data)
        if "sortid" not in df.columns:
            df["sortid"] = ""

        df["sort_key"] = df["sortid"].apply(custom_sort_key)
        sort_columns = ["template_name", "sort_key"] if "template_name" in df.columns else ["sort_key"]
        df = df.sort_values(by=sort_columns).reset_index(drop=True)
        df = df.drop(["sort_key", "depth"], axis=1, errors="ignore")

        # Add missing fixed columns so the column selection cannot raise KeyError.
        for col in fixed_front_cols:
            if col not in df.columns:
                df[col] = None

        remaining_cols = [col for col in df.columns if col not in fixed_front_cols and col != "sortid"]
        df = df[fixed_front_cols + remaining_cols]

    df.to_excel(writer, sheet_name="取费表", index=False)

    num_templates = df["template_name"].nunique() if not df.empty and "template_name" in df.columns else 0
    print(f"✅ 已导出 {len(df)} 条取费表数据,来自 {num_templates} 个取费表")
    print(f"📊 表头顺序: {df.columns.tolist()}")


# ------------------------ Fee schedule export ------------------------
def fetch_fee_schedule_items(driver) -> List[Dict[str, str]]:
    """Fetch every FeeScheduleItem as a ``{"name": ...}`` dict."""
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(feeScheduleSet:FeeScheduleSet)-[:HAS_CHILD]->(feeScheduleItem:FeeScheduleItem)
    RETURN feeScheduleItem.name as name
    """
    legacy_query = """
    MATCH (feeScheduleSet:FeeScheduleSet)-[:HAS_CHILD]->(feeScheduleItem:FeeScheduleItem)
    RETURN feeScheduleItem.name as name
    """
    return _collect_rows(
        driver,
        (scoped_query, legacy_query),
        lambda record: {"name": record["name"]},
    )


def fetch_fees_by_schedule_item(driver, schedule_item_name: str) -> List[Dict[str, Any]]:
    """Fetch the Fee nodes below the FeeScheduleItem named ``schedule_item_name``.

    Returns:
        One dict per Fee node holding ``sortid`` plus all node properties.
    """
    scoped_query = """
    MATCH (ed:EngineeringData {current: true})-[:HAS_CHILD]->(feeScheduleSet:FeeScheduleSet)-[:HAS_CHILD]->(feeScheduleItem:FeeScheduleItem {name: $schedule_item_name})
    MATCH path=(feeScheduleItem)-[:HAS_CHILD*]->(fee:Fee)
    WHERE ALL(r in relationships(path) WHERE type(r) = 'HAS_CHILD')
    RETURN fee.sortid as sortid, fee // 返回整个节点
    """
    legacy_query = """
    MATCH (feeScheduleSet:FeeScheduleSet)-[:HAS_CHILD]->(feeScheduleItem:FeeScheduleItem {name: $schedule_item_name})
    MATCH path=(feeScheduleItem)-[:HAS_CHILD*]->(fee:Fee)
    WHERE ALL(r in relationships(path) WHERE type(r) = 'HAS_CHILD')
    RETURN fee.sortid as sortid, fee // 返回整个节点
    """

    def build_row(record) -> Dict[str, Any]:
        fee_node = record["fee"]
        row = {"sortid": record["sortid"]}
        if fee_node is not None:
            row.update(dict(fee_node.items()))
        return row

    return _collect_rows(
        driver,
        (scoped_query, legacy_query),
        build_row,
        schedule_item_name=schedule_item_name,
    )


def export_fee_schedules(writer: pd.ExcelWriter, driver) -> None:
    """Write one sheet per FeeScheduleItem.

    Rows are ordered by ``sortid``, which is not written; the columns 序号,
    name and 代码 come first when present. Sheet titles are derived from the
    schedule item name (invalid characters replaced, at most 31 characters).
    Nothing is written when no schedule item exists.
    """
    print("正在导出费用计划数据...")
    schedule_items = fetch_fee_schedule_items(driver)
    if not schedule_items:
        print("警告: 没有找到费用计划相关数据")
        return

    fixed_front_cols = ["序号", "name", "代码"]
    for item in schedule_items:
        schedule_item_name = item["name"]
        print(f"正在导出 '{schedule_item_name}' 费用计划...")

        fees = fetch_fees_by_schedule_item(driver, schedule_item_name)
        if not fees:
            print(f"警告: 没有找到 '{schedule_item_name}' 相关的费用数据")
            df = pd.DataFrame(columns=fixed_front_cols)
        else:
            df = pd.DataFrame(fees)
            # Sort first; sortid is only removed from the output afterwards.
            if "sortid" in df.columns:
                df["sort_key"] = df["sortid"].apply(custom_sort_key)
                df = df.sort_values(by=["sort_key"]).reset_index(drop=True)
                df = df.drop(["sort_key"], axis=1)

            remaining_cols = [col for col in df.columns if col not in fixed_front_cols and col != "sortid"]
            present_front_cols = [col for col in fixed_front_cols if col in df.columns]
            df = df[present_front_cols + remaining_cols]

        df.to_excel(writer, sheet_name=_safe_sheet_name(schedule_item_name), index=False)
        print(f"✅ 已导出 {len(df)} 条 '{schedule_item_name}' 费用计划数据")
        print(f"📊 表头顺序: {df.columns.tolist()}")


# ------------------------ Entry point ------------------------
def export_knowledge_graph(output_dir: str = "", software_name: str = "") -> str:
    """Export the whole knowledge graph to one Excel workbook.

    Args:
        output_dir: Directory for the output file; empty means the current
            directory.
        software_name: Preferred file stem. When empty, the project name from
            the graph is used, and a default name if that is missing too.

    Returns:
        The path of the written workbook.
    """
    config = load_neo4j_config()
    driver = GraphDatabase.driver(config["uri"], auth=(config["user"], config["password"]))

    try:
        project_type = get_project_type(driver)
        print(f"检测到工程类型: {project_type}")

        if software_name:
            project_name = software_name
            print(f"使用提供的软件名称作为文件名: {project_name}")
        else:
            project_name = get_project_name(driver)
            if not project_name:
                print("警告:未找到工程名称,将使用默认文件名")

        if output_dir:
            if not project_name:
                project_name = "knowledge_graph_export"
            project_name = os.path.join(output_dir, project_name)

        writer, output_path = create_excel_writer(project_name)

        # Project division (depends on the project type).
        export_project_division(writer, driver, project_type)
        # Project properties.
        export_project_properties(writer, driver)
        # Project quantities.
        export_project_quantities(writer, driver)
        # Labour / material / machine items.
        export_materials_equipments(writer, driver)
        # Cost previews (depend on the project type).
        export_cost_set_preview(writer, driver, "ProjectDivisionItem", "项目划分_费用预览", project_type)
        export_cost_set_preview(writer, driver, "ProjectQuantity", "工程量_费用预览", project_type)
        # Material / machine analysis.
        export_material_machine_analysis(writer, driver)
        # Fee table.
        export_fee_table(writer, driver)
        # Fee schedules.
        export_fee_schedules(writer, driver)

        # Saves the workbook to disk.
        writer.close()
        print(f"导出完成,文件已保存到: {os.path.abspath(output_path)}")
        return output_path

    finally:
        driver.close()


# if __name__ == "__main__":
#     # Run the export; an output directory may be passed.
#     export_knowledge_graph()  # defaults to the current directory