# Script head: classify the user-supplied folder, create the preset JSON
# framework, then fill in the fee-setting section (projectData => costSetting)
# and write the framework to json_framework.json.
import json

# xml_to_json and process_tr_element are not used in this section; they stay
# imported in case other parts of the file rely on them.
from utils import (
    classify_files,
    filter_files_by_display_names,
    list_target_xmls,
    name_TypeList_info,
    process_tr_element,
    xml_file_to_json,
    xml_to_json,
)

input_path = input("请输入要分类的文件夹路径,例如 ./***:")
classify_files(input_path)

# Preset JSON framework. The sections below populate entries of "projectData";
# every other field is written out with the empty default given here.
json_framework = {
    "basicData": {
        "阶段类型": "",
        "插件名称": "",
        "地区插件": "",
        "执行规范": "",
        "软件名称": "",
        "工程版本": "",
        "工程类型": "",
        "工程类别": "",
        "工程文件预览信息": "",
    },
    "currentStage": "",
    "division": "",
    "errorCode": "",
    "errorMsg": "",
    "errorStatus": False,
    "fileName": "",
    "fileType": "",
    "finish": False,
    "projectData": {
        "overvoltage": {},
        "reportTab": [],
        "线路特征段": {},
        "projectInfo": {},
        "quantityAccuracy": [],
        "expensePreview": [],
        "costSetting": {},
        "projectCost": {},
        "reportMenu": {},
        "report": {},
        "projectDivision": {},
        "technicalEconomic": [],
    },
    "projectId": "",
    "stageStatus": "",
}

# ----------------------------------------------------------------------
# Fee-setting tables (取费表): projectData => costSetting
# ----------------------------------------------------------------------
files_list1 = list_target_xmls(
    "./xml_data", recursive=False, fullpath=True, keyword="取费表"
)
xml_file_path1 = './xml_data/取费表分类信息.xml'
filtered_files1, display_names1 = filter_files_by_display_names(
    xml_file_path1, files_list1, prefix="取费表_"
)

# One placeholder record per display name; each record is a fresh dict so
# filling one entry never touches another.
json_framework["projectData"]["costSetting"] = {
    display_name: {"name": "", "TypeList": "", "tables": None}
    for display_name in display_names1
}

cost_setting = json_framework["projectData"]["costSetting"]
for ipath in filtered_files1:
    info_words = name_TypeList_info(ipath)
    # NOTE(review): info_words must match one of display_names1, otherwise
    # this lookup raises KeyError - confirm against utils.
    entry = cost_setting[info_words]
    # The same value is stored as both the name and the TypeList.
    entry["name"] = info_words
    entry["TypeList"] = info_words
    entry["tables"] = xml_file_to_json(ipath)

print("写入取费表信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(json_framework, f, ensure_ascii=False, indent=4)
# ----------------------------------------------------------------------
# Cost tables (费用表): projectData => projectCost
# ----------------------------------------------------------------------
# xml_to_json and process_tr_element are not used in this section; they stay
# imported in case other parts of the file rely on them.
from utils import (
    create_mapping,
    extract_table_name_from_xml_file,
    filter_files_by_display_names,
    list_target_xmls,
    pd_get_table_names_from_xml,
    process_tr_element,
    xml_file_to_json,
    xml_to_json,
)

files_list2 = list_target_xmls(
    "./xml_data", recursive=False, fullpath=True, keyword="费用表"
)
xml_file_path2 = './xml_data/费用表分类信息.xml'
filtered_files2, display_names2 = filter_files_by_display_names(
    xml_file_path2, files_list2, prefix="费用表_"
)

# Pre-register every display name with None so that names without a parsed
# file still appear in the output.
json_framework["projectData"]["projectCost"] = dict.fromkeys(display_names2, None)

for ipath in filtered_files2:
    result = xml_file_to_json(ipath)
    # The key comes from the table name read out of the file itself.
    keysword = extract_table_name_from_xml_file(ipath)
    json_framework["projectData"]["projectCost"][keysword] = result

print("写入费用表信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(json_framework, f, ensure_ascii=False, indent=4)

# ----------------------------------------------------------------------
# Project division (项目划分): projectData => projectDivision
# ----------------------------------------------------------------------
files_list3 = list_target_xmls(
    "./xml_data", recursive=False, fullpath=True, keyword="项目划分"
)

# Only the first table name reported for each file is used.
# NOTE(review): assumes every file yields at least one table name; an empty
# result would raise IndexError here - confirm against utils.
table_names = [pd_get_table_names_from_xml(ipath)[0] for ipath in files_list3]
mapping_dict = create_mapping(table_names, files_list3)

for table_name in table_names:
    key = f"项目划分_{table_name}"
    result = xml_file_to_json(mapping_dict[table_name])
    # Each entry is wrapped in a single-key dict keyed by the bare table name.
    json_framework["projectData"]["projectDivision"][key] = {table_name: result}

# The message names the section just written (项目划分).
print("写入项目划分信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(json_framework, f, ensure_ascii=False, indent=4)
# ----------------------------------------------------------------------
# Expense preview (项目划分_取费): projectData => expensePreview
# ----------------------------------------------------------------------
import json
import re

from utils import (
    extract_file_paths_with_access_path_safe,
    extract_title_from_path,
    file_exists_simple,
    get_by_strpath,
    list_target_xmls,
    parse_costs_from_xml_file,
    parse_xml_to_json,
    process_xml_file,
    xml_file_to_json,
    xml_to_dict,
)

files_list4 = list_target_xmls(
    "./xml_data1.5", recursive=False, fullpath=True, keyword="项目划分"
)

# Every matching file contributes one entry; an empty file list simply leaves
# expensePreview empty.
for ipath in files_list4:
    result = parse_costs_from_xml_file(ipath)
    json_framework["projectData"]["expensePreview"].append(result)

print("写入费用预览信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(json_framework, f, ensure_ascii=False, indent=4)

# ----------------------------------------------------------------------
# Line feature segments: projectData => 线路特征段
# ----------------------------------------------------------------------
files_list5 = list_target_xmls(
    "./xml_data", recursive=False, fullpath=True, keyword="线路特征_"
)

if files_list5:
    # First pass: process_xml_file receives the same path for both arguments.
    # NOTE(review): presumably this rewrites each file in place - confirm
    # against utils.
    for ipath in files_list5:
        process_xml_file(ipath, ipath)

    # Second pass: convert every file after all of them have been processed.
    for ipath in files_list5:
        result = xml_file_to_json(ipath)
        keysword = extract_title_from_path(ipath)
        json_framework["projectData"]["线路特征段"][keysword] = result

    print("写入线路特征段信息")
    with open("json_framework.json", "w", encoding="utf-8") as f:
        json.dump(json_framework, f, ensure_ascii=False, indent=4)
else:
    print("未找到符合条件的 '线路特征_' XML 文件,跳过线路特征段处理。")

# ----------------------------------------------------------------------
# Project attributes (工程属性): projectData => projectInfo
# ----------------------------------------------------------------------
json_framework["projectData"]["projectInfo"] = xml_to_dict("./xml_data2/工程属性.xml")

print("写入工程属性信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(json_framework, f, ensure_ascii=False, indent=4)

# ----------------------------------------------------------------------
# Quantities (工程量); the original note maps this to
# projectData => projectDivision
# ----------------------------------------------------------------------
# Reload the framework from the file written above, so this section works on
# the JSON-serialised form rather than on the in-memory json_framework.
with open("./json_framework.json", "r", encoding="utf-8") as f:
    data = json.load(f)

# NOTE(review): absolute, machine-specific path, while the other sections use
# relative paths such as ./xml_data2/ - confirm this is intended.
root_path = "/data/Z_project/langchain_lab/xml_to_json/xml_data2/"
safe_results = extract_file_paths_with_access_path_safe(data)

for result in safe_results:
    file_name = result['file_path']
    file_path_dict = result['access_path']
    old_path = root_path + file_name
    # Swap a trailing ".xmd3" extension (any letter case) for ".xml".
    new_path = re.sub(r"\.xmd3$", ".xml", old_path, flags=re.IGNORECASE)
    # Entries whose XML counterpart is missing are skipped silently.
    if file_exists_simple(new_path) == 1:
        # NOTE(review): presumably stores the parsed XML at the access path
        # inside data - confirm against utils.get_by_strpath.
        get_by_strpath(data, file_path_dict, parse_xml_to_json(new_path, from_file=True))

print("写入工程量信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)