Files
xml_to_json/pipe.py
T
chentianrui 4459270336 上传文件
2025-10-21 11:34:54 +08:00

222 lines
7.5 KiB
Python

from utils import classify_files

# Ask the operator for the folder to sort, then hand that path to the
# project-local classify_files helper.
input_path = input("请输入要分类的文件夹路径,例如 ./***:")
classify_files(input_path)
import json
# Preset JSON skeleton for the exported project document.
# Every field starts out empty ("" / False / {} / []); the stages further down
# this script assign into projectData (costSetting, projectCost,
# projectDivision, expensePreview, 线路特征段, projectInfo).
json_framework = {
    # General descriptive fields about the project file, all blank by default.
    "basicData": {
        "阶段类型": "",
        "插件名称": "",
        "地区插件": "",
        "执行规范": "",
        "软件名称": "",
        "工程版本": "",
        "工程类型": "",
        "工程类别": "",
        "工程文件预览信息": "",
    },
    "currentStage": "",
    "division": "",
    "errorCode": "",
    "errorMsg": "",
    "errorStatus": False,
    "fileName": "",
    "fileType": "",
    "finish": False,
    # Payload section: each entry is its own empty container.
    "projectData": {
        "overvoltage": {},
        "reportTab": [],
        "线路特征段": {},
        "projectInfo": {},
        "quantityAccuracy": [],
        "expensePreview": [],
        "costSetting": {},
        "projectCost": {},
        "reportMenu": {},
        "report": {},
        "projectDivision": {},
        "technicalEconomic": [],
    },
    "projectId": "",
    "stageStatus": "",
}
########################### Fee-rate tables ("取费表") ###########################
# Fills projectData => costSetting, then writes a snapshot of the whole
# framework to json_framework.json.
from utils import (
    filter_files_by_display_names,
    list_target_xmls,
    name_TypeList_info,
    process_tr_element,
    xml_file_to_json,
    xml_to_json,
)

# Collect candidate XML files from ./xml_data using the "取费表" keyword.
files_list1 = list_target_xmls("./xml_data", recursive=False, fullpath=True, keyword="取费表")

# Narrow the candidates using the classification XML; the helper returns the
# retained file paths together with their display names.
xml_file_path1 = './xml_data/取费表分类信息.xml'
filtered_files1, display_names1 = filter_files_by_display_names(
    xml_file_path1, files_list1, prefix="取费表_"
)

# One independent placeholder record per display name.
json_framework["projectData"]["costSetting"] = {
    display_name: {"name": "", "TypeList": "", "tables": None}
    for display_name in display_names1
}

cost_setting = json_framework["projectData"]["costSetting"]
for fee_xml_path in filtered_files1:
    # The value returned by name_TypeList_info is used as the record key and
    # is also stored as both its "name" and its "TypeList".
    # A key that is not among display_names1 raises KeyError here.
    record_key = name_TypeList_info(fee_xml_path)
    record = cost_setting[record_key]
    record['name'] = record_key
    record['TypeList'] = record_key
    record['tables'] = xml_file_to_json(fee_xml_path)

print("写入取费表信息")
with open("json_framework.json", "w", encoding="utf-8") as snapshot_file:
    json.dump(json_framework, snapshot_file, ensure_ascii=False, indent=4)
########################### Cost tables ("费用表") ###########################
# Fills projectData => projectCost, then writes a snapshot of the whole
# framework to json_framework.json.
from utils import (
    extract_table_name_from_xml_file,
    filter_files_by_display_names,
    list_target_xmls,
    process_tr_element,
    xml_file_to_json,
    xml_to_json,
)

# Collect candidate XML files from ./xml_data using the "费用表" keyword.
files_list2 = list_target_xmls("./xml_data", recursive=False, fullpath=True, keyword="费用表")

# Narrow the candidates using the classification XML.
xml_file_path2 = './xml_data/费用表分类信息.xml'
filtered_files2, display_names2 = filter_files_by_display_names(
    xml_file_path2, files_list2, prefix="费用表_"
)

# Start with every display name mapped to None; entries are then stored under
# the table name read from each file, so a display name without a matching
# table name keeps its None value.
project_cost = {display_name: None for display_name in display_names2}
json_framework["projectData"]["projectCost"] = project_cost

for cost_xml_path in filtered_files2:
    cost_table_json = xml_file_to_json(cost_xml_path)
    cost_table_name = extract_table_name_from_xml_file(cost_xml_path)
    project_cost[cost_table_name] = cost_table_json

print("写入费用表信息")
with open("json_framework.json", "w", encoding="utf-8") as snapshot_file:
    json.dump(json_framework, snapshot_file, ensure_ascii=False, indent=4)
########################### Project division ("项目划分") ###########################
# Fills projectData => projectDivision, then writes a snapshot of the whole
# framework to json_framework.json.
from utils import (
    create_mapping,
    list_target_xmls,
    pd_get_table_names_from_xml,
    process_tr_element,
    xml_file_to_json,
    xml_to_json,
)

# Collect candidate XML files from ./xml_data using the "项目划分" keyword.
files_list3 = list_target_xmls("./xml_data", recursive=False, fullpath=True, keyword="项目划分")

# Only the first table name reported for each file is used; a file for which
# the helper returns an empty sequence raises IndexError here.
table_names = [
    pd_get_table_names_from_xml(division_xml_path)[0]
    for division_xml_path in files_list3
]
mapping_dict = create_mapping(table_names, files_list3)

# Entries are added to the existing projectDivision dict under the key
# "项目划分_<table name>", each holding {<table name>: <converted table>}.
project_division = json_framework["projectData"]["projectDivision"]
for table_name in table_names:
    division_json = xml_file_to_json(mapping_dict[table_name])
    project_division[f"项目划分_{table_name}"] = {table_name: division_json}

print("写入项目规划信息")
with open("json_framework.json", "w", encoding="utf-8") as snapshot_file:
    json.dump(json_framework, snapshot_file, ensure_ascii=False, indent=4)
########################### Expense preview (project division / fees) ###########################
# Appends one parsed result per XML file to projectData => expensePreview,
# then writes a snapshot of the whole framework to json_framework.json.
from utils import list_target_xmls
from utils import parse_costs_from_xml_file

# NOTE(review): this stage reads from ./xml_data1.5 while the other stages use
# ./xml_data or ./xml_data2 -- confirm the directory is intentional.
files_list4 = list_target_xmls("./xml_data1.5", recursive=False, fullpath=True, keyword="项目划分")

# Each file is parsed exactly once. The former extra parse of files_list4[0]
# before the loop was discarded immediately and raised IndexError when no file
# matched; with an empty list the loop now simply adds nothing.
for ipath in files_list4:
    result = parse_costs_from_xml_file(ipath)
    json_framework["projectData"]["expensePreview"].append(result)

print("写入费用预览信息")
with open("json_framework.json", "w", encoding="utf-8") as f:
    json.dump(json_framework, f, ensure_ascii=False, indent=4)
########################### Line feature segments ("线路特征段") ###########################
# Fills projectData => 线路特征段 when matching XML files exist; otherwise the
# stage is skipped with a message and no snapshot is written.
from utils import extract_title_from_path, list_target_xmls, process_xml_file

# Collect candidate XML files from ./xml_data using the "线路特征_" keyword.
files_list5 = list_target_xmls("./xml_data", recursive=False, fullpath=True, keyword="线路特征_")

if not files_list5:
    print("未找到符合条件的 '线路特征_' XML 文件,跳过线路特征段处理。")
else:
    # First pass: run process_xml_file on every file, passing the same path as
    # both arguments.
    for segment_xml_path in files_list5:
        process_xml_file(segment_xml_path, segment_xml_path)

    # Second pass: convert each file and store it under the title derived from
    # its path. xml_file_to_json is imported earlier in this script.
    line_segments = json_framework["projectData"]["线路特征段"]
    for segment_xml_path in files_list5:
        segment_json = xml_file_to_json(segment_xml_path)
        segment_title = extract_title_from_path(segment_xml_path)
        line_segments[segment_title] = segment_json

    print("写入线路特征段信息")
    with open("json_framework.json", "w", encoding="utf-8") as snapshot_file:
        json.dump(json_framework, snapshot_file, ensure_ascii=False, indent=4)
########################### Project properties ("工程属性") ###########################
# Replaces projectData => projectInfo with the dict built from the project
# properties XML, then writes a snapshot of the whole framework.
from utils import xml_to_dict

project_info = xml_to_dict("./xml_data2/工程属性.xml")
json_framework["projectData"]["projectInfo"] = project_info

print("写入工程属性信息")
with open("json_framework.json", "w", encoding="utf-8") as snapshot_file:
    json.dump(json_framework, snapshot_file, ensure_ascii=False, indent=4)
########################### Quantities ("工程量") ###########################
# Re-reads the snapshot written by the previous stages, resolves the file
# references found in it, and writes the result back to json_framework.json.
# (The original header names projectData => projectDivision as the target;
# the actual locations come from each entry's access path.)
import json
import re

from utils import (
    extract_file_paths_with_access_path_safe,
    file_exists_simple,
    get_by_strpath,
    parse_xml_to_json,
)

# Read the JSON file back from disk rather than reusing json_framework.
with open("./json_framework.json", "r", encoding="utf-8") as snapshot_file:
    data = json.load(snapshot_file)

# NOTE(review): absolute, machine-specific directory, whereas the other stages
# use paths relative to the working directory -- confirm before running elsewhere.
root_path = "/data/Z_project/langchain_lab/xml_to_json/xml_data2/"

safe_results = extract_file_paths_with_access_path_safe(data)
for reference in safe_results:
    referenced_name, access_path = reference['file_path'], reference['access_path']

    # A trailing ".xmd3" extension (any letter case) is swapped for ".xml".
    xml_path = re.sub(r"\.xmd3$", ".xml", root_path + referenced_name, flags=re.IGNORECASE)

    # References whose XML counterpart is not reported as present are skipped silently.
    if file_exists_simple(xml_path) != 1:
        continue

    # Presumably stores the parsed XML at access_path inside data, despite the
    # helper's "get" name -- TODO confirm against utils.get_by_strpath.
    get_by_strpath(data, access_path, parse_xml_to_json(xml_path, from_file=True))

print("写入工程量信息")
# This final write uses indent=2, unlike the indent=4 snapshots written earlier.
with open("json_framework.json", "w", encoding="utf-8") as snapshot_file:
    json.dump(data, snapshot_file, ensure_ascii=False, indent=2)