上传文件至 kg_lab_6.13
6.24 xml自动转json脚本
This commit is contained in:
@@ -0,0 +1,117 @@
|
||||
import chardet
|
||||
import xml.etree.ElementTree as ET
|
||||
import json
|
||||
|
||||
|
||||
def read_xml_as_string(file_path):
    """Read a file as text, decoding it with an automatically detected encoding.

    The whole file is read as bytes, ``chardet.detect`` guesses the encoding
    from those bytes, and the same bytes are then decoded with that encoding.

    Args:
        file_path: Path of the file to read.

    Returns:
        The decoded file content as ``str``. An empty file yields ``""``.

    Raises:
        OSError: If the file cannot be opened or read.
        UnicodeDecodeError: If the bytes are not valid in the chosen encoding.
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    # There is nothing to detect in an empty file, so skip detection entirely.
    if not raw_data:
        return ""

    # chardet reports ``None`` when it cannot make a guess; decoding with
    # ``None`` raises TypeError, so fall back to UTF-8 in that case.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'
    return raw_data.decode(encoding)
|
||||
|
||||
def parse_keyword(keyword, indicator_name):
    """Turn a raw keyword expression into a mapping rule and its mapped items.

    The keyword is classified by the first matching form:

    1. ``indicator_name`` equals ``"电压等级"``: the rule is the string ``"1"``
       and the keyword is the single mapped item.
    2. The keyword contains ``"||"``: it is a list of ``codes@@value`` parts.
       Codes are separated by ``"、"`` and may be numeric ranges such as
       ``YX5-67~69``, which are expanded to ``YX5-67``, ``YX5-68``, ``YX5-69``.
       The rule is a Markdown table with one row per part.
    3. The keyword contains ``"/"``, ``"("`` and ``")"``: the text before the
       first ``"/"`` and the ``"+"``-separated terms inside the first pair of
       parentheses become the mapped items.
    4. The keyword contains ``"+"``: each term is a mapped item.
    5. Otherwise the keyword itself is the only mapped item.

    Args:
        keyword: Raw keyword expression.
        indicator_name: Name of the indicator the keyword belongs to.

    Returns:
        A dict with the keys ``"映射规则"`` (rule text) and ``"指标映射"``
        (list of mapped items).
    """
    # Special case: voltage level.
    if indicator_name == "电压等级":
        return {"映射规则": "1", "指标映射": [keyword]}

    # Range expression: "codes@@value" parts joined by "||".
    if "||" in keyword:
        table_rows = []
        all_codes = []

        for part in keyword.split("||"):
            # Parts without a value separator carry no rule and are skipped.
            if "@@" not in part:
                continue
            codes_str, value = part.split("@@", 1)

            for code_range in codes_str.split("、"):
                all_codes.extend(_expand_code_range(code_range))

            # The table keeps the codes exactly as written (ranges unexpanded).
            table_rows.append(f"| {codes_str} | {value} |")

        rule_table = "| 资源识别规则 | 指标值 |\n|-------|-------|\n" + "\n".join(table_rows)
        return {"映射规则": rule_table, "指标映射": all_codes}

    # Formula of the form "numerator/(a+b+...)".
    if "/" in keyword and "(" in keyword and ")" in keyword:
        # Numerator: everything before the first "/".
        molecule = keyword.split("/")[0].strip()

        # Denominator: the text between the first "(" and the first ")".
        denominator_start = keyword.find("(") + 1
        denominator_end = keyword.find(")")
        denominator_expr = keyword[denominator_start:denominator_end]

        denominator_items = [item.strip() for item in denominator_expr.split("+")]
        return {"映射规则": keyword, "指标映射": [molecule] + denominator_items}

    # Plain sum of terms.
    if "+" in keyword:
        items = [item.strip() for item in keyword.split("+")]
        return {"映射规则": keyword, "指标映射": items}

    # Default: an ordinary keyword.
    return {"映射规则": keyword, "指标映射": [keyword]}


def _expand_code_range(code_range):
    """Expand one code token such as ``YX5-67~69`` into its individual codes.

    Tokens without ``"~"`` are returned unchanged. Tokens that do not have the
    ``prefix-start~end`` shape with integer bounds are also returned unchanged
    instead of raising.
    """
    if "~" not in code_range:
        return [code_range]
    try:
        # Both unpackings raise ValueError on malformed tokens (no "-" or
        # more than one "~"), as does int() on non-numeric bounds.
        prefix, range_part = code_range.rsplit("-", 1)
        start_str, end_str = range_part.split("~")
        start = int(start_str)
        end = int(end_str)
    except ValueError:
        return [code_range]
    return [f"{prefix}-{num}" for num in range(start, end + 1)]
|
||||
|
||||
def xml_to_json(xml_content):
    """Convert the indicator records of an XML document into a JSON string.

    Every ``record`` element found under a ``records`` element is read through
    its ``指标名称`` and ``关键字`` attributes; the keyword is parsed with
    ``parse_keyword`` and the outcome is collected into one entry per record.

    Args:
        xml_content: The XML document as text.

    Returns:
        A JSON array serialized as a string (non-ASCII characters kept as-is,
        4-space indentation). Each entry has the keys ``指标名称``,
        ``指标描述`` (holding ``指标映射`` and ``映射规则``) and an empty
        ``code``.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_content)
    result = []

    for record in root.findall('.//records/record'):
        indicator_name = record.get("指标名称")
        # Element.get returns None for a missing attribute, which breaks the
        # substring checks in parse_keyword; treat it as an empty keyword.
        keyword = record.get("关键字", "")

        # Derive the mapping rule and mapped items from the keyword.
        parsed = parse_keyword(keyword, indicator_name)

        result.append({
            "指标名称": indicator_name,
            "指标描述": {
                "指标映射": parsed["指标映射"],
                "映射规则": parsed["映射规则"],
            },
            "code": "",
        })

    return json.dumps(result, ensure_ascii=False, indent=4)
|
||||
|
||||
def parse_indicator_string_to_json(indicator_str: str, output_path: str = "output.json"):
    """Validate a JSON string and write it, re-indented, to ``output_path``.

    Args:
        indicator_str: Text expected to contain a JSON document.
        output_path: Destination file, written as UTF-8 with 2-space
            indentation and non-ASCII characters kept as-is.

    Returns:
        The status string ``"结果已保存"`` once the file has been written, or
        a dict ``{"error": ...}`` describing the problem when
        ``indicator_str`` is not valid JSON (nothing is written in that case).
    """
    # Only the parse step can raise JSONDecodeError, so keep the try narrow.
    try:
        parsed = json.loads(indicator_str)
    except json.JSONDecodeError as exc:
        return {"error": f"JSON解析失败: {exc}"}

    with open(output_path, 'w', encoding='utf-8') as out_file:
        json.dump(parsed, out_file, ensure_ascii=False, indent=2)

    return "结果已保存"
|
||||
|
||||
|
||||
# Script section: convert the indicator XML under ./data into a JSON file.
xml_content = read_xml_as_string("./data/主网架空线路造价分析指标.xml")
json_output = xml_to_json(xml_content)

# The JSON text is parsed back and written to disk by the helper.
parse_indicator_string_to_json(json_output, output_path="./data/result.json")
print("转换完毕!")
|
||||
|
||||
Reference in New Issue
Block a user