"""Convert an indicator-definition XML file into a JSON mapping file.

Each ``<record>`` element found under a ``<records>`` element supplies a
``指标名称`` (indicator name) attribute and a ``关键字`` (keyword) attribute.
The keyword is parsed into a mapping rule plus the list of items the rule
refers to, and the collected entries are written out as JSON.
"""

import json
import xml.etree.ElementTree as ET

try:
    import chardet
except ImportError:  # only needed for encoding detection in read_xml_as_string
    chardet = None


def read_xml_as_string(file_path: str) -> str:
    """Read *file_path* and return its content decoded to ``str``.

    The whole file is read as bytes and the encoding is guessed with
    ``chardet`` when that package is importable.  If detection is unavailable
    or inconclusive (``chardet`` reports ``None``, e.g. for an empty file),
    UTF-8 is used instead of passing ``None`` to ``bytes.decode``.

    Raises:
        OSError: if the file cannot be opened or read.
        UnicodeDecodeError: if the bytes are not valid in the chosen encoding.
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    detected = None
    if chardet is not None:
        detected = chardet.detect(raw_data).get('encoding')

    return raw_data.decode(detected or 'utf-8')


def _expand_code_range(code_range):
    """Expand ``PREFIX-START~END`` into one code per number.

    The input is returned unchanged (as a single-item list) when it contains
    no ``~``, has no ``-`` separating the prefix from the range, has
    non-integer bounds, or describes an empty (reversed) range.
    """
    if "~" not in code_range:
        return [code_range]

    try:
        # Both unpackings raise ValueError on malformed input, as does int().
        prefix, range_part = code_range.rsplit("-", 1)
        start_str, end_str = range_part.split("~")
        start = int(start_str)
        end = int(end_str)
    except ValueError:
        return [code_range]

    if end < start:
        # Keep the literal text rather than silently dropping the code.
        return [code_range]

    return [f"{prefix}-{num}" for num in range(start, end + 1)]


def parse_keyword(keyword, indicator_name) -> dict:
    """Turn a raw keyword into a mapping rule and the items it maps.

    Recognised keyword shapes, checked in this order:

    1. ``indicator_name == "电压等级"``: the rule is the fixed string ``"1"``
       and the keyword itself is the only mapped item.
    2. Contains ``||``: a list of ``codes@@value`` parts.  Codes are separated
       by ``、`` and may be ranges such as ``YX5-67~69``, which are expanded.
       The rule is a Markdown table with one row per part; parts without
       ``@@`` are skipped.
    3. Contains ``/``, ``(`` and ``)``: treated as ``numerator/(a+b+...)``;
       the mapped items are the numerator followed by the ``+``-separated
       terms found between the first ``(`` and the first ``)``.
    4. Contains ``+``: the mapped items are the ``+``-separated terms.
    5. Anything else: the keyword is its own rule and only mapped item.

    A ``None`` keyword (attribute missing on the record) is treated as an
    empty string for shapes 2-5.

    Returns:
        A dict with the keys ``"映射规则"`` (rule) and ``"指标映射"`` (items).
    """
    # Special case: voltage level.
    if indicator_name == "电压等级":
        return {"映射规则": "1", "指标映射": [keyword]}

    if keyword is None:
        keyword = ""

    # Rule list ("||"-separated "codes@@value" parts).
    if "||" in keyword:
        table_rows = []
        all_codes = []

        for part in keyword.split("||"):
            if "@@" not in part:
                continue
            codes_str, value = part.split("@@", 1)

            for code_range in codes_str.split("、"):
                all_codes.extend(_expand_code_range(code_range))

            table_rows.append(f"| {codes_str} | {value} |")

        rule_table = (
            "| 资源识别规则 | 指标值 |\n|-------|-------|\n"
            + "\n".join(table_rows)
        )
        return {"映射规则": rule_table, "指标映射": all_codes}

    # Formula of the form numerator/(term + term + ...).
    if "/" in keyword and "(" in keyword and ")" in keyword:
        numerator = keyword.split("/")[0].strip()

        denominator_start = keyword.find("(") + 1
        denominator_end = keyword.find(")")
        denominator_expr = keyword[denominator_start:denominator_end]

        denominator_items = [
            item.strip() for item in denominator_expr.split("+")
        ]
        return {
            "映射规则": keyword,
            "指标映射": [numerator] + denominator_items,
        }

    # Plain sum of terms.
    if "+" in keyword:
        items = [item.strip() for item in keyword.split("+")]
        return {"映射规则": keyword, "指标映射": items}

    # Default: an ordinary keyword.
    return {"映射规则": keyword, "指标映射": [keyword]}


def xml_to_json(xml_content: str) -> str:
    """Convert the XML text into a JSON string of indicator entries.

    Every ``record`` element that is a direct child of a ``records`` element
    below the root yields one entry holding the indicator name, the parsed
    mapping/rule pair, and an empty ``"code"`` field.

    Returns:
        The entries serialised as JSON (non-ASCII kept, 4-space indent).

    Raises:
        xml.etree.ElementTree.ParseError: if *xml_content* is not valid XML.
    """
    root = ET.fromstring(xml_content)
    result = []

    for record in root.findall('.//records/record'):
        indicator_name = record.get("指标名称")
        keyword = record.get("关键字")

        parsed = parse_keyword(keyword, indicator_name)

        result.append({
            "指标名称": indicator_name,
            "指标描述": {
                "指标映射": parsed["指标映射"],
                "映射规则": parsed["映射规则"],
            },
            "code": "",
        })

    return json.dumps(result, ensure_ascii=False, indent=4)


def parse_indicator_string_to_json(indicator_str: str, output_path: str = "output.json"):
    """Parse *indicator_str* as JSON and write it to *output_path*.

    Returns:
        The status string ``"结果已保存"`` once the file has been written, or a
        dict ``{"error": ...}`` when *indicator_str* is not valid JSON (in
        which case no file is opened).

    Raises:
        OSError: if *output_path* cannot be opened for writing.
    """
    try:
        result = json.loads(indicator_str)
    except json.JSONDecodeError as e:
        return {"error": f"JSON解析失败: {e}"}

    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    return "结果已保存"


def main():
    """Convert the bundled XML indicator file to ``./data/result.json``."""
    xml_content = read_xml_as_string('./data/主网架空线路造价分析指标.xml')
    json_output = xml_to_json(xml_content)
    parse_indicator_string_to_json(json_output, output_path="./data/result.json")
    print("转换完毕!")


if __name__ == "__main__":
    main()