diff --git a/kg_lab_6.13/xml_to_json.py b/kg_lab_6.13/xml_to_json.py index 55f77e6..d355276 100644 --- a/kg_lab_6.13/xml_to_json.py +++ b/kg_lab_6.13/xml_to_json.py @@ -82,7 +82,7 @@ def xml_to_json(xml_content): unit = record.get("单位") unit_type = record.get("单价类型") order = record.get("序号") - extraction_method = record.get("提取方式") + extraction_method = record.get("提取方式") indicator_type = record.get("指标类型") @@ -103,7 +103,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "主材单价": @@ -121,7 +123,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -137,7 +141,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "主材参数": @@ -155,7 +161,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -171,7 +179,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "主材数量": @@ -189,7 +199,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -205,7 +217,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "定额参数": @@ -223,7 +237,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -239,7 +255,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "定额数量": @@ -257,7 +275,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -273,7 +293,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "工程费用": @@ -291,7 +313,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -307,7 +331,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "指标库": @@ -321,7 +347,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) elif data_sources == "项目划分费用": @@ -339,7 +367,9 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) else: @@ -355,12 +385,50 @@ def xml_to_json(xml_content): "单位": unit, "单价类型": unit_type, "序号": order, - "提取方式": extraction_method + "提取方式": extraction_method, + "指标类型": indicator_type, + "数据来源": data_sources }) return json.dumps(result, ensure_ascii=False, indent=4) +def transform_text(text, input_str="【FFFFF】"): + import re + + # 匹配方括号内的内容 + pattern = re.compile(r"\[(.*?)\]") + + # 查找方括号内容 + match = pattern.search(text) + if not match: + # 如果没有匹配到,返回原文 + return text + + # 获取匹配的方括号内容 + bracket_content = match.group(1) + + # 在原字符串中,用sub插入新内容 + # 注意使用re.sub的count=1只替换第一个匹配 + result = pattern.sub(f"{input_str}【{bracket_content}】", text, count=1) + + return result + +def replace_last_brackets(s): + import re + # 使用正则查找所有 [] + matches = list(re.finditer(r'\[([^\[\]]*)\]', s)) + if not matches: + return s # 没有匹配,直接返回 + # 取最后一个匹配 + last = matches[-1] + # 构造替换后的字符串 + start, end = last.span() + content = last.group(1) + return s[:start] + f'【{content}】' + s[end:] + + def parse_indicator_string_to_json(indicator_str: str, output_path: str = "output.json"): + import re try: # 解析为 JSON 对象 result = json.loads(indicator_str) @@ -377,6 +445,29 @@ def parse_indicator_string_to_json(indicator_str: str, output_path: str = "outpu if first_elem.startswith("(") and first_elem.endswith(")"): del mapping[0] + # 过滤2 + for item in result: + mapping = item.get("数据来源") + if mapping == "定额数量": + temp = item["指标描述"]["指标映射"] + temp = transform_text(temp, input_str="【定额】") + item["指标描述"]["指标映射"] = temp + elif mapping == "主材数量" or mapping == "主材单价": + temp = item["指标描述"]["指标映射"] + temp = transform_text(temp, input_str="【主材】") + item["指标描述"]["指标映射"] = temp + elif mapping == "设备数量": + temp = item["指标描述"]["指标映射"] + temp = transform_text(temp, input_str="【设备】") + item["指标描述"]["指标映射"] = temp + else: + if isinstance(item["指标描述"]["指标映射"], str): + temp = item["指标描述"]["指标映射"] + temp = replace_last_brackets(temp) + item["指标描述"]["指标映射"] = temp + + + # 保存为 JSON 文件 with open(output_path, 'w', encoding='utf-8') as f: json.dump(result, f, ensure_ascii=False, indent=2) @@ -389,6 +480,6 @@ def parse_indicator_string_to_json(indicator_str: str, output_path: str = "outpu xml_content = read_xml_as_string('./data/主网架空线路造价分析指标.xml') json_output = xml_to_json(xml_content) -parse_indicator_string_to_json(json_output, output_path= "./data/result.json") +parse_indicator_string_to_json(json_output, output_path= "./data/result6.27.json") print("转换完毕!")