def xml_to_json(xml_content):
    """Serialize the indicator records of an XML document as a JSON string.

    Each element matched by ``.//records/record`` becomes one JSON object.
    The object copies the record's attributes, adds an empty ``"code"``
    field, and carries a ``"指标描述"`` entry with two keys:

    * ``"映射规则"`` is always the rule returned by ``parse_keyword``.
    * ``"指标映射"`` depends on the record's ``"数据来源"`` attribute:

      - ``"项目划分费用"``: a sentence naming the extraction scope and the
        indicator name.
      - one of the scope-based sources (``"主材单价"``, ``"主材参数"``,
        ``"主材数量"``, ``"定额参数"``, ``"定额数量"``, ``"工程费用"``): a sentence
        built from the parsed mapping, worded differently depending on
        whether ``"指标提取范围"`` is non-empty.
      - anything else (including ``"报表指标"``, ``"指标库"`` and a missing
        attribute): the mapping returned by ``parse_keyword``, unchanged.

    Args:
        xml_content: XML text to parse with ``ET.fromstring``.

    Returns:
        A JSON array string produced with ``ensure_ascii=False`` and
        ``indent=4``; one object per record, in document order.
    """
    # Sources whose mapping is phrased relative to a project-division scope.
    scoped_sources = frozenset(
        ("主材单价", "主材参数", "主材数量", "定额参数", "定额数量", "工程费用")
    )
    fee_source = "项目划分费用"

    def describe(record):
        """Build the output object for a single ``record`` element."""
        name = record.get("指标名称")
        source = record.get("数据来源")
        scope = record.get("指标提取范围")
        parsed = parse_keyword(record.get("关键字"), name)

        if source == fee_source:
            mapping = f"查找一下项目划分节点【{scope}】下费用预览的【{name}】"
        elif source in scoped_sources:
            targets = parsed["指标映射"]
            if scope:
                # The last two characters of the source name say what is
                # being looked up (e.g. "单价", "参数").
                suffix = source[-2:]
                mapping = f"从项目划分【{scope}】下所有子孙项目划分中查找名称属于{targets}的所有{suffix}"
            else:
                mapping = f"从【{source}】中获取{targets}的属性"
        else:
            # "报表指标", "指标库" and every unrecognized source pass the
            # parsed mapping through untouched.
            mapping = parsed["指标映射"]

        return {
            "指标名称": name,
            "code": "",
            "单位": record.get("单位"),
            "单价类型": record.get("单价类型"),
            "序号": record.get("序号"),
            "提取方式": record.get("提取方式"),
            "指标类型": record.get("指标类型"),
            "数据来源": source,
            "指标描述": {
                "指标映射": mapping,
                "映射规则": parsed["映射规则"],
            },
        }

    records = ET.fromstring(xml_content).findall('.//records/record')
    return json.dumps(
        [describe(record) for record in records],
        ensure_ascii=False,
        indent=4,
    )