# Data sources whose parsed mapping and rule are emitted unchanged.
_PASSTHROUGH_SOURCES = frozenset({"报表指标", "指标库"})

# Data sources whose mapping is rewritten into a lookup sentence.
_DESCRIBED_SOURCES = frozenset({
    "主材单价",
    "主材参数",
    "主材数量",
    "定额参数",
    "定额数量",
    "工程费用",
    "项目划分费用",
})


def _build_indicator_description(parsed, data_source, extraction_scope):
    """Build the "指标描述" dict for one record.

    Args:
        parsed: Result of ``parse_keyword``; must provide the keys
            "指标映射" and "映射规则".
        data_source: Value of the record's "数据来源" attribute; must be a
            member of ``_PASSTHROUGH_SOURCES`` or ``_DESCRIBED_SOURCES``.
        extraction_scope: Value of the record's "指标提取范围" attribute, or
            ``None`` when the attribute is absent.

    Returns:
        A dict with the keys "指标映射" and "映射规则", in that order.
    """
    mapping = parsed["指标映射"]
    rule = parsed["映射规则"]

    if data_source in _PASSTHROUGH_SOURCES:
        return {"指标映射": mapping, "映射规则": rule}

    if extraction_scope is not None:
        # The last two characters of the source name (e.g. "单价" from
        # "主材单价") name what is being looked up inside the scope.
        target = data_source[-2:]
        sentence = (
            f"从项目划分【{extraction_scope}】下所有子孙项目划分中"
            f"查找名称属于{mapping}的所有{target}"
        )
    else:
        sentence = f"从【{data_source}】中获取{mapping}的属性"

    # The rule is stringified here, unlike in the pass-through case.
    return {"指标映射": sentence, "映射规则": f"{rule}"}


def xml_to_json(xml_content) -> str:
    """Convert the ``records/record`` elements of an XML document to JSON.

    Every ``record`` element found under a ``records`` element is turned
    into one JSON object holding the indicator name, its description
    (mapping and rule derived from ``parse_keyword``), an empty "code"
    field and the "单位", "单价类型", "序号" and "提取方式" attributes.

    Records whose "数据来源" attribute is missing or is not one of the
    supported sources are omitted from the output. They are skipped before
    ``parse_keyword`` is called, so an unparsable keyword on an unsupported
    record cannot abort the conversion.

    Args:
        xml_content: The XML document, in any form accepted by
            ``ET.fromstring``.

    Returns:
        A JSON array serialized with ``ensure_ascii=False`` and an indent
        of four spaces.

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_content)
    result = []

    for record in root.findall('.//records/record'):
        data_source = record.get("数据来源")
        if (
            data_source not in _PASSTHROUGH_SOURCES
            and data_source not in _DESCRIBED_SOURCES
        ):
            continue

        indicator_name = record.get("指标名称")
        parsed = parse_keyword(record.get("关键字"), indicator_name)

        result.append({
            "指标名称": indicator_name,
            "指标描述": _build_indicator_description(
                parsed, data_source, record.get("指标提取范围")
            ),
            "code": "",
            "单位": record.get("单位"),
            "单价类型": record.get("单价类型"),
            "序号": record.get("序号"),
            "提取方式": record.get("提取方式"),
        })

    return json.dumps(result, ensure_ascii=False, indent=4)