上传文件至 kg_lab_6.13

6.25
1. 更新:转换全部关键信息
2. 设计所有指标映射的补全规则
This commit is contained in:
2025-06-25 11:09:39 +08:00
parent fbc6c06f2b
commit c284fcbdfd
+270 -5
View File
@@ -15,8 +15,8 @@ def read_xml_as_string(file_path):
def parse_keyword(keyword, indicator_name): def parse_keyword(keyword, indicator_name):
# 特殊处理:电压等级 # 特殊处理:电压等级
if indicator_name == "电压等级": # if indicator_name == "电压等级":
return {"映射规则": "1", "指标映射": [keyword]} # return {"映射规则": "1", "指标映射": [keyword]}
# 处理范围表达式(包含"||"分隔符) # 处理范围表达式(包含"||"分隔符)
if "||" in keyword: if "||" in keyword:
@@ -73,24 +73,289 @@ def parse_keyword(keyword, indicator_name):
return {"映射规则": keyword, "指标映射": [keyword]} return {"映射规则": keyword, "指标映射": [keyword]}
# Data sources whose parsed mapping and rule are emitted exactly as
# parse_keyword returned them (no descriptive sentence is generated).
_PASSTHROUGH_SOURCES = frozenset({"报表指标", "指标库"})

# Data sources whose mapping is rendered as a lookup sentence; the wording
# depends on whether the record carries an extraction scope.
_TEMPLATED_SOURCES = frozenset({
    "主材单价",
    "主材参数",
    "主材数量",
    "定额参数",
    "定额数量",
    "工程费用",
    "项目划分费用",
})


def _build_indicator_description(parsed, data_sources, index_extraction_scope):
    """Build the "指标描述" dict for one record.

    Args:
        parsed: Result of ``parse_keyword``; its "指标映射" and "映射规则"
            entries are read only when ``data_sources`` is a handled source.
        data_sources: Value of the record's "数据来源" attribute (may be None).
        index_extraction_scope: Value of the record's "指标提取范围"
            attribute, or None when the attribute is absent.

    Returns:
        A dict with the keys "指标映射" and "映射规则", or None when
        ``data_sources`` is not one of the handled sources.
    """
    if data_sources in _PASSTHROUGH_SOURCES:
        return {
            "指标映射": parsed["指标映射"],
            "映射规则": parsed["映射规则"],
        }

    if data_sources not in _TEMPLATED_SOURCES:
        return None

    mapping = parsed["指标映射"]
    rule = parsed["映射规则"]

    if index_extraction_scope is not None:
        # The last two characters of the source name are used as the noun
        # in the generated sentence.
        target = data_sources[-2:]
        mapping_text = (
            f"从项目划分【{index_extraction_scope}】下所有子孙项目划分中查找名称属于{mapping}的所有{target}"
        )
    else:
        mapping_text = f"从【{data_sources}】中获取{mapping}的属性"

    # In the templated form the rule is always emitted as a string.
    return {
        "指标映射": mapping_text,
        "映射规则": f"{rule}",
    }


def xml_to_json(xml_content):
    """Convert the ``records/record`` elements of an XML string to JSON.

    Every ``record`` element found under a ``records`` element is passed
    through ``parse_keyword`` and, if its "数据来源" attribute is one of the
    handled data sources, turned into one output entry. Records with any
    other (or missing) data source are skipped.

    Args:
        xml_content: XML document as a string.

    Returns:
        A JSON string (non-ASCII characters kept as-is, 4-space indent)
        holding a list of entries with the keys "指标名称", "指标描述",
        "code", "单位", "单价类型", "序号" and "提取方式".

    Raises:
        xml.etree.ElementTree.ParseError: If ``xml_content`` is not
            well-formed XML.
    """
    root = ET.fromstring(xml_content)
    result = []

    for record in root.findall('.//records/record'):
        indicator_name = record.get("指标名称")
        data_sources = record.get("数据来源")

        # Parse before checking the data source so that every record goes
        # through parse_keyword, exactly as in the branch-per-source version.
        parsed = parse_keyword(record.get("关键字"), indicator_name)

        description = _build_indicator_description(
            parsed, data_sources, record.get("指标提取范围")
        )
        if description is None:
            continue

        result.append({
            "指标名称": indicator_name,
            "指标描述": description,
            "code": "",
            "单位": record.get("单位"),
            "单价类型": record.get("单价类型"),
            "序号": record.get("序号"),
            "提取方式": record.get("提取方式"),
        })

    return json.dumps(result, ensure_ascii=False, indent=4)