上传文件至 kg_lab_6.13
6.27 更新输出格式,对接检索模块
This commit is contained in:
+109
-18
@@ -82,7 +82,7 @@ def xml_to_json(xml_content):
|
||||
unit = record.get("单位")
|
||||
unit_type = record.get("单价类型")
|
||||
order = record.get("序号")
|
||||
extraction_method = record.get("提取方式")
|
||||
extraction_method = record.get("提取方式")
|
||||
|
||||
|
||||
indicator_type = record.get("指标类型")
|
||||
@@ -103,7 +103,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "主材单价":
|
||||
@@ -121,7 +123,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -137,7 +141,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "主材参数":
|
||||
@@ -155,7 +161,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -171,7 +179,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "主材数量":
|
||||
@@ -189,7 +199,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -205,7 +217,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "定额参数":
|
||||
@@ -223,7 +237,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -239,7 +255,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "定额数量":
|
||||
@@ -257,7 +275,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -273,7 +293,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "工程费用":
|
||||
@@ -291,7 +313,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -307,7 +331,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "指标库":
|
||||
@@ -321,7 +347,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
elif data_sources == "项目划分费用":
|
||||
@@ -339,7 +367,9 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
else:
|
||||
@@ -355,12 +385,50 @@ def xml_to_json(xml_content):
|
||||
"单位": unit,
|
||||
"单价类型": unit_type,
|
||||
"序号": order,
|
||||
"提取方式": extraction_method
|
||||
"提取方式": extraction_method,
|
||||
"指标类型": indicator_type,
|
||||
"数据来源": data_sources
|
||||
})
|
||||
|
||||
return json.dumps(result, ensure_ascii=False, indent=4)
|
||||
|
||||
def transform_text(text, input_str="【FFFFF】"):
    """Tag the first ``[...]`` segment of *text* and convert its brackets.

    The first square-bracketed segment is rewritten as
    ``{input_str}【{content}】``; everything before and after it is kept
    verbatim, including any later ``[...]`` segments.

    Args:
        text: String to search for a ``[...]`` segment.
        input_str: Marker inserted immediately before the converted segment.

    Returns:
        The rewritten string, or *text* unchanged when it contains no
        ``[...]`` segment.
    """
    import re

    # Non-greedy so that only the first, shortest bracket pair is captured.
    match = re.search(r"\[(.*?)\]", text)
    if not match:
        # Nothing to tag: hand the input back untouched.
        return text

    # Splice by position instead of passing a replacement string to re.sub:
    # re.sub interprets backslashes and group references in the replacement,
    # so bracket content such as "a\qb" would raise re.error and "\g<1>" in
    # input_str would be expanded rather than inserted literally.
    start, end = match.span()
    return f"{text[:start]}{input_str}【{match.group(1)}】{text[end:]}"
|
||||
|
||||
def replace_last_brackets(s):
    """Convert the last innermost ``[...]`` segment of *s* to ``【...】``.

    Only segments whose content contains no square brackets are considered,
    so for nested brackets the innermost pair is the one that matches.

    Args:
        s: String to search.

    Returns:
        *s* with its final matching ``[content]`` rewritten as ``【content】``,
        or *s* unchanged when there is no such segment.
    """
    import re

    # Walk the matches and keep only the final one; there is no need to
    # hold every match in a list just to read the last element.
    last = None
    for last in re.finditer(r'\[([^\[\]]*)\]', s):
        pass

    if last is None:
        # No bracketed segment found: return the input as-is.
        return s

    start, end = last.span()
    return s[:start] + f'【{last.group(1)}】' + s[end:]
|
||||
|
||||
|
||||
def parse_indicator_string_to_json(indicator_str: str, output_path: str = "output.json"):
|
||||
import re
|
||||
try:
|
||||
# 解析为 JSON 对象
|
||||
result = json.loads(indicator_str)
|
||||
@@ -377,6 +445,29 @@ def parse_indicator_string_to_json(indicator_str: str, output_path: str = "outpu
|
||||
if first_elem.startswith("(") and first_elem.endswith(")"):
|
||||
del mapping[0]
|
||||
|
||||
# 过滤2
|
||||
for item in result:
|
||||
mapping = item.get("数据来源")
|
||||
if mapping == "定额数量":
|
||||
temp = item["指标描述"]["指标映射"]
|
||||
temp = transform_text(temp, input_str="【定额】")
|
||||
item["指标描述"]["指标映射"] = temp
|
||||
elif mapping == "主材数量" or mapping == "主材单价":
|
||||
temp = item["指标描述"]["指标映射"]
|
||||
temp = transform_text(temp, input_str="【主材】")
|
||||
item["指标描述"]["指标映射"] = temp
|
||||
elif mapping == "设备数量":
|
||||
temp = item["指标描述"]["指标映射"]
|
||||
temp = transform_text(temp, input_str="【设备】")
|
||||
item["指标描述"]["指标映射"] = temp
|
||||
else:
|
||||
if isinstance(item["指标描述"]["指标映射"], str):
|
||||
temp = item["指标描述"]["指标映射"]
|
||||
temp = replace_last_brackets(temp)
|
||||
item["指标描述"]["指标映射"] = temp
|
||||
|
||||
|
||||
|
||||
# 保存为 JSON 文件
|
||||
with open(output_path, 'w', encoding='utf-8') as f:
|
||||
json.dump(result, f, ensure_ascii=False, indent=2)
|
||||
@@ -389,6 +480,6 @@ def parse_indicator_string_to_json(indicator_str: str, output_path: str = "outpu
|
||||
|
||||
xml_content = read_xml_as_string('./data/主网架空线路造价分析指标.xml')
|
||||
json_output = xml_to_json(xml_content)
|
||||
parse_indicator_string_to_json(json_output, output_path= "./data/result.json")
|
||||
parse_indicator_string_to_json(json_output, output_path= "./data/result6.27.json")
|
||||
print("转换完毕!")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user