上传文件至 kg_lab_6.13

6.27 更新输出格式,对接检索模块
This commit is contained in:
2025-06-27 18:10:19 +08:00
parent ace8d52dfc
commit ddcad7a1e4
+108 -17
View File
@@ -103,7 +103,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "主材单价": elif data_sources == "主材单价":
@@ -121,7 +123,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -137,7 +141,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "主材参数": elif data_sources == "主材参数":
@@ -155,7 +161,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -171,7 +179,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "主材数量": elif data_sources == "主材数量":
@@ -189,7 +199,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -205,7 +217,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "定额参数": elif data_sources == "定额参数":
@@ -223,7 +237,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -239,7 +255,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "定额数量": elif data_sources == "定额数量":
@@ -257,7 +275,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -273,7 +293,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "工程费用": elif data_sources == "工程费用":
@@ -291,7 +313,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -307,7 +331,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "指标库": elif data_sources == "指标库":
@@ -321,7 +347,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
elif data_sources == "项目划分费用": elif data_sources == "项目划分费用":
@@ -339,7 +367,9 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
else: else:
@@ -355,12 +385,50 @@ def xml_to_json(xml_content):
"单位": unit, "单位": unit,
"单价类型": unit_type, "单价类型": unit_type,
"序号": order, "序号": order,
"提取方式": extraction_method "提取方式": extraction_method,
"指标类型": indicator_type,
"数据来源": data_sources
}) })
return json.dumps(result, ensure_ascii=False, indent=4) return json.dumps(result, ensure_ascii=False, indent=4)
def transform_text(text, input_str="【FFFFF】"):
    """Replace the first ``[...]`` group in *text* with *input_str* plus its content.

    The first square-bracketed span (non-greedy, confined to one line) is
    located; the brackets are dropped and *input_str* is inserted in front of
    the text that was inside them. Everything else in *text* is left as is.

    Args:
        text: The string to transform.
        input_str: Prefix inserted in place of the opening bracket.

    Returns:
        The transformed string, or *text* unchanged when it contains no
        ``[...]`` group.
    """
    import re

    # Locate the first bracketed span.
    match = re.search(r"\[(.*?)\]", text)
    if match is None:
        # Nothing to rewrite: hand the input back untouched.
        return text

    # Splice by position instead of passing the new text to re.sub() as a
    # replacement template: a template would interpret backslashes in
    # input_str or in the bracket content (e.g. "\d" raises re.error and
    # "\1" is expanded as a group reference) instead of inserting them
    # literally.
    start, end = match.span()
    return f"{text[:start]}{input_str}{match.group(1)}{text[end:]}"
def replace_last_brackets(s):
    """Strip the brackets from the last innermost ``[...]`` group in *s*.

    Only groups whose content has no further ``[`` or ``]`` are considered.
    The content of the last such group is kept; its surrounding brackets
    are removed. When no group is found, *s* is returned unchanged.
    """
    import re

    # Walk every match, remembering only the final one.
    final = None
    for final in re.finditer(r'\[([^\[\]]*)\]', s):
        pass

    if final is None:
        return s

    left, right = final.span()
    inner = final.group(1)
    return "".join((s[:left], inner, s[right:]))
def parse_indicator_string_to_json(indicator_str: str, output_path: str = "output.json"): def parse_indicator_string_to_json(indicator_str: str, output_path: str = "output.json"):
import re
try: try:
# 解析为 JSON 对象 # 解析为 JSON 对象
result = json.loads(indicator_str) result = json.loads(indicator_str)
@@ -377,6 +445,29 @@ def parse_indicator_string_to_json(indicator_str: str, output_path: str = "outpu
if first_elem.startswith("(") and first_elem.endswith(")"): if first_elem.startswith("(") and first_elem.endswith(")"):
del mapping[0] del mapping[0]
# 过滤2
for item in result:
mapping = item.get("数据来源")
if mapping == "定额数量":
temp = item["指标描述"]["指标映射"]
temp = transform_text(temp, input_str="【定额】")
item["指标描述"]["指标映射"] = temp
elif mapping == "主材数量" or mapping == "主材单价":
temp = item["指标描述"]["指标映射"]
temp = transform_text(temp, input_str="【主材】")
item["指标描述"]["指标映射"] = temp
elif mapping == "设备数量":
temp = item["指标描述"]["指标映射"]
temp = transform_text(temp, input_str="【设备】")
item["指标描述"]["指标映射"] = temp
else:
if isinstance(item["指标描述"]["指标映射"], str):
temp = item["指标描述"]["指标映射"]
temp = replace_last_brackets(temp)
item["指标描述"]["指标映射"] = temp
# 保存为 JSON 文件 # 保存为 JSON 文件
with open(output_path, 'w', encoding='utf-8') as f: with open(output_path, 'w', encoding='utf-8') as f:
json.dump(result, f, ensure_ascii=False, indent=2) json.dump(result, f, ensure_ascii=False, indent=2)
@@ -389,6 +480,6 @@ def parse_indicator_string_to_json(indicator_str: str, output_path: str = "outpu
xml_content = read_xml_as_string('./data/主网架空线路造价分析指标.xml') xml_content = read_xml_as_string('./data/主网架空线路造价分析指标.xml')
json_output = xml_to_json(xml_content) json_output = xml_to_json(xml_content)
parse_indicator_string_to_json(json_output, output_path= "./data/result.json") parse_indicator_string_to_json(json_output, output_path= "./data/result6.27.json")
print("转换完毕!") print("转换完毕!")