From 850f0476c1d1e025dacbd103e01e67e29f533329 Mon Sep 17 00:00:00 2001
From: zoujiwen <zoujiwen@noreply.localhost>
Date: Thu, 3 Jul 2025 09:59:59 +0800
Subject: [PATCH] =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E6=96=87=E4=BB=B6=E8=87=B3?=
 =?UTF-8?q?=20kg=5Flab=5F6.13?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

7.3 优化条件分支
---
 kg_lab_6.13/xml_to_json.py | 79 +++++++++++++++++++++++++++++++++++++-
 1 file changed, 78 insertions(+), 1 deletion(-)

diff --git a/kg_lab_6.13/xml_to_json.py b/kg_lab_6.13/xml_to_json.py
index d355276..7789f2d 100644
--- a/kg_lab_6.13/xml_to_json.py
+++ b/kg_lab_6.13/xml_to_json.py
@@ -72,7 +72,7 @@ def parse_keyword(keyword, indicator_name):
     # 默认处理（普通关键字）
     return {"映射规则": keyword, "指标映射": [keyword]}
 
-def xml_to_json(xml_content):
+def xml_to_json2(xml_content):
 
     root = ET.fromstring(xml_content)
     records = root.findall('.//records/record')
@@ -392,6 +392,83 @@ def xml_to_json(xml_content):
 
     return json.dumps(result, ensure_ascii=False, indent=4)
 
+
+def xml_to_json(xml_content):  # Parse XML text; return a JSON array string with one object per record.
+    root = ET.fromstring(xml_content)
+    records = root.findall('.//records/record')  # <record> children of any <records> element under root
+    result = []
+    
+    # Data-source values, grouped by how the "指标描述" entry is built for them
+    scope_based_sources = ["主材单价", "主材参数", "主材数量", "定额参数", "定额数量", "工程费用"]
+    direct_sources = ["报表指标", "指标库"]
+    project_division = ["项目划分费用"]
+    
+    # One output item per record; fields come from record.get(...) (XML attributes, assuming ET is xml.etree.ElementTree)
+    for record in records:
+        unit = record.get("单位")
+        unit_type = record.get("单价类型")
+        order = record.get("序号")
+        extraction_method = record.get("提取方式")
+        indicator_type = record.get("指标类型")
+        index_extraction_scope = record.get("指标提取范围")
+        data_sources = record.get("数据来源")
+        indicator_name = record.get("指标名称")
+        keyword = record.get("关键字")
+        parsed = parse_keyword(keyword, indicator_name)  # indexed below by "指标映射" and "映射规则"
+        
+        base_item = {
+            "指标名称": indicator_name,
+            "code": "",  # always written as an empty string here
+            "单位": unit,
+            "单价类型": unit_type,
+            "序号": order,
+            "提取方式": extraction_method,
+            "指标类型": indicator_type,
+            "数据来源": data_sources
+        }
+        
+        if data_sources in direct_sources:  # parsed mapping is passed through unchanged
+            base_item["指标描述"] = {
+                "指标映射": parsed["指标映射"],
+                "映射规则": parsed["映射规则"]
+            }
+            result.append(base_item)
+
+        elif data_sources in project_division:  # mapping is a sentence naming the scope and the indicator
+            mapping_desc = f"查找一下项目划分节点【{index_extraction_scope}】下费用预览的【{indicator_name}】"
+            base_item["指标描述"] = {
+                "指标映射": mapping_desc,
+                "映射规则": parsed["映射规则"]
+            }
+            result.append(base_item)
+            
+        elif data_sources in scope_based_sources:  # mapping sentence depends on whether a scope is set
+            temp0 = parsed["指标映射"]
+            temp1 = parsed["映射规则"]
+            # NOTE(review): temp0 is formatted with str(); a list value would appear as its repr - confirm intended
+            if index_extraction_scope:
+                # Take the last two characters of the data source (e.g. "单价", "参数")
+                temp2 = data_sources[-2:]
+                mapping_desc = f"从项目划分【{index_extraction_scope}】下所有子孙项目划分中查找名称属于{temp0}的所有{temp2}"
+            else:
+                mapping_desc = f"从【{data_sources}】中获取{temp0}的属性"
+                
+            base_item["指标描述"] = {
+                "指标映射": mapping_desc,
+                "映射规则": temp1
+            }
+            result.append(base_item)
+            
+        else:
+            # Data-source types not listed above: same pass-through as the direct_sources branch
+            base_item["指标描述"] = {
+                "指标映射": parsed["指标映射"],
+                "映射规则": parsed["映射规则"]
+            }
+            result.append(base_item)
+
+    return json.dumps(result, ensure_ascii=False, indent=4)  # non-ASCII text is written as-is, not \u-escaped
+
 def transform_text(text, input_str="【FFFFF】"):
     import re