更新最新代码
This commit is contained in:
@@ -0,0 +1,361 @@
|
||||
1. 实体类型
|
||||
EngineeringData
|
||||
name: STRING
|
||||
|
||||
ProjectDivisionSet
|
||||
name: STRING
|
||||
|
||||
ProjectDivisionTree
|
||||
name: STRING
|
||||
original_second_level: STRING
|
||||
original_first_level: STRING
|
||||
|
||||
ProjectDivisionItem
|
||||
type: STRING
|
||||
取费表id: STRING
|
||||
name: STRING
|
||||
序号: STRING
|
||||
取费表: STRING
|
||||
GUID: STRING
|
||||
专业类型: STRING
|
||||
费率: STRING
|
||||
资源库名称: STRING
|
||||
代码: STRING
|
||||
notCheck: STRING
|
||||
颜色标记: STRING
|
||||
编码: STRING
|
||||
最小资源库编码: STRING
|
||||
原合价: STRING
|
||||
备注: STRING
|
||||
|
||||
List
|
||||
合价不含税: STRING
|
||||
数量: STRING
|
||||
type: STRING
|
||||
资源库名称: STRING
|
||||
工作内容: STRING
|
||||
取费表类型: STRING
|
||||
类型: STRING
|
||||
取费表名称: STRING
|
||||
单位: STRING
|
||||
清单名称: STRING
|
||||
编码: STRING
|
||||
计算规则: STRING
|
||||
清单全码: STRING
|
||||
GUID: STRING
|
||||
guid: STRING
|
||||
单价不含税: STRING
|
||||
单价: STRING
|
||||
name: STRING
|
||||
项目特征: STRING
|
||||
计算式: STRING
|
||||
合价: STRING
|
||||
取费表: STRING
|
||||
|
||||
ProjectQuantity
|
||||
数量: STRING
|
||||
资源库名称: STRING
|
||||
合价不含税: STRING
|
||||
特征段: STRING
|
||||
费用类型: STRING
|
||||
基准价含税: STRING
|
||||
结算市场价不含税: STRING
|
||||
投标单价: STRING
|
||||
单位: STRING
|
||||
结算市场价含税: STRING
|
||||
基准价不含税: STRING
|
||||
颜色标记: STRING
|
||||
投标数量: STRING
|
||||
编码: STRING
|
||||
类型: STRING
|
||||
规格型号: STRING
|
||||
关联父级量: STRING
|
||||
name: STRING
|
||||
单价不含税: STRING
|
||||
损耗率: STRING
|
||||
截面积: STRING
|
||||
线重: STRING
|
||||
id: STRING
|
||||
供货方: STRING
|
||||
单重: STRING
|
||||
集中配送: STRING
|
||||
市场价含税: STRING
|
||||
制造长度: STRING
|
||||
单价含税: STRING
|
||||
市场价不含税: STRING
|
||||
增值税率: STRING
|
||||
合价含税: STRING
|
||||
运杂费率: STRING
|
||||
设备类型: STRING
|
||||
计算式: STRING
|
||||
人工系数: STRING
|
||||
定额范围: STRING
|
||||
定额系数: STRING
|
||||
基价: STRING
|
||||
机械费: STRING
|
||||
人工费: STRING
|
||||
材料系数: STRING
|
||||
中标计算式: STRING
|
||||
机械系数: STRING
|
||||
材料费: STRING
|
||||
投标合价: STRING
|
||||
所属定额库: STRING
|
||||
批注: STRING
|
||||
标记: STRING
|
||||
定额调整系数: STRING
|
||||
监造物料: STRING
|
||||
备注: STRING
|
||||
|
||||
Quota
|
||||
特征段: STRING
|
||||
人工系数: STRING
|
||||
数量: STRING
|
||||
定额范围: STRING
|
||||
定额系数: STRING
|
||||
合价不含税: STRING
|
||||
基价: STRING
|
||||
机械费: STRING
|
||||
资源库名称: STRING
|
||||
人工费: STRING
|
||||
材料系数: STRING
|
||||
投标单价: STRING
|
||||
中标计算式: STRING
|
||||
费用类型: STRING
|
||||
机械系数: STRING
|
||||
类型: STRING
|
||||
材料费: STRING
|
||||
单位: STRING
|
||||
颜色标记: STRING
|
||||
投标合价: STRING
|
||||
编码: STRING
|
||||
关联父级量: STRING
|
||||
投标数量: STRING
|
||||
计算式: STRING
|
||||
id: STRING
|
||||
name: STRING
|
||||
单价不含税: STRING
|
||||
所属定额库: STRING
|
||||
批注: STRING
|
||||
标记: STRING
|
||||
定额调整系数: STRING
|
||||
颜色标记: STRING
|
||||
备注: STRING
|
||||
|
||||
MainMaterial
|
||||
数量: STRING
|
||||
资源库名称: STRING
|
||||
合价不含税: STRING
|
||||
特征段: STRING
|
||||
费用类型: STRING
|
||||
基准价含税: STRING
|
||||
结算市场价不含税: STRING
|
||||
投标单价: STRING
|
||||
单位: STRING
|
||||
结算市场价含税: STRING
|
||||
基准价不含税: STRING
|
||||
颜色标记: STRING
|
||||
投标数量: STRING
|
||||
编码: STRING
|
||||
类型: STRING
|
||||
规格型号: STRING
|
||||
关联父级量: STRING
|
||||
name: STRING
|
||||
单价不含税: STRING
|
||||
损耗率: STRING
|
||||
截面积: STRING
|
||||
线重: STRING
|
||||
id: STRING
|
||||
供货方: STRING
|
||||
单重: STRING
|
||||
集中配送: STRING
|
||||
市场价含税: STRING
|
||||
制造长度: STRING
|
||||
单价含税: STRING
|
||||
市场价不含税: STRING
|
||||
增值税率: STRING
|
||||
合价含税: STRING
|
||||
|
||||
Equipment
|
||||
特征段: STRING
|
||||
单价含税: STRING
|
||||
单位: STRING
|
||||
资源库名称: STRING
|
||||
合价不含税: STRING
|
||||
类型: STRING
|
||||
投标数量: STRING
|
||||
投标单价: STRING
|
||||
关联父级量: STRING
|
||||
颜色标记: STRING
|
||||
运杂费率: STRING
|
||||
设备类型: STRING
|
||||
编码: STRING
|
||||
供货方: STRING
|
||||
规格型号: STRING
|
||||
单价不含税: STRING
|
||||
id: STRING
|
||||
name: STRING
|
||||
数量: STRING
|
||||
计算式: STRING
|
||||
合价含税: STRING
|
||||
|
||||
MaterialOrEquipment
|
||||
type: STRING
|
||||
预算价不含税: STRING
|
||||
单位: STRING
|
||||
结算市场价不含税: STRING
|
||||
暂估价: STRING
|
||||
编码: STRING
|
||||
结算市场价含税: STRING
|
||||
全口径市场价不含税: STRING
|
||||
全口径市场价含税: STRING
|
||||
是否未计价: STRING
|
||||
unique_id: STRING
|
||||
供货方: STRING
|
||||
结算预算价含税: STRING
|
||||
结算预算价不含税: STRING
|
||||
市场价含税: STRING
|
||||
预算价含税: STRING
|
||||
id: STRING
|
||||
name: STRING
|
||||
数量: STRING
|
||||
市场价不含税: STRING
|
||||
拆分: STRING
|
||||
商品砼: STRING
|
||||
children: STRING
|
||||
|
||||
CostSet
|
||||
name: STRING
|
||||
GUID: STRING
|
||||
|
||||
CostItem
|
||||
name: STRING
|
||||
cost: STRING
|
||||
unique_id: STRING
|
||||
id: STRING
|
||||
|
||||
FeeTableTemplateSet
|
||||
name: STRING
|
||||
typeList: STRING
|
||||
|
||||
FeeTableTemplateItem
|
||||
type: STRING
|
||||
name: STRING
|
||||
profession: STRING
|
||||
outlayID: STRING
|
||||
|
||||
FeeCollection
|
||||
name: STRING
|
||||
serialNumber: STRING
|
||||
base: STRING
|
||||
code: STRING
|
||||
rate: STRING
|
||||
remark: STRING
|
||||
|
||||
FeeScheduleSet
|
||||
name: STRING
|
||||
|
||||
FeeScheduleItem
|
||||
name: STRING
|
||||
|
||||
ProjectPropertySet
|
||||
name: STRING
|
||||
|
||||
ProjectProperty
|
||||
特殊地区: STRING
|
||||
调差选择所在地: STRING
|
||||
工程所在地: STRING
|
||||
编制时间: STRING
|
||||
工程版本: STRING
|
||||
项目划分: STRING
|
||||
工程阶段: STRING
|
||||
工程名称: STRING
|
||||
专业类型: STRING
|
||||
地区类型: STRING
|
||||
组价方式: STRING
|
||||
人工调差系数: STRING
|
||||
调差选择地区类型: STRING
|
||||
机械调差系数: STRING
|
||||
架线类型: STRING
|
||||
安装机械调差系数: STRING
|
||||
清单规范: STRING
|
||||
市场价唯一: STRING
|
||||
材料调差系数: STRING
|
||||
甲供材料计入综合单价: STRING
|
||||
人工按系数调差: STRING
|
||||
最高投标限价(万元): STRING
|
||||
工程总投资: STRING
|
||||
是否按单位控制工程量精度: STRING
|
||||
安装人工调差系数: STRING
|
||||
不同土质定额归属不同清单: STRING
|
||||
住房公积金缴费费率: STRING
|
||||
是否是合并工程: STRING
|
||||
招标人: STRING
|
||||
相同清单合并: STRING
|
||||
执行规范: STRING
|
||||
安装材料调差系数: STRING
|
||||
拆除调差系数年份: STRING
|
||||
调差系数年份: STRING
|
||||
工程税率: STRING
|
||||
甲供材料计入本体: STRING
|
||||
软件名称: STRING
|
||||
电压等级: STRING
|
||||
预算类型: STRING
|
||||
本期台数: STRING
|
||||
建筑人工调差系数: STRING
|
||||
单台容量: STRING
|
||||
建筑拆除人工调差系数: STRING
|
||||
配置选项: STRING
|
||||
施工企业配合调试费费率: STRING
|
||||
安装其他设备运杂费率: STRING
|
||||
安装材机调差系数: STRING
|
||||
工程性质: STRING
|
||||
表头设置: STRING
|
||||
工程静态投资(万元): STRING
|
||||
安装主要设备运杂费率: STRING
|
||||
编制依据: STRING
|
||||
工程动态投资(万元): STRING
|
||||
基本预备费费率: STRING
|
||||
阶段类型: STRING
|
||||
BCL版本: STRING
|
||||
社会保险费缴费费率: STRING
|
||||
|
||||
|
||||
2. 实体间的关系
|
||||
(:EngineeringData)-[:HAS_CHILD]->(:ProjectPropertySet)
|
||||
(:EngineeringData)-[:HAS_CHILD]->(:FeeScheduleSet)
|
||||
(:EngineeringData)-[:HAS_CHILD]->(:FeeTableTemplateSet)
|
||||
(:EngineeringData)-[:HAS_CHILD]->(:ProjectDivisionSet)
|
||||
(:ProjectDivisionSet)-[:HAS_CHILD]->(:ProjectDivisionSet)
|
||||
(:ProjectDivisionSet)-[:HAS_CHILD]->(:ProjectDivisionItem)
|
||||
(:ProjectDivisionItem)-[:HAS_CHILD]->(:List)
|
||||
(:ProjectDivisionItem)-[:HAS_CHILD]->(:ProjectDivisionItem)
|
||||
(:ProjectDivisionItem)-[:USE]->(:CostSet)
|
||||
(:ProjectDivisionItem)-[:HAS_CHILD]->(:ProjectQuantity)
|
||||
(:ProjectDivisionItem)-[:HAS_CHILD]->(:Quota)
|
||||
(:ProjectDivisionItem)-[:HAS_CHILD]->(:MainMaterial)
|
||||
(:ProjectDivisionItem)-[:HAS_CHILD]->(:Equipment)
|
||||
(:ProjectQuantity)-[:HAS_CHILD]->(:MaterialOrEquipment)
|
||||
(:FeeTableTemplateSet)-[:HAS_CHILD]->(:FeeTableTemplateSet)
|
||||
(:FeeTableTemplateSet)-[:HAS_CHILD]->(:FeeTableTemplateItem)
|
||||
(:FeeTableTemplateItem)-[:HAS_CHILD]->(:FeeCollection)
|
||||
(:FeeCollection)-[:HAS_CHILD]->(:FeeCollection)
|
||||
(:FeeScheduleSet)-[:HAS_CHILD]->(:FeeScheduleItem)
|
||||
(:FeeScheduleItem)-[:HAS_CHILD]->(:Fee)
|
||||
(:Fee)-[:HAS_CHILD]->(:Fee)
|
||||
(:ProjectPropertySet)-[:HAS_CHILD]->(:ProjectProperty)
|
||||
(:List)-[:HAS_CHILD]->(:ProjectQuantity)
|
||||
(:List)-[:HAS_CHILD]->(:Equipment)
|
||||
(:List)-[:HAS_CHILD]->(:MainMaterial)
|
||||
(:List)-[:HAS_CHILD]->(:Quota)
|
||||
(:List)-[:USE]->(:CostSet)
|
||||
(:Quota)-[:HAS_CHILD]->(:MaterialOrEquipment)
|
||||
(:CostSet)-[:HAS_CHILD]->(:CostItem)
|
||||
(:ProjectDivisionTree)-[:HAS_CHILD]->(:ProjectDivisionItem)
|
||||
(:ProjectQuantity)-[:HAS_CHILD]->(:ProjectQuantity)
|
||||
(:ProjectQuantity)-[:HAS_CHILD]->(:MainMaterial)
|
||||
(:MainMaterial)-[:HAS_CHILD]->(:ProjectQuantity)
|
||||
(:MainMaterial)-[:HAS_CHILD]->(:MainMaterial)
|
||||
(:ProjectQuantity)-[:USE]->(:CostSet)
|
||||
(:ProjectDivisionTree)-[:USE]->(:CostSet)
|
||||
(:ProjectDivisionSet)-[:HAS_CHILD]->(:ProjectDivisionTree)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,167 @@
|
||||
import streamlit as st
|
||||
import os
|
||||
import sys
|
||||
import json
|
||||
import logging
|
||||
import os
|
||||
from datetime import datetime
|
||||
# Put the parent of this file's directory on sys.path; presumably this is what
# lets the `src.*` imports further down resolve when the script is run directly.
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Timestamp (YYYYMMDDHHMMSS) used to give every run its own log file.
now_str = datetime.now().strftime("%Y%m%d%H%M%S")

# Log file name is "<this file's stem>_<timestamp>.log".
current_file = os.path.splitext(os.path.basename(__file__))[0]
log_filename = f"{current_file}_{now_str}.log"

# FileHandler opens the file immediately and does not create missing parent
# directories, so make sure "logs" exists before configuring logging.
os.makedirs("logs", exist_ok=True)

# Root logger: DEBUG level, written both to the per-run file and to the console.
logging.basicConfig(
    level=logging.DEBUG,
    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
    handlers=[
        logging.FileHandler(os.path.join("logs", log_filename), encoding="utf-8"),
        logging.StreamHandler(),
    ],
)

logger = logging.getLogger(current_file)
|
||||
|
||||
def setup_logger(logger_name):
    """
    Quiet the logger with the given name.

    The logger's level is raised to WARNING and propagation is switched off,
    so its records are no longer handed to ancestor loggers' handlers.

    :param logger_name: name of the logger to configure
    :return: the configured logger object
    """
    target = logging.getLogger(logger_name)
    target.propagate = False  # keep records away from the root logger's handlers
    target.setLevel(logging.WARNING)
    return target
|
||||
|
||||
|
||||
# Names of the library loggers that are limited to WARNING and detached from
# the root logger via setup_logger.
logger_names = [
    "httpx",
    "openai",
    "langsmith.client",
    "neo4j",
    "urllib3",
    "httpcore",
]
for name in logger_names:
    setup_logger(name)
|
||||
|
||||
from src.config import Config
|
||||
from src.document_loader import load_file
|
||||
from src.embedding_client import EmbeddingClient
|
||||
from src.multi_llm_client import MultiAPIKeyChatOpenAI
|
||||
from src.code_executor import CodeExecutor
|
||||
from src.neo4j_raw_retriever import Neo4jRawRetriever
|
||||
from src.prompt_manager import PromptManager
|
||||
from src.project import ProjectBuilder, ProjectToolkit
|
||||
from src.project_implementation import ProjectToolkitNeo4j
|
||||
from src.code_executor import CodeExecutor
|
||||
|
||||
# Application configuration object; every setting below is read from it.
config = Config()

# Reference documents loaded from the paths named in the configuration.
business_structure = load_file(config.business_object_structure_path)
bowei_api_docs = load_file(config.bowei_api_docs_path)

#llm_client = MultiAPIKeyChatOpenAI(config.openai)

# Chat client built from the `openai_coder` configuration; it is handed to the
# CodeExecutor below.
llm_client_coder = MultiAPIKeyChatOpenAI(config.openai_coder)

prompt_manager = PromptManager()

neo4j_conf = config.neo4j_conf
embedding_conf = config.embedding

embedding_client = EmbeddingClient(embedding_conf)

# Create the Neo4j retriever
knowledge_retriever = Neo4jRawRetriever(neo4j_conf)

# Register the Neo4j toolkit implementation together with the retriever's driver.
ProjectBuilder.register(ProjectToolkitNeo4j, knowledge_retriever.driver)

# Executor used by run_code; receives the prompts, the coder client and the
# configured retry limit.
code_executor = CodeExecutor(prompt_manager.prompts, llm_client_coder, config.max_retries)
|
||||
|
||||
def load_jsonl(file_path):
    """Load a JSONL file and return its records as a list.

    Every line that is not blank is parsed with ``json.loads``. If anything
    goes wrong (missing file, malformed JSON, ...) the error is shown through
    ``st.error`` and an empty list is returned instead.

    :param file_path: path of the JSONL file, read as UTF-8
    :return: list of parsed records, or ``[]`` on failure
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as handle:
            return [json.loads(raw) for raw in handle if raw.strip()]
    except Exception as e:
        st.error(f"加载文件失败: {str(e)}")
        return []
|
||||
|
||||
def run_code(data):
    """Run the code stored in a JSON record and return the execution result.

    :param data: record (dict) expected to carry the source under the
        ``'code'`` key
    :return: an error dict (``code`` 40000, ``status`` False) when the record
        is empty or has no ``'code'`` key; otherwise whatever the module-level
        ``code_executor.execute_code`` returns. The caller reads ``status``,
        ``data`` and ``message`` from it, so that is presumably a dict as
        well -- confirm against CodeExecutor.
    """
    if not data or 'code' not in data:
        return {
            "code": 40000,
            "message": "没有可执行的代码",
            "status": False,
            "data": None
        }

    # The result must be handed back: the caller inspects it right after the
    # call, and previously received None.
    return code_executor.execute_code(data['code'])
|
||||
|
||||
def main():
    """Render the Streamlit JSONL viewer page.

    The page asks for a JSONL file path, lists one button per record in a
    narrow left column and shows the selected record's query and code in the
    right column, together with a button that runs the code through run_code
    and displays the outcome.
    """
    st.set_page_config(layout="wide", page_title="JSONL查看器")

    st.title("JSONL文件查看器")

    # Default file path: ../tests/code.jsonl relative to this file's directory
    default_file_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "../tests", "code.jsonl")

    # File path input
    file_path = st.text_input("JSONL文件路径", value=default_file_path)

    if not os.path.exists(file_path):
        st.warning(f"文件不存在: {file_path}")
        return

    # Load the JSONL file
    records = load_jsonl(file_path)

    if not records:
        st.warning("没有找到有效的记录")
        return

    # Two-column layout (width ratio 1:3)
    col1, col2 = st.columns([1, 3])

    # Left column: record list
    with col1:
        st.subheader("记录列表")
        selected_index = None

        # One button per record; the record's 'name' is the label, with a
        # numbered fallback when it is missing.
        for i, record in enumerate(records):
            if st.button(record.get('name', f"记录 {i+1}"), key=f"btn_{i}"):
                selected_index = i

    # Right column: details of the selected record
    with col2:
        # The selection is kept in session state so it is still known on runs
        # where no list button was clicked (e.g. when the run button is pressed).
        if 'selected_index' not in st.session_state:
            st.session_state.selected_index = 0

        if selected_index is not None:
            st.session_state.selected_index = selected_index

        # Skip rendering when the remembered index no longer fits the loaded list.
        if st.session_state.selected_index < len(records):
            selected_record = records[st.session_state.selected_index]

            st.subheader(f"查询问题: {selected_record.get('name', '无名称')}")
            st.info(selected_record.get('query', '无查询信息'))

            st.subheader("代码")
            st.code(selected_record.get('code', '无代码'), language='python')

            # Run-code button
            if st.button("运行代码"):
                # NOTE(review): the nesting of the result display relative to
                # the spinner is assumed here -- confirm against the original file.
                with st.spinner('正在执行代码...'):
                    result = run_code(selected_record)

                    st.subheader("运行结果")
                    # run_code's return value is used as a dict with
                    # 'status', 'data' and 'message' keys.
                    if result.get('status'):
                        st.success("执行成功")
                        st.json(result.get('data'))
                    else:
                        st.error(f"执行失败: {result.get('message', '未知错误')}")
|
||||
# Render the page only when this file is the program entry point.
if __name__ == "__main__":
    main()
|
||||
@@ -0,0 +1,117 @@
|
||||
import json
|
||||
from typing import Dict, List, Any
|
||||
import copy
|
||||
|
||||
|
||||
class ExpenseProcessor:
    """Annotate an expense tree with aggregated cost totals.

    A node is a dict. A node without a ``"children"`` key is a leaf and may
    carry ``"cost"`` and optionally ``"id"``. A node with non-empty
    ``"children"`` receives a ``"sum"`` list holding the totals of all leaves
    below it: one entry per distinct ``id`` plus at most one entry for leaves
    that have no ``id``.
    """

    def __init__(self):
        pass

    @staticmethod
    def _merge_cost_item(totals: List[Dict[str, Any]], cost_item: Dict[str, Any]) -> None:
        """Add ``cost_item`` to the matching entry of ``totals``, or append a copy.

        Items with an ``id`` match the entry with an equal ``id``; items
        without one match the first entry that has no ``id`` either. Merged
        costs are summed as floats and stored back as a string, so a merged
        entry always has a ``str`` cost while an unmerged one keeps the
        original value.
        """
        has_id = "id" in cost_item
        for existing in totals:
            if has_id:
                same_bucket = "id" in existing and existing["id"] == cost_item["id"]
            else:
                same_bucket = "id" not in existing
            if same_bucket:
                existing["cost"] = str(float(existing["cost"]) + float(cost_item["cost"]))
                return
        totals.append(copy.deepcopy(cost_item))

    @staticmethod
    def calculate_parent_costs(node: Dict[str, Any]) -> List[Dict[str, Any]]:
        """Return the aggregated cost entries for ``node``.

        :param node: expense tree node
        :return: for a leaf, a one-element list with its ``cost`` (and ``id``
            if present), or ``[]`` when it has no ``cost``; for a node with a
            ``"children"`` key, the merged totals of all descendants in
            first-seen order. The node's own ``cost`` is ignored in that case.
        :raises ValueError, TypeError: when two costs that must be merged
            cannot be converted with ``float``
        """
        # Anything that is not a dict cannot carry costs.
        if not isinstance(node, dict):
            return []

        if "children" not in node:
            if "cost" not in node:
                return []
            if "id" in node:
                return [{"id": node["id"], "cost": node["cost"]}]
            return [{"cost": node["cost"]}]

        totals: List[Dict[str, Any]] = []
        # ``or ()`` tolerates an explicit ``"children": None``.
        for child in node["children"] or ():
            for cost_item in ExpenseProcessor.calculate_parent_costs(child):
                ExpenseProcessor._merge_cost_item(totals, cost_item)
        return totals

    @staticmethod
    def process_node(node: Dict[str, Any]) -> Dict[str, Any]:
        """Return a deep copy of ``node`` with ``"sum"`` added at every parent level.

        Nodes without children (and values that are not dicts) are returned
        as plain copies. ``"sum"`` is only set when at least one cost was
        found below the node. The input is never modified.
        """
        result = copy.deepcopy(node)
        if not isinstance(node, dict) or not node.get("children"):
            return result
        cost_items = ExpenseProcessor.calculate_parent_costs(node)
        if cost_items:
            result["sum"] = cost_items
        result["children"] = [ExpenseProcessor.process_node(child) for child in node["children"]]
        return result

    @staticmethod
    def process_expense_preview(expense_preview: Dict[str, Any]) -> Dict[str, Any]:
        """Process every list found two levels below ``expense_preview``.

        :param expense_preview: mapping of category -> subcategory -> value
        :return: deep copy in which each list-valued subcategory has had all
            of its items passed through :meth:`process_node`; everything else
            (including categories whose value is not a dict) is copied as is
        """
        result = copy.deepcopy(expense_preview)
        for category_key, category_value in expense_preview.items():
            # Scalar or list categories have no subcategories to walk.
            if not isinstance(category_value, dict):
                continue
            for subcategory_key, subcategory_value in category_value.items():
                if isinstance(subcategory_value, list):
                    result[category_key][subcategory_key] = [
                        ExpenseProcessor.process_node(item) for item in subcategory_value
                    ]
        return result

    @classmethod
    def load_and_process_from_file(cls, input_path: str, output_path: str = None) -> Dict[str, Any]:
        """
        Load JSON from a file and process it.

        :param input_path: input file path (read as UTF-8)
        :param output_path: output file path (optional); when given, the
            processed data is written there and a confirmation is printed
        :return: the complete processed data
        :raises ValueError: when the data has no projectData.expensePreview
        """
        with open(input_path, "r", encoding="utf-8") as f:
            data = json.load(f)

        processed_data = cls.process_raw_data(data)

        if output_path:
            with open(output_path, "w", encoding="utf-8") as f:
                json.dump(processed_data, f, ensure_ascii=False, indent=4)
            print(f"处理完成,结果已保存到 {output_path}")
        return processed_data

    @classmethod
    def process_raw_data(cls, raw_data: Dict[str, Any]) -> Dict[str, Any]:
        """
        Process raw data directly (no file access).

        :param raw_data: raw data; must contain projectData.expensePreview
        :return: deep copy of ``raw_data`` whose projectData.expensePreview
            has been replaced by its processed form
        :raises ValueError: when projectData.expensePreview is missing
        """
        if "projectData" in raw_data and "expensePreview" in raw_data["projectData"]:
            processed_data = copy.deepcopy(raw_data)
            processed_data["projectData"]["expensePreview"] = cls.process_expense_preview(
                raw_data["projectData"]["expensePreview"]
            )
            return processed_data
        raise ValueError("未找到 projectData.expensePreview 路径")
|
||||
|
||||
|
||||
# Script entry point: process one hard-coded input file and write the result
# next to it.
if __name__ == "__main__":
    input_file = "dataset/json/主网预算/架空_clean.json" # input JSON file path
    output_file = "dataset/json/主网预算/output.json" # output JSON file path

    # Load and process the JSON file through the class method
    ExpenseProcessor.load_and_process_from_file(input_file, output_file)
|
||||
@@ -0,0 +1,206 @@
|
||||
import chardet
|
||||
import xml.etree.ElementTree as ET
|
||||
import json
|
||||
import re
|
||||
|
||||
def clean_bracketed_strings(input_str: str) -> str:
    """Strip Python quoting and list syntax from values wrapped in 【 】.

    Three shapes are rewritten:

    * ``【'xxx'】``          -> ``【xxx】``
    * ``【['xxx']】``        -> ``【xxx】``
    * ``【['xxx','yyy']】``  -> ``【xxx,yyy】``

    The multi-item form also accepts whitespace after each comma, which is
    what formatting a Python list into a string produces
    (``【['xxx', 'yyy']】`` -> ``【xxx, yyy】``). Separators are kept exactly
    as they appear in the input.

    :param input_str: text that may contain bracketed, quoted values
    :return: the text with the three shapes above simplified
    """
    # 【'xxx'】 -> 【xxx】
    result = re.sub(r"【'([^']+)'】", r"【\1】", input_str)
    # 【['xxx']】 -> 【xxx】
    result = re.sub(r"【\['([^']+)'\]】", r"【\1】", result)
    # 【['xxx','yyy']】 / 【['xxx', 'yyy']】 -> quotes and list brackets removed
    result = re.sub(
        r"【\[((?:'[^']+'(?:,\s*)?)+)\]】",
        lambda m: "【" + m.group(1).replace("'", "") + "】",
        result,
    )
    return result
|
||||
|
||||
|
||||
def read_xml_as_string(file_path):
    """Read a file as text, decoding it with the encoding chardet detects.

    The whole file is read as bytes and passed to ``chardet.detect``. When
    chardet cannot name an encoding (it reports ``None``, e.g. for an empty
    file) UTF-8 is used instead of failing inside ``decode``.

    :param file_path: path of the file to read
    :return: the decoded file content
    :raises UnicodeDecodeError: when the bytes do not decode with the chosen
        encoding
    """
    with open(file_path, 'rb') as f:
        raw_data = f.read()

    # chardet may return {'encoding': None, ...}; decode(None) would raise TypeError.
    encoding = chardet.detect(raw_data)['encoding'] or 'utf-8'

    return raw_data.decode(encoding)
|
||||
|
||||
def _expand_code_range(code_range):
    """Expand ``PREFIX-A~B`` into ``PREFIX-A`` ... ``PREFIX-B``; return other codes unchanged."""
    if "~" not in code_range:
        return [code_range]
    try:
        prefix, range_part = code_range.rsplit("-", 1)
        start_str, end_str = range_part.split("~")
        start, end = int(start_str), int(end_str)
    except ValueError:
        # No "-" separator, wrong number of "~", or non-numeric bounds:
        # keep the text as a single literal code.
        return [code_range]
    return [f"{prefix}-{num}" for num in range(start, end + 1)]


def parse_keyword(keyword, indicator_name):
    """Split an indicator keyword into a mapping rule and the items it refers to.

    Four keyword shapes are recognised, checked in this order:

    1. Range table -- contains ``||``. Each ``||``-separated part has the form
       ``codes@@value`` (parts without ``@@`` are skipped); ``codes`` is a
       ``、``-separated list in which ``YX5-67~69`` expands to YX5-67, YX5-68,
       YX5-69. The rule is a Markdown table of the original ``codes``/``value``
       pairs and the mapping is the flat list of expanded codes.
    2. Formula -- contains ``/``, ``(`` and ``)``. The mapping is the text
       before the first ``/`` followed by the ``+``-separated terms found
       between the first ``(`` and the first ``)``.
    3. Sum -- contains ``+``. The mapping is the list of stripped terms.
    4. Anything else -- the mapping is the keyword itself.

    :param keyword: keyword text of one indicator record
    :param indicator_name: name of the indicator; currently unused (a disabled
        special case for "电压等级" used to depend on it)
    :return: dict with ``"映射规则"`` (str) and ``"指标映射"`` (list of str)
    """
    # Range expressions (contain the "||" separator)
    if "||" in keyword:
        table_rows = []
        all_codes = []

        for part in keyword.split("||"):
            if "@@" not in part:
                continue
            codes_str, value = part.split("@@", 1)

            for code_range in codes_str.split("、"):
                # Consecutive numbering such as YX5-67~69
                all_codes.extend(_expand_code_range(code_range))

            table_rows.append(f"| {codes_str} | {value} |")

        rule_table = "| 资源识别规则 | 指标值 |\n|-------|-------|\n" + "\n".join(table_rows)
        return {"映射规则": rule_table, "指标映射": all_codes}

    # Math formula (contains "/" and parentheses)
    if "/" in keyword and "(" in keyword and ")" in keyword:
        # Numerator: everything before the first "/"
        molecule = keyword.split("/")[0].strip()

        # Denominator: text between the first "(" and the first ")"
        denominator_start = keyword.find("(") + 1
        denominator_end = keyword.find(")")
        denominator_expr = keyword[denominator_start:denominator_end]

        # Split the denominator into its addends
        denominator_items = [item.strip() for item in denominator_expr.split("+")]
        return {"映射规则": keyword, "指标映射": [molecule] + denominator_items}

    # Plain sum expression
    if "+" in keyword:
        items = [item.strip() for item in keyword.split("+")]
        return {"映射规则": keyword, "指标映射": items}

    # Default: ordinary keyword
    return {"映射规则": keyword, "指标映射": [keyword]}
|
||||
|
||||
|
||||
def _scope_mapping_description(data_sources, scope, codes):
    """Build the mapping sentence for a scope-based data source (e.g. "主材单价")."""
    # Last two characters of the source name, e.g. "单价", "参数"
    attribute = data_sources[-2:]
    # First two characters of the source name, e.g. "定额", "主材"
    kind = data_sources[0:2]
    # NOTE: "人材机" has three characters and can never equal the two-character
    # prefix; it is kept so the list matches the original rule set.
    if scope and kind in ["清单", "定额", "人材机"]:
        return f"从【{scope}】及其子孙项目划分中查找编码中包含【{codes}】的所有【{kind}】的【{attribute}】之和"
    if scope and kind in ["主材", "设备"]:
        return f"从【{scope}】及其子孙项目划分中查找名称中包含【{codes}】的所有【{kind}】的【{attribute}】之和"
    # No extraction scope, or a source kind without its own sentence
    # (e.g. "工程费用"): describe a plain attribute lookup. Always returning a
    # value here keeps a record from inheriting the previous record's text.
    return f"从【{data_sources}】中获取{codes}的属性"


def xml_to_json(xml_content, output_path):
    """Convert indicator records from XML into a JSON description file.

    Every ``records/record`` element becomes one dict holding the record's
    attributes plus a ``"指标描述"`` entry with:

    * ``"映射规则"`` -- the rule produced by ``parse_keyword``;
    * ``"指标映射"`` -- depending on the record's ``数据来源``: a sentence about
      the project division ("项目划分费用"), a sentence built from the
      extraction scope (material / quota / project-cost sources), or the item
      list from ``parse_keyword`` for every other source.

    Afterwards each description is normalised:

    * list mappings lose a leading element that is wrapped in parentheses;
    * text mappings whose rule contains ``@<source>.`` get every 【...】 group
      replaced by 【<source>】, the ``@<source>.`` prefixes removed and square
      brackets turned into 【 】;
    * all text mappings are passed through ``clean_bracketed_strings``.

    :param xml_content: XML document as a string
    :param output_path: path of the JSON file to write (UTF-8, indent 2)
    :return: the fixed status string "结果已保存"
    """
    root = ET.fromstring(xml_content)
    result = []

    # Data-source categories that determine how the mapping text is built
    scope_based_sources = ["主材单价", "主材参数", "主材数量", "定额参数", "定额数量", "工程费用"]
    project_division = ["项目划分费用"]

    for record in root.findall('.//records/record'):
        indicator_name = record.get("指标名称")
        data_sources = record.get("数据来源")
        index_extraction_scope = record.get("指标提取范围")
        # A record without a 关键字 attribute is treated as an empty keyword.
        parsed = parse_keyword(record.get("关键字") or "", indicator_name)

        if data_sources in project_division:
            indicator_mapping = f"从【{index_extraction_scope}】项目划分中获取名称属于【{indicator_name}】的费用"
        elif data_sources in scope_based_sources:
            indicator_mapping = _scope_mapping_description(
                data_sources, index_extraction_scope, parsed["指标映射"]
            )
        else:
            # "报表指标", "指标库" and any unknown source keep the parsed list.
            indicator_mapping = parsed["指标映射"]

        result.append({
            "指标名称": indicator_name,
            "code": "",
            "单位": record.get("单位"),
            "单价类型": record.get("单价类型"),
            "序号": record.get("序号"),
            "提取方式": record.get("提取方式"),
            "指标类型": record.get("指标类型"),
            "数据来源": data_sources,
            "指标描述": {
                "指标映射": indicator_mapping,
                "映射规则": parsed["映射规则"],
            },
        })

    for item in result:
        desc = item["指标描述"]
        mapping_rule = desc["映射规则"]
        indicator_map = desc["指标映射"]

        if isinstance(indicator_map, list):
            # Drop a leading parenthesised element; the list may be empty.
            if indicator_map and indicator_map[0].startswith("(") and indicator_map[0].endswith(")"):
                del indicator_map[0]
            continue

        if isinstance(mapping_rule, str):
            match = re.search(r"@([^\.]+)\.", mapping_rule)
            if match:
                source = match.group(1)
                # Replace e.g. 【工程费用】 with the extracted source (such as
                # 【其他费用】). A function replacement inserts the text
                # literally, whatever characters it contains.
                new_mapping = re.sub(r"【.*?】", lambda _m: f"【{source}】", indicator_map)
                new_mapping = re.sub(r"@[^.]+\.", "", new_mapping)
                indicator_map = new_mapping.replace('[', '【').replace(']', '】')

        desc["指标映射"] = clean_bracketed_strings(indicator_map)

    # Save as a JSON file
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(result, f, ensure_ascii=False, indent=2)

    return "结果已保存"
|
||||
|
||||
|
||||
# Module-level conversion run: read the indicator XML, write the JSON file,
# then report completion. This executes whenever the file is run or imported.
xml_content = read_xml_as_string('dataset/主网架空线路造价分析指标.xml')
json_output = xml_to_json(xml_content, output_path= "./tests/zhibiao.json")
print("转换完毕!")
|
||||
|
||||
Reference in New Issue
Block a user