增加了知识图谱导出excel
This commit is contained in:
+89
-74
@@ -1,3 +1,7 @@
|
||||
"""
|
||||
批量对比多个 calculation_results.json 与 project_data.json 中的费用
|
||||
"""
|
||||
|
||||
import json
|
||||
import os
|
||||
import re
|
||||
@@ -6,19 +10,15 @@ from difflib import SequenceMatcher
|
||||
|
||||
# GUID in 8-4-4-4-12 hexadecimal form; compiled once because the function is
# called for every file of a batch run.
_GUID_PATTERN = re.compile(
    r"([0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12})",
    re.IGNORECASE,
)


def extract_guid_from_filename(filename):
    """Extract the first GUID contained in a file name.

    Args:
        filename: File name or path (``str`` or path-like). ``None`` and the
            empty string are accepted and yield ``None``.

    Returns:
        The first 8-4-4-4-12 GUID found, upper-cased, or ``None`` when the
        name contains no GUID.
    """
    if not filename:
        return None
    match = _GUID_PATTERN.search(os.fspath(filename))
    return match.group(1).upper() if match else None
|
||||
|
||||
|
||||
def normalize_cost_name(name):
    """Normalize a cost-item name so that names can be matched.

    The name loses its trailing ``_SUFFIX`` part (for example ``_GJJ``), then
    every non-word character is removed, and the result is lower-cased.

    Args:
        name: Raw cost-item name.

    Returns:
        The normalized, lower-cased name.
    """
    # NOTE(review): "\w" also matches "_", so when everything after the first
    # underscore consists of word characters (e.g. "A_B_GJJ") the whole tail
    # from that first underscore is removed, not only the last segment.
    # Kept as is so the result stays aligned with other uses of this pattern;
    # confirm whether that is intended.
    name = re.sub(r"_\w+$", "", name)  # drop a suffix such as "_GJJ"
    name = re.sub(r"[^\w]", "", name)  # drop symbols and whitespace
    return name.lower()
|
||||
|
||||
|
||||
@@ -40,13 +40,9 @@ def load_calculation_results(json_file_path):
|
||||
|
||||
|
||||
def find_node_by_guid(expense_preview, target_guid):
|
||||
"""
|
||||
在 expensePreview 中递归查找 GUID 对应的节点
|
||||
|
||||
"""
|
||||
"""在 expensePreview 中递归查找 GUID 对应的节点"""
|
||||
|
||||
def search_recursive(items):
|
||||
"""在列表中递归查找 GUID"""
|
||||
for item in items:
|
||||
current_guid = item.get("GUID", "").strip("{}").upper()
|
||||
if current_guid == target_guid:
|
||||
@@ -57,16 +53,11 @@ def find_node_by_guid(expense_preview, target_guid):
|
||||
return result
|
||||
return None
|
||||
|
||||
# 遍历每个大类(如 "建筑工程")
|
||||
for category_name, category_data in expense_preview.items():
|
||||
if not isinstance(category_data, dict):
|
||||
continue
|
||||
# 遍历每个子类(如 "建筑"、"安装")
|
||||
for subcategory_name, items in category_data.items():
|
||||
if isinstance(items, list):
|
||||
result = search_recursive(items)
|
||||
if result is not None:
|
||||
return result
|
||||
for category_name, items in expense_preview.items():
|
||||
if isinstance(items, list):
|
||||
result = search_recursive(items)
|
||||
if result is not None:
|
||||
return result
|
||||
return None
|
||||
|
||||
|
||||
@@ -84,7 +75,6 @@ def load_project_data_and_find_costs(project_json_path, target_guid):
|
||||
|
||||
external_costs = {}
|
||||
for item in children:
|
||||
# 有些节点只有 cost,没有 id(可能是合计)
|
||||
item_id = item.get("id", "")
|
||||
cost_str = item.get("cost", "0")
|
||||
try:
|
||||
@@ -93,10 +83,8 @@ def load_project_data_and_find_costs(project_json_path, target_guid):
|
||||
cost_val = 0.0
|
||||
|
||||
if item_id:
|
||||
# 只提取有 id 的项
|
||||
clean_id = re.sub(r"_\w+$", "", item_id) # 去掉 _GJJ 等后缀
|
||||
clean_id = re.sub(r"_\w+$", "", item_id)
|
||||
external_costs[clean_id] = cost_val
|
||||
# 如果没有 id,可以考虑用其他方式标记,这里先忽略
|
||||
return external_costs
|
||||
|
||||
|
||||
@@ -117,17 +105,16 @@ def match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6):
|
||||
best_score = score
|
||||
best_match = ext_name
|
||||
|
||||
# 判断是否足够相似
|
||||
if best_match and best_score >= similarity_threshold:
|
||||
ext_value = ext_costs[best_match]
|
||||
difference = calc_value - ext_value
|
||||
comparison.append(
|
||||
{
|
||||
"项目": calc_name,
|
||||
"计算值": calc_value,
|
||||
"参考值": ext_value,
|
||||
"计算值": calc_value,
|
||||
"差异": difference,
|
||||
"匹配项": best_match,
|
||||
"原数据项": best_match,
|
||||
"相似度": best_score,
|
||||
}
|
||||
)
|
||||
@@ -136,10 +123,10 @@ def match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6):
|
||||
comparison.append(
|
||||
{
|
||||
"项目": calc_name,
|
||||
"计算值": calc_value,
|
||||
"参考值": None,
|
||||
"计算值": calc_value,
|
||||
"差异": None,
|
||||
"匹配项": None,
|
||||
"原数据项": None,
|
||||
"相似度": best_score,
|
||||
}
|
||||
)
|
||||
@@ -148,71 +135,99 @@ def match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6):
|
||||
for ext_name, ext_value in ext_costs.items():
|
||||
if ext_name not in matched_ext:
|
||||
comparison.append(
|
||||
{"项目": None, "计算值": None, "参考值": ext_value, "差异": None, "匹配项": ext_name, "相似度": None}
|
||||
{
|
||||
"项目": None,
|
||||
"参考值": ext_value,
|
||||
"计算值": None,
|
||||
"差异": None,
|
||||
"原数据项": ext_name,
|
||||
"相似度": None,
|
||||
}
|
||||
)
|
||||
|
||||
return comparison
|
||||
|
||||
|
||||
def save_comparison_to_txt(comparison, output_txt_path):
    """Write the comparison result to a fixed-width text report.

    Columns, in order: item name, reference value, calculated value,
    difference, original data item. Numeric values are written with two
    decimals; ``None`` values are written as empty cells.

    Args:
        comparison: Iterable of dicts with the keys "项目", "参考值", "计算值",
            "差异" and "原数据项".
        output_txt_path: Path of the text file to (over)write, UTF-8 encoded.
    """

    def cell(value, width):
        # Pad or truncate to exactly ``width`` characters. Padding counts
        # code points, so wide glyphs may still look shifted on screen.
        return value.ljust(width)[:width]

    def number(value):
        return f"{value:.2f}" if value is not None else ""

    with open(output_txt_path, "w", encoding="utf-8") as f:
        # Header: item, reference value, calculated value, difference, source item.
        f.write(f"{'项目':<20} {'参考值':<25} {'计算值':<25} {'差异':<25} {'原数据项':<30}\n")
        f.write("-" * 120 + "\n")

        for item in comparison:
            columns = [
                cell(item["项目"] or "", 20),
                cell(number(item["参考值"]), 25),
                cell(number(item["计算值"]), 25),
                cell(number(item["差异"]), 25),
                cell(item["原数据项"] or "", 30),
            ]
            # Join with the same single-space separator the header uses so
            # that the data columns line up under their titles.
            f.write(" ".join(columns) + "\n")

    print(f"✅ 对比结果已保存至: {output_txt_path}")
|
||||
|
||||
|
||||
def main():
    """Batch-compare calculation-result files against the reference project data.

    For every ``*_calculation_results.json`` file in the configured folder the
    GUID is taken from the file name, the calculated costs and the reference
    costs for that GUID are loaded and compared, and a text report with the
    same base name is written to the output folder. A failure on one file is
    reported and does not stop the batch.
    """
    # ================== Path configuration ==================
    # Folder holding all calculation_results.json files.
    calc_results_folder = "project2json/outputs/bclresults/变电技改国网"

    # Main project_data JSON (the reference data source).
    project_data_json_path = "project2json/outputs/json/变电技改国网.json"

    # Folder that receives the comparison reports.
    output_folder = "project2json/outputs/comparison_results"

    # File-name marker of a result file (adjust to the actual naming scheme).
    result_file_keyword = "_calculation_results.json"
    # ========================================================

    # Fail with a clear message instead of an unhandled FileNotFoundError.
    if not os.path.isdir(calc_results_folder):
        print(f"❌ 计算结果文件夹不存在: {calc_results_folder}")
        return

    os.makedirs(output_folder, exist_ok=True)

    print(f"开始批量处理文件夹: {calc_results_folder}")
    processed_count = 0

    # os.listdir returns entries in arbitrary order; sort for reproducible runs.
    for filename in sorted(os.listdir(calc_results_folder)):
        if not filename.endswith(".json") or result_file_keyword not in filename:
            continue

        calc_json_path = os.path.join(calc_results_folder, filename)

        try:
            # The GUID in the file name selects the matching reference node.
            guid = extract_guid_from_filename(filename)
            if not guid:
                print(f"⚠️ 无法从文件名提取 GUID: {filename}")
                continue

            print(f"\n📄 处理文件: {filename}")
            print(f" 提取 GUID: {guid}")

            # Calculated costs of this result file.
            calc_costs = load_calculation_results(calc_json_path)
            print(f" 加载 {len(calc_costs)} 个计算费用项")

            # Reference costs taken from the main JSON.
            ext_costs = load_project_data_and_find_costs(project_data_json_path, guid)
            print(f" 找到 {len(ext_costs)} 个参考费用项")

            comparison = match_and_compare_costs(calc_costs, ext_costs, similarity_threshold=0.6)

            # Report keeps the JSON's base name but goes to the output folder.
            base_name = os.path.splitext(filename)[0]
            output_txt_path = os.path.join(output_folder, base_name + ".txt")

            save_comparison_to_txt(comparison, output_txt_path)

            processed_count += 1

        except Exception as e:
            # Batch boundary: report the failing file and continue with the rest.
            print(f"❌ 处理文件 {filename} 时出错: {e}")

    print(f"\n✅ 批量处理完成!共处理 {processed_count} 个文件。")
    print(f"📊 所有对比结果已保存至: {output_folder}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
|
||||
Reference in New Issue
Block a user