更新代码
This commit is contained in:
+130
@@ -0,0 +1,130 @@
|
||||
import os
|
||||
import sys
|
||||
import shutil
|
||||
import argparse
|
||||
import uuid
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
import tempfile
|
||||
|
||||
# 仅保留与本流程相关的依赖
|
||||
from transform_expense_preview import process_directory
|
||||
from supplement_kg import costsummary_upwards
|
||||
from build_kg_ontolo import (
|
||||
create_KGs_from_folder,
|
||||
connect_to_neo4j,
|
||||
read_config,
|
||||
)
|
||||
|
||||
# Base directory under which the per-run scratch folders are created
# (whatever tempfile.gettempdir() reports for this system).
TEMP_DIR = tempfile.gettempdir()
|
||||
# Fallback input path used when no PATH is given on the command line. Set it to
# a directory or to a single JSON file, for example: r"D:\your\json_dir".
DEFAULT_INPUT_PATH = r"E:\文件\LLM_model\RAG\code\Engineering_data_KG-1\4、模版指标库\test"
|
||||
|
||||
|
||||
def _is_json_file(p: Path) -> bool:
    """Return True when *p* is an existing regular file with a ``.json`` suffix.

    The suffix is compared case-insensitively, so ``data.JSON`` also qualifies.
    Directories and missing paths yield False.
    """
    if not p.is_file():
        return False
    return p.suffix.lower() == ".json"
|
||||
|
||||
|
||||
def _prepare_workdir(input_path: Path) -> Path:
    """Resolve the directory the pipeline should work in.

    * A directory is used as-is and returned unchanged.
    * A single JSON file is copied (via ``shutil.copy2``) into a newly named
      session folder under ``TEMP_DIR``, and that folder is returned.

    Raises:
        ValueError: if *input_path* is neither a directory nor a JSON file.
    """
    if input_path.is_dir():
        return input_path

    if not _is_json_file(input_path):
        raise ValueError(f"输入路径既不是目录也不是JSON文件: {input_path}")

    # Random hex suffix gives every call its own scratch folder name.
    session_dir = Path(TEMP_DIR) / ("nobcl2kg_" + uuid.uuid4().hex)
    session_dir.mkdir(parents=True, exist_ok=True)

    # Keep the original file name inside the session folder.
    target = session_dir / input_path.name
    shutil.copy2(str(input_path), str(target))
    return session_dir
|
||||
|
||||
|
||||
def _ensure_dir(p: Path):
    """Create directory *p* along with any missing parents.

    Calling it on a directory that already exists is not an error.
    """
    p.mkdir(exist_ok=True, parents=True)
|
||||
|
||||
|
||||
def run_pipeline(input_path: str) -> None:
    """Build knowledge graphs from the JSON files found at *input_path*.

    The BCL computation and write-back stages are skipped; the graph is created
    solely from the output of the upward cost-summary step.

    The process exits with status 1 when the path does not exist, 2 when the
    Neo4j connection fails, 3 when no JSON file is found, and 10 when any
    other exception occurs (its message and traceback are printed first).
    """
    try:
        source = Path(input_path).expanduser().resolve()
        if not source.exists():
            print(f"输入路径不存在: {source}")
            sys.exit(1)

        # Step 0: connect to Neo4j with the credentials from the config.
        print("[步骤0] 连接 Neo4j 数据库 ...")
        cfg = read_config()
        uri = cfg.get("neo4j", "uri")
        user = cfg.get("neo4j", "user")
        password = cfg.get("neo4j", "password")
        connected = connect_to_neo4j(uri, user, password)
        if not connected:
            print("连接 Neo4j 失败,请检查配置")
            sys.exit(2)

        # Decide which directory the remaining steps operate on.
        work_dir = _prepare_workdir(source)
        print(f"工作目录: {work_dir}")

        # Count JSON files recursively; stop when there is nothing to process.
        if work_dir.is_dir():
            found = len(list(work_dir.rglob("*.json")))
        else:
            found = 0
        if found == 0:
            print("未在输入路径中发现任何 JSON 文件,终止处理。")
            sys.exit(3)
        print(f"发现 JSON 文件数量: {found}")

        # Step 1: normalise the JSON structure (noted in the original as an
        # in-place operation on the working directory).
        print("[步骤1] 规范化 JSON 结构 ...")
        process_directory(str(work_dir))

        # Step 2: upward cost summary, written to a separate merged directory
        # under TEMP_DIR with its own random name.
        print("[步骤2] 费用向上汇总 ...")
        merged_dir = Path(TEMP_DIR) / f"nobcl2kg_merged_{uuid.uuid4().hex}"
        _ensure_dir(merged_dir)
        summary = costsummary_upwards(str(work_dir), str(merged_dir))
        merged_total = 0 if not summary else len(summary)
        print(f"完成费用汇总文件: {merged_total}")

        # Step 3: create the knowledge graphs directly from the merged output
        # (the BCL-related steps are skipped).
        print("[步骤3] 创建知识图谱(基于汇总结果) ...")
        ok_count, all_count, removed = create_KGs_from_folder(str(merged_dir))
        if removed:
            print(f"注意:已删除 {len(removed)} 个同名工程:{', '.join(removed)}")

        print("\n处理完成!\n" f"成功创建知识图谱: {ok_count}/{all_count}\n" f"合并目录: {merged_dir}\n")

    except Exception as exc:
        # sys.exit() raises SystemExit, which is not an Exception subclass, so
        # the deliberate exits above pass through this handler untouched.
        print(f"运行出错: {exc}\n{traceback.format_exc()}")
        sys.exit(10)
|
||||
|
||||
|
||||
def main():
    """Command-line entry point: pick the input path and run the pipeline.

    Accepts one optional positional ``PATH`` argument. When it is omitted or
    empty, ``DEFAULT_INPUT_PATH`` is used instead. If that is empty as well,
    usage hints are printed and the process exits with status 1.
    """
    cli = argparse.ArgumentParser(description="从目录或单个JSON文件生成知识图谱(无BCL步骤,无前端)")
    cli.add_argument(
        "path",
        metavar="PATH",
        nargs="?",
        default=None,
        help="包含 JSON 文件的目录,或单个 JSON 文件的路径(可不填,则使用 DEFAULT_INPUT_PATH)",
    )
    parsed = cli.parse_args()

    # A path given on the command line wins over the module-level default.
    chosen = parsed.path or DEFAULT_INPUT_PATH
    if chosen:
        run_pipeline(chosen)
        return

    print("未提供路径,且 DEFAULT_INPUT_PATH 为空。请在脚本顶部设置 DEFAULT_INPUT_PATH,或通过命令行提供路径。")
    print('示例:python nobcl2kg.py "D:\\data\\jsons"')
    sys.exit(1)
|
||||
|
||||
|
||||
# Run the command-line entry point only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user