Files
KG_generation/nodetype_ex.py
T
chentianrui 9609bb67b4 上传文件
2025-08-01 15:31:56 +08:00

169 lines
5.5 KiB
Python

from neo4j import GraphDatabase
import pandas as pd
import json
class KnowledgeGraphQuerier:
    """Read a Neo4j graph and export it as a nested ``{type: name}`` hierarchy.

    Every node is reduced to a display name (see :meth:`get_node_name`) and
    every directed relationship ``(a)-[r]->(b)`` is interpreted as
    "``b`` is a child of ``a``".  The relationship type itself is not used.
    """

    # Property names to try, in order, for each known node label.
    _NAME_PROPERTIES_BY_LABEL = {
        "ProjectDivisionSet": ["name"],
        "ProjectDivisionItem": ["name"],
        "Quota": ["name"],
        "MainMaterial": ["name"],
        "Equipment": ["name"],
        "MaterialOrEquipment": ["name"],
        "CostSet": ["GUID"],
        "CostItem": ["id"],
        "FeeTableTemplateSet": ["name"],
        "FeeTableTemplateItem": ["name"],
        "FeeCollection": ["name"],
        "FeeScheduleSet": ["name"],
        "FeeScheduleItem": ["name"],
        "Fee": ["name"],
        "ProjectPropertySet": ["name"],
        "ProjectProperty": ["软件名称"],
    }

    # Fallback lookup order for labels that are not listed above.
    _DEFAULT_NAME_PROPERTIES = [
        "名称", "项目名称", "费用名称", "工程名称",
        "id", "GUID", "序号", "编码", "代码",
    ]

    def __init__(self, uri, user, password):
        """Create the Neo4j driver for ``uri`` using basic ``(user, password)`` auth."""
        self.driver = GraphDatabase.driver(uri, auth=(user, password))

    def close(self):
        """Close the underlying Neo4j driver."""
        self.driver.close()

    @staticmethod
    def _is_blank(value):
        """Return True for ``None`` and for empty strings/collections.

        Numeric zero is deliberately *not* blank: an identifier such as
        ``id == 0`` is a real value and must be usable as a node name.
        """
        if value is None:
            return True
        return hasattr(value, "__len__") and len(value) == 0

    def get_node_name(self, node_types, properties):
        """Pick a display name for a node from its labels and properties.

        Args:
            node_types: The node's labels.  The first label that has an entry
                in the per-label table decides which properties are tried.
            properties: Mapping of property name to value for the node.

        Returns:
            The value of the first listed property that is present and not
            blank; otherwise the first truthy property value of any name;
            otherwise the literal ``"无名称"``.  The value is returned as
            stored, so it is not necessarily a string.
        """
        priority_list = None
        for node_type in node_types:
            if node_type in self._NAME_PROPERTIES_BY_LABEL:
                priority_list = self._NAME_PROPERTIES_BY_LABEL[node_type]
                break
        if not priority_list:
            priority_list = self._DEFAULT_NAME_PROPERTIES

        # Explicitly configured properties: accept any non-blank value,
        # including 0, so that e.g. a CostItem with id 0 keeps its id as name.
        for prop in priority_list:
            if prop in properties and not self._is_blank(properties[prop]):
                return properties[prop]

        # Nothing configured matched: fall back to the first truthy value.
        for value in properties.values():
            if value:
                return value
        return "无名称"

    def get_simple_hierarchy(self):
        """Load all nodes and relationships and return the nested hierarchy.

        Returns:
            A list with one nested dict per root node (a node that is never
            the target of a relationship), in the order the nodes were
            returned by the database, followed by one flat ``{type: name}``
            dict for every node that could not be reached from any root
            (this happens for nodes that only occur inside cycles).  When the
            graph has no root at all the result is therefore a flat list of
            all nodes.
        """
        nodes = {}
        with self.driver.session() as session:
            # NOTE(review): Cypher id() is deprecated in Neo4j 5 in favour of
            # elementId(); kept as-is so the query works unchanged on the
            # server version this script targets - confirm before upgrading.
            node_result = session.run(
                """
                MATCH (n)
                RETURN id(n) AS node_id, labels(n) AS node_types, properties(n) AS properties
                """
            )
            for record in node_result:
                node_id = record["node_id"]
                node_types = record["node_types"]
                name = self.get_node_name(node_types, record["properties"])
                # The first label is used as the node's type in the output.
                main_type = node_types[0] if node_types else "Unknown"
                nodes[node_id] = {
                    "id": node_id,
                    "type": main_type,
                    "name": name,
                    "children": [],
                }

            edge_result = session.run(
                """
                MATCH (a)-[r]->(b)
                RETURN id(a) AS source_id, type(r) AS relationship_type, id(b) AS target_id
                """
            )
            for record in edge_result:
                source_id = record["source_id"]
                target_id = record["target_id"]
                if source_id in nodes and target_id in nodes:
                    nodes[source_id]["children"].append(target_id)

        return self._assemble_hierarchy(nodes)

    def _assemble_hierarchy(self, nodes):
        """Turn the ``node_id -> node record`` mapping into the output list.

        Args:
            nodes: Mapping of node id to a dict with the keys ``"type"``,
                ``"name"`` and ``"children"`` (a list of child node ids).

        Returns:
            Trees for all roots followed by flat entries for unreachable
            nodes, as described in :meth:`get_simple_hierarchy`.
        """
        child_ids = set()
        for node in nodes.values():
            child_ids.update(node["children"])

        # Iterate the dict (not a set difference) to keep a stable order.
        root_ids = [node_id for node_id in nodes if node_id not in child_ids]

        visited = set()
        hierarchy = [
            self._build_simple_hierarchy(root_id, nodes, _visited=visited)
            for root_id in root_ids
        ]

        # Nodes that live only inside cycles have a parent but no root above
        # them; list them flat instead of silently dropping them.
        hierarchy.extend(
            {nodes[node_id]["type"]: nodes[node_id]["name"]}
            for node_id in nodes
            if node_id not in visited
        )
        return hierarchy

    def _build_simple_hierarchy(self, node_id, nodes, _path=None, _visited=None):
        """Recursively build the nested dict for ``node_id`` and its children.

        Args:
            node_id: Id of the node to render.
            nodes: Mapping of node id to node record (see
                :meth:`_assemble_hierarchy`).
            _path: Internal. Ids of the nodes on the current root-to-node
                path, used to detect cycles.
            _visited: Internal. Optional set that collects every id rendered.

        Returns:
            ``{type: name}`` for the node, plus a ``"children"`` list when at
            least one child was rendered.  A child that is already on the
            current path (a cycle or self-loop) is skipped, so the recursion
            always terminates.
        """
        path = set() if _path is None else _path
        node = nodes[node_id]
        if _visited is not None:
            _visited.add(node_id)

        result = {node["type"]: node["name"]}

        path.add(node_id)
        children = [
            self._build_simple_hierarchy(child_id, nodes, path, _visited)
            for child_id in node["children"]
            if child_id not in path  # back-edge: would loop forever
        ]
        path.discard(node_id)

        if children:
            result["children"] = children
        return result
def main():
    """Export the graph hierarchy to ``kg_simple_hierarchy.json``.

    Connection settings are read from the environment variables
    ``NEO4J_URI``, ``NEO4J_USER`` and ``NEO4J_PASSWORD``.  Each one falls back
    to the value that used to be hard-coded here, so running the script
    without any environment configuration behaves as before.
    """
    # Local import: keeps this function self-contained without touching the
    # file-level import block.
    import os

    uri = os.environ.get("NEO4J_URI", "bolt://172.20.0.145:7687")
    user = os.environ.get("NEO4J_USER", "neo4j")
    password = os.environ.get("NEO4J_PASSWORD", "password")

    querier = KnowledgeGraphQuerier(uri, user, password)
    try:
        # Build the nested hierarchy from the database.
        print("正在构建简洁的图谱层级结构...")
        hierarchy = querier.get_simple_hierarchy()

        # Persist it as UTF-8 JSON; ensure_ascii=False keeps the Chinese
        # names readable in the output file.
        print("正在保存图谱层级结构到JSON文件...")
        with open("kg_simple_hierarchy.json", "w", encoding="utf-8") as f:
            json.dump(hierarchy, f, ensure_ascii=False, indent=2)
        print("简洁的图谱层级结构已保存到 kg_simple_hierarchy.json")
    finally:
        # Always release the driver, even when the export fails.
        querier.close()


if __name__ == "__main__":
    main()