Files
langchain_KG/kg_lab_6.13/utils.py
T
zoujiwen 8a44b9780d 上传文件至 kg_lab_6.13
6.16 更新对于资源编码不能正确识别bug
2025-06-16 18:00:34 +08:00

234 lines
7.9 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
def Dictionary_content_mapping(input_str, data, key="指标名称"):
    """Look up the entry named by the first 【…】 group of ``input_str``.

    Args:
        input_str: Text expected to contain a name wrapped in 【 and 】.
        data: Iterable of dicts to search.
        key: Dict key whose value is compared with the extracted name.

    Returns:
        The first dict in ``data`` whose ``key`` value equals the extracted
        name, or ``None`` when ``input_str`` has no 【…】 group or no entry
        matches.
    """
    import re

    match = re.search(r'【(.*?)】', input_str)
    if match is None:
        # Nothing to look up without a bracketed name.
        return None
    extracted = match.group(1)
    return next((entry for entry in data if entry.get(key) == extracted), None)
def find_target_item(input_str, data):
    """Find the mapping rule that targets ``input_str`` and that target's description.

    An entry "targets" ``input_str`` when its "指标描述" is a dict whose
    "指标映射" list starts with ``input_str``.

    Args:
        input_str: Indicator name to search for as a mapping target.
        data: Iterable of indicator dicts with "指标名称" / "指标描述" keys.

    Returns:
        A two-element list ``[rule, description]``. ``rule`` is the "映射规则"
        of the last entry targeting ``input_str``; ``description`` is the
        "指标描述" of the last entry named ``input_str``. Both are ``None``
        when no entry targets ``input_str``.
    """
    rule = None
    targeted = False
    for item in data:
        desc = item.get("指标描述")
        if not isinstance(desc, dict):
            continue
        # An empty or missing mapping list cannot target anything.
        mappings = desc.get("指标映射") or []
        if mappings and mappings[0] == input_str:
            rule = desc.get("映射规则")
            targeted = True

    description = None
    if targeted:
        # The description is only reported when some entry maps to input_str.
        for item in data:
            if item.get("指标名称") == input_str:
                description = item.get("指标描述")
    return [rule, description]
def find_target_items(ele, input_str, data):
    """Resolve ``input_str`` to the single indicator it maps to, if any.

    Only entries whose "指标描述" is a dict are considered. An entry matches
    when its "指标名称" equals ``input_str`` or its "指标映射" list starts with
    ``input_str``; it contributes a result only when that list has exactly
    one element.

    Args:
        ele: Value passed through unchanged as the second result element.
        input_str: Indicator name to resolve.
        data: Iterable of indicator dicts.

    Returns:
        ``[mapped, ele]`` where ``mapped`` is the sole "指标映射" element of
        the last matching entry, or ``None`` when nothing matched.
    """
    mapped = None
    for item in data:
        desc = item.get("指标描述")
        if not isinstance(desc, dict):
            continue
        # Tolerate an empty or missing mapping list instead of indexing into it.
        mappings = desc.get("指标映射") or []
        matches_name = item.get("指标名称") == input_str
        matches_target = bool(mappings) and mappings[0] == input_str
        if (matches_name or matches_target) and len(mappings) == 1:
            mapped = mappings[0]
    return [mapped, ele]
def is_yx_prefix(s):
    """Return True when the string ``s`` starts with the uppercase prefix 'YX'."""
    # A plain prefix test; any characters (or none) may follow the prefix.
    return s.startswith("YX")
def judge_exists(input_str, data):
    """Report whether ``input_str`` is a recognised indicator reference.

    Args:
        input_str: Name to check.
        data: Iterable of indicator dicts.

    Returns:
        True when ``input_str`` starts with 'YX' (per ``is_yx_prefix``), or
        when some entry with a dict "指标描述" either is named ``input_str``
        or has ``input_str`` as the first element of its "指标映射" list;
        False otherwise.
    """
    if is_yx_prefix(input_str):
        return True
    for item in data:
        desc = item.get("指标描述")
        if not isinstance(desc, dict):
            continue
        # An empty or missing mapping list simply cannot match.
        mappings = desc.get("指标映射") or []
        if item.get("指标名称") == input_str:
            return True
        if mappings and mappings[0] == input_str:
            return True
    return False
def judge_str(ceshi, data):
    """Check whether the first mapping target of ``ceshi`` has a plain-text description.

    Args:
        ceshi: Indicator dict whose "指标描述" is expected to be a dict with
            a "指标映射" list.
        data: Iterable of indicator dicts.

    Returns:
        True when some entry in ``data`` has a ``str`` "指标描述" and a
        "指标名称" equal to the first element of ``ceshi``'s "指标映射";
        False otherwise, including when ``ceshi`` has no mapping targets.
    """
    desc = ceshi.get("指标描述")
    mappings = desc.get("指标映射") if isinstance(desc, dict) else None
    if not mappings:
        return False
    # Only the first mapping target decides the outcome; look it up once.
    first_target = mappings[0]
    return any(
        isinstance(item.get("指标描述"), str) and item.get("指标名称") == first_target
        for item in data
    )
def pre_mapping(keywords, data):
    """Translate the indicator named in ``keywords`` into lookup instruction text.

    The indicator name is the text inside the first 【…】 of ``keywords``; the
    first entry of ``data`` with that "指标名称" is then expanded according to
    its "指标描述".

    Args:
        keywords: Text containing the indicator name wrapped in 【 and 】.
        data: Sequence of indicator dicts with "指标名称" / "指标描述" keys.
            A dict-valued "指标描述" must carry "映射规则" and "指标映射".

    Returns:
        * the description itself when it is a ``str``;
        * a single instruction ``str`` when the rule is "1" or there is
          exactly one mapping target;
        * a list of instruction strings when there are several targets;
        * ``None`` when ``keywords`` has no 【…】 group, no indicator matches,
          the description is neither ``str`` nor ``dict``, or the mapping
          list is empty.
    """
    import re

    match = re.search(r'【(.*?)】', keywords)
    if match is None:
        # Without a bracketed name there is nothing to look up.
        return None
    extracted = match.group(1)

    ceshi = next((item for item in data if item["指标名称"] == extracted), None)
    if ceshi is None:
        return None

    desc = ceshi["指标描述"]
    if isinstance(desc, str):
        return desc
    if not isinstance(desc, dict):
        return None

    rule = desc["映射规则"]
    mappings = desc["指标映射"]

    if rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        # A rule that already contains a bracketed name is emitted without
        # the "查找一下" prefix.
        if "【" in temp0:
            return f"{temp1},换算规则:【{temp0}】"
        return f"查找一下【{temp1}】,换算规则:【{temp0}】"

    if not mappings:
        return None

    result = []
    if judge_str(ceshi, data):
        # Targets have plain-text descriptions: emit each one with the rule.
        for ele in mappings:
            for item in data:
                if isinstance(item["指标描述"], str) and item["指标名称"] == ele:
                    result.append(f"{item['指标描述']},换算规则:【{rule}】")
    else:
        for name in mappings:
            temp0, temp1 = find_target_items(rule, name, data)
            if temp1 is None:
                continue
            if not judge_exists(name, data):
                continue
            if temp0 is None:
                # No single mapped indicator was found: fall back to the name
                # itself. The trailing ",233" is part of the existing output
                # format for this branch and is kept as-is.
                result.append(f"模糊查找一下【{name}】,换算规则:【{temp1}】,233")
            else:
                result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
    return result
def extract_concrete_info(outputs):
    """Merge several instruction sentences into one lookup text plus one rule.

    For every sentence containing at least two 【…】 groups, the first group
    is treated as the thing to look up and the last group as the conversion
    rule. All rules are assumed to be identical, so only the first one
    encountered is kept.

    Args:
        outputs: Iterable of sentence strings.

    Returns:
        ``[lookups, rule]`` where ``lookups`` joins one "查找一下【…】" fragment
        per qualifying sentence with "; " and ``rule`` is "换算规则:【…】";
        an empty list when no sentence qualifies.
    """
    import re

    prefixes = []
    suffix = ''
    for sentence in outputs:
        matches = re.findall(r'【([^】]+)】', sentence)
        if len(matches) < 2:
            continue
        prefixes.append(f"查找一下【{matches[0]}】")
        if not suffix:
            suffix = f'换算规则:【{matches[-1]}】'
    if not prefixes or not suffix:
        return []
    return ['; '.join(prefixes), suffix]
def extract_query_prefix_list(input_list):
    """Extract the leading "查找一下【…】" fragment from each string.

    Args:
        input_list: List of descriptive sentence strings.

    Returns:
        One "查找一下【…】" fragment per string that *starts* with that
        pattern, in input order; strings that do not start with it are
        skipped.
    """
    import re

    pattern = re.compile(r'(查找一下【[^】]+】)')
    # Match each string once and keep only the successful matches.
    found = (pattern.match(text) for text in input_list)
    return [m.group(1) for m in found if m]
def pre_mapping2(keywords, data):
    """Translate the indicator named in ``keywords`` into lookup instruction text.

    Unlike ``pre_mapping``, every failure is reported as a message string
    rather than by raising or returning ``None``.

    Args:
        keywords: Text containing the indicator name wrapped in 【 and 】.
        data: Iterable of indicator dicts with "指标名称" / "指标描述" keys.

    Returns:
        * "测试:<description>" when the description is a ``str``;
        * a single instruction ``str`` when the rule is "1" or there is
          exactly one mapping target;
        * a non-empty list of instruction strings for several targets;
        * one of the message strings "未找到匹配的关键字", "未找到对应的指标",
          "未匹配到有效的映射项" or "不支持的指标描述格式" otherwise.
    """
    import re

    # Pull the indicator name out of the first 【…】 group.
    match = re.search(r'【(.*?)】', keywords)
    if not match:
        return "未找到匹配的关键字"
    extracted = match.group(1)

    # Locate the indicator entry with that name.
    ceshi = next((item for item in data if item["指标名称"] == extracted), None)
    if not ceshi:
        return "未找到对应的指标"

    desc = ceshi.get("指标描述")
    if isinstance(desc, str):
        return f"测试:{desc}"
    if not isinstance(desc, dict):
        return "不支持的指标描述格式"

    mapping_rule = desc.get("映射规则")
    mappings = desc.get("指标映射", [])
    if not mappings:
        # Nothing to map to; avoid indexing into an empty list below.
        return "未匹配到有效的映射项"

    # Rule "1": direct fuzzy lookup of the first target.
    if mapping_rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    # Exactly one mapping target.
    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        if "【" in temp0:
            return f"{temp1},换算规则:【{temp0}】"
        return f"查找一下【{temp1}】,换算规则:【{temp0}】"

    # Several mapping targets.
    result = []
    if judge_str(ceshi, data):
        for ele in mappings:
            item = next(
                (d for d in data if d["指标名称"] == ele and isinstance(d["指标描述"], str)),
                None,
            )
            if item:
                result.append(f"{item['指标描述']},换算规则:【{mapping_rule}】")
    else:
        for item_name in mappings:
            temp0, temp1 = find_target_items(mapping_rule, item_name, data)
            if temp0 is None and temp1 is None:
                continue
            if not judge_exists(item_name, data):
                continue
            if temp0 is None:
                # No single mapped indicator was resolved: look the name up
                # directly instead of emitting "【None】".
                temp0 = item_name
            result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
    return result or "未匹配到有效的映射项"