Files
zoujiwen fad7c5de4a 上传文件至 kg_lab_6.13
6.17 更新对检索工程数据复杂表达式的能力
2025-06-17 17:17:20 +08:00

228 lines
7.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
def Dictionary_content_mapping(input_str, data, key="指标名称"):
    """Look up the record whose ``key`` field equals the first 【…】 token.

    Args:
        input_str: Text expected to contain a ``【…】`` bracketed token.
        data: Sequence of dict records to search.
        key: Field compared against the bracketed text.

    Returns:
        The first record with ``record.get(key)`` equal to the bracketed
        text, or ``None`` when there is no bracketed token or no record
        matches.
    """
    import re

    bracketed = re.search(r'【(.*?)】', input_str)
    if bracketed is None:
        # Nothing to look up without a bracketed token.
        return None

    wanted = bracketed.group(1)
    return next((record for record in data if record.get(key) == wanted), None)
def find_target_item(input_str, data):
    """Resolve a mapped name to its mapping rule and target description.

    The first pass scans records whose ``指标描述`` is a dict and whose first
    ``指标映射`` entry equals ``input_str``, remembering that record's
    ``映射规则``. The second pass finds the record whose ``指标名称`` equals
    the matched name and takes its ``指标描述``. In both passes the last
    matching record wins.

    Args:
        input_str: Name compared against the first ``指标映射`` entry.
        data: Iterable of dict records with ``指标名称`` and ``指标描述``.

    Returns:
        A two-element list ``[rule, description]``; either slot is ``None``
        when nothing matched.
    """
    rule = None
    mapped_name = None
    for entry in data:
        desc = entry["指标描述"]
        if isinstance(desc, dict) and desc["指标映射"][0] == input_str:
            rule = desc["映射规则"]
            mapped_name = desc["指标映射"][0]

    target_desc = None
    for entry in data:
        if entry["指标名称"] == mapped_name:
            target_desc = entry["指标描述"]

    return [rule, target_desc]
def find_target_items(ele, input_str, data):
    """Resolve ``input_str`` to a single-target mapping, paired with ``ele``.

    Only records whose ``指标描述`` is a dict are considered. A record matches
    when its ``指标名称`` or its first ``指标映射`` entry equals ``input_str``.
    The target is taken only when the record maps to exactly one entry, and
    the last such record wins.

    Args:
        ele: Value passed through unchanged as the second result element.
        input_str: Name to resolve.
        data: Iterable of dict records with ``指标名称`` and ``指标描述``.

    Returns:
        ``[target, ele]`` where ``target`` is the single mapped name, or
        ``None`` when no single-target record matched.
    """
    resolved = None
    for entry in data:
        desc = entry["指标描述"]
        if not isinstance(desc, dict):
            continue
        # Skip records matching neither by name nor by first mapping entry.
        if entry["指标名称"] != input_str and desc["指标映射"][0] != input_str:
            continue
        targets = desc["指标映射"]
        if len(targets) == 1:
            resolved = targets[0]
    return [resolved, ele]
def is_yx_prefix(s):
    """Return ``True`` when ``s`` starts with the literal prefix ``YX``."""
    import re

    # The pattern is anchored at the start; anything may follow the prefix.
    return re.match(r"^YX.*", s) is not None
def judge_exists(input_str, data):
    """Report whether ``input_str`` is a known or implicitly valid name.

    A name is accepted when ``is_yx_prefix`` says it starts with ``YX``, or
    when some record with a dict ``指标描述`` has it as its ``指标名称`` or as
    its first ``指标映射`` entry.

    Args:
        input_str: Name to check.
        data: Iterable of dict records with ``指标名称`` and ``指标描述``.

    Returns:
        ``True`` if the name is accepted, otherwise ``False``.
    """
    if is_yx_prefix(input_str):
        return True
    return any(
        entry["指标名称"] == input_str
        or entry["指标描述"]["指标映射"][0] == input_str
        for entry in data
        if isinstance(entry["指标描述"], dict)
    )
def judge_str(ceshi, data):
    """Check whether the first mapping of ``ceshi`` names a plain-text record.

    Args:
        ceshi: Record whose ``指标描述`` is a dict with an ``指标映射`` list.
        data: Iterable of dict records with ``指标名称`` and ``指标描述``.

    Returns:
        ``True`` when some record with a string ``指标描述`` has a ``指标名称``
        equal to ``ceshi``'s first ``指标映射`` entry, otherwise ``False``.
    """
    # The lookup into ``ceshi`` stays inside the generator so it is only
    # evaluated once a string-described record is actually encountered.
    return any(
        entry["指标名称"] == ceshi["指标描述"]["指标映射"][0]
        for entry in data
        if isinstance(entry["指标描述"], str)
    )
def pre_mapping(keywords, data):
    """Expand the 【…】 token in ``keywords`` into query text via ``data``.

    The record whose ``指标名称`` equals the bracketed text is located and its
    ``指标描述`` decides the result:

    * string description: returned as is;
    * dict with ``映射规则`` equal to ``"1"``: a fuzzy-lookup phrase for the
      first ``指标映射`` entry;
    * dict with exactly one mapping: a phrase built from
      ``find_target_item``;
    * dict with several mappings: a list of phrases, one per resolvable
      mapping.

    Args:
        keywords: Text expected to contain a ``【…】`` bracketed token.
        data: Sequence of dict records with ``指标名称`` and ``指标描述``.

    Returns:
        A string, a list of strings, or ``None``. ``None`` is returned when
        ``keywords`` has no bracketed token, when no record matches, or when
        the description has an unhandled shape. The first two cases used to
        crash on an unbound local variable.
    """
    import re

    match = re.search(r'【(.*?)】', keywords)
    if match is None:
        return None
    extracted = match.group(1)

    ceshi = next((entry for entry in data if entry["指标名称"] == extracted), None)
    if ceshi is None:
        return None

    desc = ceshi["指标描述"]
    if isinstance(desc, str):
        return desc
    if not isinstance(desc, dict):
        return None

    rule = desc["映射规则"]
    mappings = desc["指标映射"]

    # Rule "1" is a direct alias of the first mapped name.
    if rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        # A rule that already carries its own 【…】 token is emitted without
        # the lookup prefix.
        if "【" in temp0:
            return f"{temp1},换算规则:【{temp0}】"
        return f"查找一下【{temp1}】,换算规则:【{temp0}】"

    if len(mappings) > 1:
        result = []
        if judge_str(ceshi, data):
            # Mappings point at plain-text records: emit each description.
            for ele in mappings:
                for item in data:
                    if isinstance(item["指标描述"], str) and item["指标名称"] == ele:
                        result.append(f"{item['指标描述']},换算规则:【{rule}】")
        else:
            for name in mappings:
                temp0, temp1 = find_target_items(rule, name, data)
                # Entries without a rule or with an unknown name are dropped.
                if temp1 is None or not judge_exists(name, data):
                    continue
                if temp0 is None:
                    # No single-target record resolved; use the raw name.
                    temp0 = name
                result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
        return result

    return None
#
def extract_query_prefix_list(text_list):
    """Return, for each string, the text up to its first closed 【…】 token.

    Strings that contain no ``【…】`` token reachable from the start of the
    string are left out of the result.

    Args:
        text_list: Iterable of strings to scan.

    Returns:
        List of matched prefixes, in input order.
    """
    import re

    pattern = r'^.*?【[^】]*】'
    prefixes = []
    for text in text_list:
        found = re.search(pattern, text)
        if found:
            prefixes.append(found.group())
    return prefixes
def extract_concrete_info(ceshi):
    """Merge several lookup phrases into one fuzzy-lookup phrase.

    Keywords are the bracketed texts following ``查找一下`` in each item; they
    are concatenated without a separator. The rule is the bracketed text
    following ``换算规则:`` in the first item that has one, and it may span
    several lines.

    Args:
        ceshi: Iterable of phrase strings.

    Returns:
        A one-element list holding the merged phrase.
    """
    import re

    keywords = []
    rule_text = None
    for text in ceshi:
        keyword_hit = re.search(r'查找一下【(.*?)】', text)
        if keyword_hit:
            keywords.append(keyword_hit.group(1))

        # Only the first rule found is kept; later items are assumed to
        # carry the same rule.
        if rule_text is None:
            rule_hit = re.search(r'换算规则:【(.*?)】', text, re.DOTALL)
            if rule_hit:
                rule_text = rule_hit.group(1)

    joined = ''.join(keywords)
    return [f"模糊查找一下【{joined}】,换算规则:【{rule_text}】"]
def split_chinese_bracketed_phrases(text):
    """Split ``text`` into pieces that each end with a closed 【…】 token.

    Each piece is a 【…】 token together with whatever text precedes it since
    the previous piece, with surrounding whitespace stripped. Text after the
    last token is discarded.

    Args:
        text: String to split.

    Returns:
        List of pieces in order of appearance.
    """
    import re

    pieces = []
    for found in re.finditer(r'[^【]*?【[^】]*】', text):
        pieces.append(found.group(0).strip())
    return pieces
def pre_mapping2(keywords, data):
    """Expand the 【…】 token in ``keywords`` into query text via ``data``.

    This variant reports every failure as a message string instead of
    raising or returning ``None``.

    Args:
        keywords: Text expected to contain a ``【…】`` bracketed token.
        data: Iterable of dict records with ``指标名称`` and ``指标描述``.

    Returns:
        * a message string when there is no bracketed token, no matching
          record, an unsupported description type, or no usable mapping;
        * ``"测试:<description>"`` for a string description;
        * a single phrase when the rule is ``"1"`` or there is exactly one
          mapping;
        * otherwise a non-empty list of phrases, one per resolvable mapping.
    """
    import re

    # Pull the text inside the first pair of 【】 brackets.
    match = re.search(r'【(.*?)】', keywords)
    if not match:
        return "未找到匹配的关键字"
    extracted = match.group(1)

    # Locate the record for that name.
    ceshi = next((item for item in data if item["指标名称"] == extracted), None)
    if not ceshi:
        return "未找到对应的指标"

    desc = ceshi.get("指标描述")
    if isinstance(desc, str):
        return f"测试:{desc}"
    if not isinstance(desc, dict):
        return "不支持的指标描述格式"

    mapping_rule = desc.get("映射规则")
    mappings = desc.get("指标映射", [])

    # An empty or missing mapping list has nothing to expand; report it
    # rather than failing on ``mappings[0]`` further down.
    if not mappings:
        return "未匹配到有效的映射项"

    # Rule "1" is a direct alias of the first mapped name.
    if mapping_rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    # Exactly one mapping entry.
    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        if "【" in temp0:
            return f"{temp1},换算规则:【{temp0}】"
        return f"查找一下【{temp1}】,换算规则:【{temp0}】"

    # Several mapping entries.
    result = []
    if judge_str(ceshi, data):
        for ele in mappings:
            item = next(
                (d for d in data
                 if d["指标名称"] == ele and isinstance(d["指标描述"], str)),
                None,
            )
            if item:
                result.append(f"{item['指标描述']},换算规则:【{mapping_rule}】")
    else:
        for item_name in mappings:
            temp0, temp1 = find_target_items(mapping_rule, item_name, data)
            if temp0 is None and temp1 is None:
                continue
            if judge_exists(item_name, data):
                # Use the raw name when no single-target record resolved.
                # The check must be on temp0 (the resolved target), not on
                # temp1 (the rule); otherwise the phrase reads 【None】.
                if temp0 is None:
                    temp0 = item_name
                result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
    return result or "未匹配到有效的映射项"