fad7c5de4a
6.17 更新对检索工程数据复杂表达式的能力
228 lines
7.8 KiB
Python
228 lines
7.8 KiB
Python
def Dictionary_content_mapping(input_str, data, key="指标名称"):
    """Look up the entry named inside the first 【...】 pair of ``input_str``.

    Args:
        input_str: Text searched for a 【...】 bracketed name.
        data: Iterable of entries that support ``.get`` (e.g. dicts).
            ``None`` or an empty collection is treated as "no entries".
        key: Field of each entry compared against the bracketed text.

    Returns:
        The first entry whose ``key`` field equals the bracketed text, or
        ``None`` when ``input_str`` has no bracket pair or nothing matches.
    """
    import re

    match = re.search(r'【(.*?)】', input_str)
    if match is None:
        # No bracketed name to look up.
        return None

    extracted = match.group(1)
    # ``data or ()`` lets a missing table behave like an empty one.
    return next(
        (entry for entry in (data or ()) if entry.get(key) == extracted),
        None,
    )
|
||
|
||
def find_target_item(input_str, data):
    """Resolve a mapped indicator name to its mapping rule and description.

    Entries whose "指标描述" is a dict are scanned for one whose first
    "指标映射" element equals ``input_str``; the "映射规则" of the last such
    entry becomes the rule. If at least one entry matched, the "指标描述" of
    the last entry whose "指标名称" equals ``input_str`` becomes the
    description.

    Args:
        input_str: Mapped indicator name to resolve.
        data: Iterable of dict entries.

    Returns:
        A two-element list ``[rule, description]``; either slot is ``None``
        when it could not be resolved.
    """
    rule = None
    matched = False
    for item in data:
        desc = item.get("指标描述")
        if not isinstance(desc, dict):
            continue
        mappings = desc.get("指标映射")
        # Guard against a missing or empty mapping list before indexing.
        if mappings and mappings[0] == input_str:
            rule = desc.get("映射规则")
            matched = True

    if not matched:
        # Nothing maps to input_str, so there is no name to look up.
        return [None, None]

    description = None
    for item in data:
        if item.get("指标名称") == input_str:
            description = item.get("指标描述")
    return [rule, description]
|
||
|
||
def find_target_items(ele, input_str, data):
    """Find the single mapped target associated with ``input_str``.

    Only entries whose "指标描述" is a dict with exactly one "指标映射"
    element can contribute. Such an entry matches when its "指标名称" or its
    sole mapping element equals ``input_str``; the last match wins.

    Args:
        ele: Value passed through unchanged as the second result slot
            (callers in this file pass the mapping rule).
        input_str: Indicator name or mapped name to look for.
        data: Iterable of dict entries.

    Returns:
        A two-element list ``[target, ele]`` where ``target`` is the matched
        mapping element or ``None``.
    """
    target = None
    for item in data:
        desc = item.get("指标描述")
        if not isinstance(desc, dict):
            continue
        mappings = desc.get("指标映射") or ()
        # Entries with zero or several mappings never yield a target, so
        # they are skipped before any indexing happens.
        if len(mappings) != 1:
            continue
        if item.get("指标名称") == input_str or mappings[0] == input_str:
            target = mappings[0]
    return [target, ele]
|
||
|
||
|
||
|
||
def is_yx_prefix(s):
    """Return True when ``s`` is a string that starts with "YX".

    Non-string values return False instead of raising.
    """
    # A plain prefix test needs no regular expression.
    return isinstance(s, str) and s.startswith("YX")
|
||
|
||
|
||
def judge_exists(input_str, data):
    """Report whether ``input_str`` is a usable indicator reference.

    A name is accepted when ``is_yx_prefix`` accepts it, or when some entry
    with a dict "指标描述" has ``input_str`` as its "指标名称" or as the first
    element of its "指标映射".

    Args:
        input_str: Indicator name to check.
        data: Iterable of dict entries.

    Returns:
        True if the name is accepted, otherwise False.
    """
    if is_yx_prefix(input_str):
        return True

    for item in data:
        desc = item.get("指标描述")
        if not isinstance(desc, dict):
            continue
        if item.get("指标名称") == input_str:
            return True
        mappings = desc.get("指标映射")
        # Guard against a missing or empty mapping list before indexing.
        if mappings and mappings[0] == input_str:
            return True
    return False
|
||
|
||
def judge_str(ceshi, data):
    """Check whether the first mapped name of ``ceshi`` has a text description.

    Args:
        ceshi: Entry whose "指标描述" is a dict holding an "指标映射" list.
        data: Iterable of dict entries.

    Returns:
        True when some entry in ``data`` has a string "指标描述" and a
        "指标名称" equal to the first "指标映射" element of ``ceshi``; False
        otherwise, including when ``ceshi`` has no mapping elements.
    """
    desc = ceshi.get("指标描述")
    mappings = desc.get("指标映射") if isinstance(desc, dict) else None
    if not mappings:
        return False

    # The looked-up name does not depend on the loop, so resolve it once.
    first_mapped = mappings[0]
    return any(
        isinstance(item.get("指标描述"), str)
        and item.get("指标名称") == first_mapped
        for item in data
    )
|
||
|
||
def pre_mapping(keywords, data):
    """Translate a bracketed indicator request into lookup instruction text.

    The name inside the first 【...】 pair of ``keywords`` is looked up by
    "指标名称" in ``data`` and handled according to its "指标描述":

    * a string description is returned as-is;
    * a dict with "映射规则" equal to "1" yields a fuzzy-lookup instruction
      for its first "指标映射" element;
    * a dict with one mapping element yields a single instruction built from
      ``find_target_item``;
    * a dict with several mapping elements yields a list of instructions.

    Args:
        keywords: Text containing the 【...】 bracketed indicator name.
        data: Sequence of dict entries describing indicators.

    Returns:
        A string, a list of strings, or ``None`` when there is no bracket
        pair, no matching indicator, an unsupported description type, or an
        empty mapping list.
    """
    import re

    match = re.search(r'【(.*?)】', keywords)
    if match is None:
        # Previously fell through to an unbound local.
        return None
    extracted = match.group(1)

    ceshi = next(
        (entry for entry in data if entry.get("指标名称") == extracted),
        None,
    )
    if ceshi is None:
        return None

    desc = ceshi.get("指标描述")
    if isinstance(desc, str):
        return desc
    if not isinstance(desc, dict):
        return None

    rule = desc["映射规则"]
    mappings = desc["指标映射"]
    if not mappings:
        # Nothing to map to; avoids indexing an empty list below.
        return None

    if rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        # temp0 may be None when no rule is found; only test string rules.
        if isinstance(temp0, str) and "【" in temp0:
            return f"{temp1},换算规则:【{temp0}】"
        return f"查找一下【{temp1}】,换算规则:【{temp0}】"

    result = []
    if judge_str(ceshi, data):
        # Mapped names carry text descriptions: emit each one with the rule.
        for ele in mappings:
            for item in data:
                if isinstance(item["指标描述"], str) and item["指标名称"] == ele:
                    result.append(f"{item['指标描述']},换算规则:【{rule}】")
        return result

    for name in mappings:
        temp0, temp1 = find_target_items(rule, name, data)
        if temp1 is None:
            # Without a rule nothing is emitted for this name.
            continue
        if not judge_exists(name, data):
            continue
        if temp0 is None:
            # No single mapped target was found; fall back to the name itself.
            temp0 = name
        result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
    return result
|
||
|
||
#
|
||
def extract_query_prefix_list(text_list):
    """Return the leading "...【...】" segment of each string that has one.

    For every string the match runs from the start of the string up to and
    including the first 【...】 pair. Strings without such a segment are
    skipped.

    Args:
        text_list: Iterable of strings.

    Returns:
        List of matched prefixes in input order.
    """
    import re

    pattern = re.compile(r'^.*?【[^】]*】')
    # Search each string once and reuse the match object.
    hits = (pattern.search(text) for text in text_list)
    return [hit.group() for hit in hits if hit]
|
||
|
||
def extract_concrete_info(ceshi):
    """Merge several lookup instructions into one combined instruction.

    From each string the text inside the first 查找一下【...】 occurrence is
    collected as a keyword. The rule is the text inside the first
    换算规则:【...】 occurrence found across all strings; it may span lines.

    Args:
        ceshi: Iterable of instruction strings.

    Returns:
        A one-element list holding the merged instruction: keywords joined
        with ";" and the shared rule (rendered as "None" if none was found).
    """
    import re

    keyword_re = re.compile(r'查找一下【(.*?)】')
    # DOTALL lets the rule body contain newlines.
    rule_re = re.compile(r'换算规则:【(.*?)】', re.DOTALL)

    names = []
    shared_rule = None
    for entry in ceshi:
        name_hit = keyword_re.search(entry)
        if name_hit is not None:
            names.append(name_hit.group(1))

        if shared_rule is not None:
            # Only the first rule is kept; all items are assumed to share it.
            continue
        rule_hit = rule_re.search(entry)
        if rule_hit is not None:
            shared_rule = rule_hit.group(1)

    joined_names = ';'.join(names)
    return [f"模糊查找一下【{joined_names}】,换算规则:【{shared_rule}】"]
|
||
|
||
def split_chinese_bracketed_phrases(text):
    """Split ``text`` into consecutive "label【content】" segments.

    Each segment is a 【...】 pair together with the text preceding it (back
    to the previous segment). Surrounding whitespace is stripped from every
    segment; text after the last 】 is dropped.

    Args:
        text: String to split.

    Returns:
        List of segments in order of appearance.
    """
    import re

    segment_re = re.compile(r'[^【]*?【[^】]*】')
    return [hit.group(0).strip() for hit in segment_re.finditer(text)]
|
||
|
||
|
||
|
||
|
||
|
||
def pre_mapping2(keywords, data):
    """Translate a bracketed indicator request into lookup instruction text.

    Variant of ``pre_mapping`` that reports failures as message strings
    instead of raising or returning ``None``.

    Args:
        keywords: Text containing a 【...】 bracketed indicator name.
        data: Iterable of dict entries describing indicators.

    Returns:
        A string (an instruction or a failure message), or a non-empty list
        of instruction strings when the indicator maps to several targets.
    """
    import re

    # Pull the indicator name out of the first 【...】 pair.
    match = re.search(r'【(.*?)】', keywords)
    if not match:
        return "未找到匹配的关键字"

    extracted = match.group(1)

    # Locate the indicator entry by name.
    ceshi = next(
        (item for item in data if item.get("指标名称") == extracted),
        None,
    )
    if not ceshi:
        return "未找到对应的指标"

    desc = ceshi.get("指标描述")

    if isinstance(desc, str):
        return f"测试:{desc}"
    if not isinstance(desc, dict):
        return "不支持的指标描述格式"

    mapping_rule = desc.get("映射规则")
    mappings = desc.get("指标映射") or []
    if not mappings:
        # Avoid indexing an empty mapping list in the branches below.
        return "未匹配到有效的映射项"

    # Rule "1": direct fuzzy lookup of the first mapped name.
    if mapping_rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    # Exactly one mapped name.
    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        # temp0 may be None when no rule is found; only test string rules.
        if isinstance(temp0, str) and "【" in temp0:
            return f"{temp1},换算规则:【{temp0}】"
        return f"查找一下【{temp1}】,换算规则:【{temp0}】"

    # Several mapped names.
    result = []
    if judge_str(ceshi, data):
        for ele in mappings:
            item = next(
                (
                    d for d in data
                    if d["指标名称"] == ele and isinstance(d["指标描述"], str)
                ),
                None,
            )
            if item:
                result.append(f"{item['指标描述']},换算规则:【{mapping_rule}】")
    else:
        for item_name in mappings:
            temp0, temp1 = find_target_items(mapping_rule, item_name, data)

            if temp0 is None and temp1 is None:
                continue

            if judge_exists(item_name, data):
                # Fall back to the name itself when no single mapped target
                # was resolved (this matches pre_mapping; the check used to
                # test temp1, which left temp0 as None in the output).
                if temp0 is None:
                    temp0 = item_name
                result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")

    return result or "未匹配到有效的映射项"
|
||
|
||
|
||
|
||
|
||
|