6.13提交,语义处理测试

This commit is contained in:
Zdao032
2025-06-13 10:11:42 +08:00
parent 026f9bd70c
commit 89b0154217
11 changed files with 2481 additions and 0 deletions
+221
View File
@@ -0,0 +1,221 @@
def Dictionary_content_mapping(input_str, data, key="指标名称"):
    """Return the first record whose ``key`` field equals the text inside 【】.

    Args:
        input_str: Text searched for a ``【...】`` span; only the first span
            found is used.
        data: Iterable of dict records.
        key: Name of the field compared against the extracted text.

    Returns:
        The first record in ``data`` whose ``key`` value equals the extracted
        text, or ``None`` when ``input_str`` contains no ``【...】`` span or no
        record matches.
    """
    import re

    match = re.search(r'【(.*?)】', input_str)
    if match is None:
        # Nothing to look up without a bracketed name.
        return None
    extracted = match.group(1)
    for record in data:
        if record.get(key) == extracted:
            return record
    return None
def find_target_item(input_str, data):
    """Look up the mapping rule and the description tied to ``input_str``.

    The records in ``data`` are read through their "指标名称" and "指标描述"
    keys. A dict-valued "指标描述" is read through its "指标映射" (a sequence)
    and "映射规则" keys.

    Args:
        input_str: Name compared against the first "指标映射" entry of every
            dict-described record, and then against "指标名称".
        data: Iterable of dict records.

    Returns:
        A two-element list ``[rule, description]``:

        * ``rule`` is the "映射规则" of the last dict-described record whose
          first "指标映射" entry equals ``input_str``.
        * ``description`` is the "指标描述" of the last record whose "指标名称"
          equals ``input_str`` (``None`` if there is none).

        ``[None, None]`` is returned when no record maps to ``input_str``.
    """
    rule = None
    found = False
    for item in data:
        desc = item["指标描述"]
        if not isinstance(desc, dict):
            continue
        mappings = desc["指标映射"]
        # An empty mapping list cannot reference anything; skipping it avoids
        # an IndexError on the [0] access.
        if mappings and mappings[0] == input_str:
            rule = desc["映射规则"]
            found = True

    if not found:
        # Without a mapping hit there is no name to resolve a description for.
        return [None, None]

    description = None
    for item in data:
        if item["指标名称"] == input_str:
            description = item["指标描述"]
    return [rule, description]
def find_target_items(ele, input_str, data):
    """Resolve ``input_str`` to the target of a single-entry mapping record.

    Only records whose "指标描述" is a dict with exactly one "指标映射" entry
    are considered. Such a record matches when its "指标名称" or its single
    mapping entry equals ``input_str``.

    Args:
        ele: Value passed through unchanged as the second element of the
            result.
        input_str: Name to resolve.
        data: Iterable of dict records.

    Returns:
        ``[target, ele]`` where ``target`` is the single mapping entry of the
        last matching record, or ``None`` when no record matches.
    """
    target = None
    for item in data:
        desc = item["指标描述"]
        if not isinstance(desc, dict):
            continue
        mappings = desc["指标映射"]
        # Checking the length first keeps the [0] access safe for records
        # with an empty mapping list.
        if len(mappings) != 1:
            continue
        if item["指标名称"] == input_str or mappings[0] == input_str:
            target = mappings[0]
    return [target, ele]
def judge_exists(input_str, data):
    """Tell whether a dict-described record refers to ``input_str``.

    A record qualifies when its "指标描述" is a dict and either its "指标名称"
    or the first entry of its "指标映射" equals ``input_str``.

    Args:
        input_str: Name to look for.
        data: Iterable of dict records.

    Returns:
        ``True`` if at least one qualifying record exists, else ``False``.
    """
    for item in data:
        desc = item["指标描述"]
        if not isinstance(desc, dict):
            continue
        if item["指标名称"] == input_str:
            return True
        mappings = desc["指标映射"]
        # Guard the [0] access: an empty mapping list matches nothing.
        if mappings and mappings[0] == input_str:
            return True
    return False
def judge_str(ceshi, data):
    """Tell whether the first mapping of ``ceshi`` names a text-described record.

    Args:
        ceshi: Record whose "指标描述" is a dict holding a "指标映射" sequence.
        data: Iterable of dict records.

    Returns:
        ``True`` if some record in ``data`` has a string "指标描述" and a
        "指标名称" equal to the first "指标映射" entry of ``ceshi``; ``False``
        otherwise, including when the mapping list is empty.
    """
    mappings = ceshi["指标描述"]["指标映射"]
    if not mappings:
        # No first entry to compare against.
        return False
    first = mappings[0]  # loop-invariant, so resolved once up front
    return any(
        isinstance(item["指标描述"], str) and item["指标名称"] == first
        for item in data
    )
def pre_mapping(keywords, data):
    """Translate the indicator named inside 【】 into lookup instructions.

    The first ``【...】`` span of ``keywords`` selects the first record of
    ``data`` with that "指标名称". The result depends on its "指标描述":

    * a string: returned unchanged;
    * a dict whose "映射规则" is ``"1"``: a fuzzy-lookup sentence for the
      first "指标映射" entry;
    * a dict with exactly one "指标映射" entry: one sentence built from
      ``find_target_item``;
    * a dict with several entries: a list with one sentence per entry that
      could be resolved.

    Every emitted ``【`` is closed with ``】`` so the sentences can be parsed
    back with a ``【...】`` pattern.

    Args:
        keywords: Text containing the indicator name in ``【...】``.
        data: Sequence of dict records with "指标名称" and "指标描述" keys.

    Returns:
        A string, a list of strings (possibly empty), or ``None`` when
        ``keywords`` has no ``【...】`` span, no record matches, the description
        is neither a string nor a dict, or the mapping list is empty.
    """
    import re

    match = re.search(r'【(.*?)】', keywords)
    if match is None:
        # Previously fell through and failed on an unbound local.
        return None
    extracted = match.group(1)

    ceshi = next((item for item in data if item["指标名称"] == extracted), None)
    if ceshi is None:
        return None

    desc = ceshi["指标描述"]
    if isinstance(desc, str):
        return desc
    if not isinstance(desc, dict):
        return None

    rule = desc["映射规则"]
    mappings = desc["指标映射"]
    if not mappings:
        # Nothing to map to; also avoids an IndexError on mappings[0].
        return None

    if rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        return f"{temp1},换算规则:【{temp0}】"

    result = []
    if judge_str(ceshi, data):
        # The mapped names are text-described records: quote each
        # description together with this record's rule.
        for ele in mappings:
            for item in data:
                text = item["指标描述"]
                if isinstance(text, str) and item["指标名称"] == ele:
                    result.append(f"{text},换算规则:【{rule}】")
    else:
        # The mapped names are themselves mapping records: resolve each one
        # a level further and skip those that cannot be resolved.
        for name in mappings:
            target, rule_text = find_target_items(rule, name, data)
            if target is None:
                continue
            if not judge_exists(name, data):
                continue
            if rule_text is None:
                rule_text = name
            result.append(f"模糊查找一下【{target}】,换算规则:【{rule_text}】")
    return result
def extract_concrete_info(outputs):
    """Merge several lookup sentences into one lookup string plus one rule.

    For every sentence that contains at least two ``【...】`` spans, the first
    span becomes a ``查找一下【...】`` clause. The last span of the first such
    sentence becomes the conversion rule; all sentences are assumed to share
    the same rule, so later ones are not inspected for it.

    Args:
        outputs: Iterable of sentences.

    Returns:
        ``[lookup, rule]`` where ``lookup`` is the clauses joined with
        ``'; '`` and ``rule`` is ``换算规则:【...】``, or ``[]`` when no sentence
        has two or more spans.
    """
    import re

    bracket = re.compile(r'【([^】]+)】')
    prefixes = []
    suffix = ''
    for sentence in outputs:
        spans = bracket.findall(sentence)
        if len(spans) < 2:
            continue
        # Close the bracket so the clause can itself be matched by a
        # 【...】 pattern later on.
        prefixes.append(f"查找一下【{spans[0]}】")
        if not suffix:
            suffix = f'换算规则:【{spans[-1]}】'
    if not prefixes:
        return []
    return ['; '.join(prefixes), suffix]
def extract_query_prefix_list(input_list):
    """Collect the leading ``查找一下【...】`` clause of each string.

    Strings that do not start with such a clause are skipped; the pattern is
    anchored at the beginning of the string.

    Args:
        input_list: Iterable of strings.

    Returns:
        A list with the matched leading clause (for example
        ``'查找一下【大板式】'``) of every string that has one, in input order.
    """
    import re

    pattern = re.compile(r'(查找一下【[^】]+】)')
    # Match each string once and reuse the match object.
    return [found.group(1) for found in map(pattern.match, input_list) if found]
def pre_mapping2(keywords, data):
    """Translate the indicator named inside 【】, reporting failures as text.

    Follows the same lookup as ``pre_mapping`` but returns a message string
    instead of failing when a step cannot be completed.

    Every emitted ``【`` is closed with ``】`` so the sentences can be parsed
    back with a ``【...】`` pattern.

    Args:
        keywords: Text containing the indicator name in ``【...】``.
        data: Iterable of dict records with "指标名称" and "指标描述" keys.

    Returns:
        * ``"未找到匹配的关键字"`` when ``keywords`` has no ``【...】`` span.
        * ``"未找到对应的指标"`` when no record carries the extracted name.
        * ``"测试:<description>"`` when the description is a string.
        * ``"不支持的指标描述格式"`` when it is neither a string nor a dict.
        * For a dict description: a single sentence (rule ``"1"`` or exactly
          one mapping entry), a non-empty list of sentences, or
          ``"未匹配到有效的映射项"`` when nothing could be resolved.
    """
    import re

    # Pull the indicator name out of the brackets.
    match = re.search(r'【(.*?)】', keywords)
    if not match:
        return "未找到匹配的关键字"
    extracted = match.group(1)

    # Locate the record for that indicator.
    ceshi = next((item for item in data if item["指标名称"] == extracted), None)
    if not ceshi:
        return "未找到对应的指标"

    desc = ceshi.get("指标描述")
    if isinstance(desc, str):
        return f"测试:{desc}"
    if not isinstance(desc, dict):
        return "不支持的指标描述格式"

    mapping_rule = desc.get("映射规则")
    mappings = desc.get("指标映射", [])
    if not mappings:
        # An empty mapping list resolves to nothing; returning here also
        # avoids the IndexError on mappings[0] below and inside judge_str.
        return "未匹配到有效的映射项"

    # Rule "1": direct fuzzy lookup of the first mapped name.
    if mapping_rule == "1":
        return f"模糊查找一下【{mappings[0]}】"

    # Exactly one mapped name.
    if len(mappings) == 1:
        temp0, temp1 = find_target_item(mappings[0], data)
        # NOTE(review): this used to branch on `"" in temp0`, which is true
        # for every string, so the alternative "查找一下【...】" wording was
        # unreachable. The intended substring is unknown; only the reachable
        # format is kept.
        return f"{temp1},换算规则:【{temp0}】"

    # Several mapped names.
    result = []
    if judge_str(ceshi, data):
        for ele in mappings:
            item = next(
                (
                    d for d in data
                    if d["指标名称"] == ele and isinstance(d["指标描述"], str)
                ),
                None,
            )
            if item:
                result.append(f"{item['指标描述']},换算规则:【{mapping_rule}】")
    else:
        for item_name in mappings:
            temp0, temp1 = find_target_items(mapping_rule, item_name, data)
            if temp0 is None:
                # Unresolved name: skip it, as pre_mapping does, instead of
                # emitting the literal text "None" as the lookup target.
                continue
            if not judge_exists(item_name, data):
                continue
            if temp1 is None:
                temp1 = item_name
            result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
    return result or "未匹配到有效的映射项"