上传文件至 kg_lab_6.13
6.17 更新对检索工程数据复杂表达式的能力
This commit is contained in:
+29
-35
@@ -112,7 +112,7 @@ def pre_mapping(keywords, data):
|
||||
if judge_exists(item, data):
|
||||
temp0 = item
|
||||
# temp0 = find_target_items(ceshi["指标描述"]["映射规则"], item, data)
|
||||
result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】,233")
|
||||
result.append(f"模糊查找一下【{temp0}】,换算规则:【{temp1}】")
|
||||
else:
|
||||
continue
|
||||
|
||||
@@ -125,45 +125,39 @@ def pre_mapping(keywords, data):
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def extract_concrete_info(outputs):
|
||||
#
|
||||
def extract_query_prefix_list(text_list):
    """Return the leading ``...【...】`` prefix of every string that has one.

    The prefix runs from the start of the string through the closing ``】``
    of the first ``【...】`` group. Strings without such a group are left
    out of the result, as are strings whose first ``【`` only appears after
    a line break (``.`` does not match newlines and ``^`` is anchored to
    the start of the string).

    Args:
        text_list: Iterable of strings to scan.

    Returns:
        A list with the matched prefix of each matching string, in input
        order.
    """
    import re

    # Compile once and search each string a single time instead of
    # evaluating the same regex twice per element.
    prefix_re = re.compile(r'^.*?【[^】]*】')
    found = (prefix_re.search(text) for text in text_list)
    return [match.group() for match in found if match]
|
||||
|
||||
"""
|
||||
从多个句子中提取第一个“【】”作为查找信息,最后一个“【】”作为换算规则,
|
||||
返回格式为:[合并的查找句子, 换算规则]
|
||||
"""
|
||||
prefixes = []
|
||||
suffix = ''
|
||||
|
||||
for item in outputs:
|
||||
matches = re.findall(r'【([^】]+)】', item)
|
||||
if len(matches) >= 2:
|
||||
prefixes.append(f"查找一下【{matches[0]}】")
|
||||
# 假设所有换算规则一致,取第一个即可
|
||||
if not suffix:
|
||||
suffix = f'换算规则:【{matches[-1]}】'
|
||||
|
||||
if not prefixes or not suffix:
|
||||
return []
|
||||
|
||||
return ['; '.join(prefixes), suffix]
|
||||
|
||||
|
||||
def extract_query_prefix_list(input_list):
|
||||
def extract_concrete_info(ceshi):
    """Merge several lookup sentences into one combined lookup sentence.

    Each item is scanned for a ``查找一下【keyword】`` section and a
    ``换算规则:【rule】`` section. All keywords are joined with ``;`` and
    paired with the first rule that was found (the rule is assumed to be
    the same for every item).

    Args:
        ceshi: Iterable of sentence strings to merge.

    Returns:
        A one-element list holding the merged sentence. When no item
        carries a rule, the rule section is rendered empty instead of
        containing the literal text ``None``.
    """
    import re

    keyword_re = re.compile(r'查找一下【(.*?)】')
    # DOTALL lets a rule span several lines.
    rule_re = re.compile(r'换算规则:【(.*?)】', re.DOTALL)

    keyword_list = []
    rule_text = None  # None means "no rule seen yet"; an empty rule still counts as seen

    for item in ceshi:
        keyword_match = keyword_re.search(item)
        if keyword_match:
            keyword_list.append(keyword_match.group(1))

        if rule_text is None:
            rule_match = rule_re.search(item)
            if rule_match:
                # Only the first rule is kept.
                rule_text = rule_match.group(1)

    if rule_text is None:
        # Avoid formatting a missing rule as the string "None".
        rule_text = ''

    merged = f"模糊查找一下【{';'.join(keyword_list)}】,换算规则:【{rule_text}】"
    return [merged]
|
||||
|
||||
def split_chinese_bracketed_phrases(text):
    """Split text into consecutive "label【content】" phrases.

    Each phrase is one ``【...】`` group together with whatever text
    precedes it (back to the end of the previous group). Text after the
    last ``】`` is not returned.

    Args:
        text: The string to split.

    Returns:
        A list of the phrases in order of appearance, each with leading
        and trailing whitespace stripped.
    """
    import re

    # A run of non-"【" characters followed by one bracketed group.
    pattern = r'[^【]*?【[^】]*】'
    return [phrase.strip() for phrase in re.findall(pattern, text)]
|
||||
|
||||
参数:
|
||||
input_list (list[str]): 包含描述性语句的字符串列表
|
||||
|
||||
返回:
|
||||
list[str]: 提取出的前缀部分列表(如 '查找一下【大板式】')
|
||||
"""
|
||||
pattern = r'(查找一下【[^】]+】)'
|
||||
return [re.match(pattern, text).group(1) for text in input_list if re.match(pattern, text)]
|
||||
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user