def extract_names_from_json(file_path):
    """Return the "name" value of every object in a JSON list file.

    :param file_path: path to a UTF-8 JSON file whose top level is a list.
    :return: list of the "name" values, in file order; entries that are not
        objects or have no "name" key are skipped.
    :raises ValueError: if the top-level JSON value is not a list.
    """
    import json

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The file must hold a list of objects.
    if not isinstance(data, list):
        raise ValueError("JSON 文件的格式应为包含对象的列表")

    # Only dict entries are considered: for a string entry `"name" in item`
    # would be a substring test and `.get` would then fail.
    return [
        item.get("name")
        for item in data
        if isinstance(item, dict) and "name" in item
    ]


def _compile_suffix_pattern(fields):
    """Build the case-insensitive suffix regex shared by the suffix helpers.

    The pattern matches any of ``fields`` anywhere in the text (no word
    boundaries, so a field may be part of a longer string), optionally
    preceded by a dot; group 1 captures the field text without the dot.

    Returns ``None`` when ``fields`` has no non-empty entry, because an empty
    alternation would match the empty string at every position.

    NOTE(review): alternatives are tried in the order given, so with
    ["xzwb", "xzwb2"] the text "xzwb2" yields "xzwb". Put longer fields first
    if the longest match is wanted.
    """
    import re

    candidates = [field for field in (fields or ()) if field]
    if not candidates:
        return None
    alternatives = '|'.join(re.escape(field) for field in candidates)
    return re.compile(r'(?:\.?)(' + alternatives + r')', re.IGNORECASE)


def _first_suffix(input_str, fields):
    """Return the first field found in ``input_str`` as written there, or None."""
    pattern = _compile_suffix_pattern(fields)
    if pattern is None:
        return None
    match = pattern.search(input_str)
    return match.group(1) if match else None


def define_suffix(input_str, fields):
    """Classify the input as a suffix problem or unknown (returns a list).

    :param input_str: text to inspect.
    :param fields: suffix names to look for, e.g. ["xzwb", "bxqd2", "dwg"];
        matching is case-insensitive and a leading dot is optional.
    :return: ``['后缀名问题']`` ("suffix problem") if any field occurs in
        ``input_str``, otherwise ``['未知']`` ("unknown"). An empty field
        list never matches.
    """
    if _first_suffix(input_str, fields) is not None:
        return ['后缀名问题']
    return ['未知']


def output_suffix(input_str, fields):
    """Return the first suffix from ``fields`` found in ``input_str``.

    The returned text keeps the casing used in ``input_str`` and excludes any
    leading dot.

    :raises IndexError: if no field occurs in ``input_str``. Use
        ``match_suffix`` for a non-raising variant.
    """
    suffix = _first_suffix(input_str, fields)
    if suffix is None:
        raise IndexError("no suffix from `fields` found in input_str")
    return suffix


def match_suffix(input_str, fields):
    """Extract the first suffix found in ``input_str`` as a string.

    :return: the matched suffix (casing as in ``input_str``, without a
        leading dot), or ``'未知'`` ("unknown") when nothing matches.
    """
    suffix = _first_suffix(input_str, fields)
    return suffix if suffix is not None else '未知'


def str_to_pydantic(class_str):
    """Turn pydantic model source code (a string) into the model class.

    ``BaseModel`` and ``Field`` are pre-seeded, so ``class_str`` does not
    need to import them.

    :param class_str: Python source defining at least one pydantic model.
    :return: the first ``BaseModel`` subclass defined by ``class_str``; if it
        defines none, the first name it defines (legacy behaviour).
    :raises IndexError: if ``class_str`` defines no names at all.
    """
    from pydantic import BaseModel, Field

    seeded = {"BaseModel": BaseModel, "Field": Field}
    # A single dict serves as both globals and locals. With separate dicts,
    # names defined at the top level of class_str are invisible inside later
    # class bodies and functions (exec then behaves like a class body).
    namespace = dict(seeded)
    # SECURITY: exec runs arbitrary code. class_str must come from a trusted
    # source; never pass user-controlled text here.
    exec(class_str, namespace)

    defined = {
        name: obj
        for name, obj in namespace.items()
        if name != "__builtins__" and seeded.get(name) is not obj
    }
    # Prefer a real model over imports or helper values that happen to be
    # defined before it.
    for obj in defined.values():
        if (
            isinstance(obj, type)
            and issubclass(obj, BaseModel)
            and obj is not BaseModel
        ):
            return obj
    if not defined:
        raise IndexError("class_str did not define any names")
    return next(iter(defined.values()))


def parse_pydantic_fields(class_str):
    """Turn pydantic model source code into a template input string.

    Collects every annotated field name (``name: type`` or
    ``name: type = value``) from every class in ``class_str`` and renders one
    line per distinct name in the form ``'<name>': ,``.

    :param class_str: Python source containing class definitions; it is only
        parsed, never executed.
    :return: the rendered lines joined with newlines ("" if no fields).
    :raises SyntaxError: if ``class_str`` is not valid Python.
    """
    import ast

    tree = ast.parse(class_str)
    # A dict is used as an ordered set: first-seen order, no duplicates.
    field_names = {}
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue
        for statement in node.body:
            # Annotated targets such as `obj.attr: int` have no `.id`.
            if isinstance(statement, ast.AnnAssign) and isinstance(
                statement.target, ast.Name
            ):
                field_names[statement.target.id] = None

    return "\n".join(f"'{name}': ," for name in field_names)


def extract_values(json_str):
    """Post-process chain output: pull the intent values out of JSON text.

    Finds every ``"一级意图": "..."`` (level-1 intent) and
    ``"二级意图": "..."`` (level-2 intent) pair by regex and returns the
    values in order of appearance.

    This is a regex scan, not a JSON parse: empty values are skipped and a
    value ends at the first double quote, so a captured value never contains
    one.

    :param json_str: text containing the JSON-like key/value pairs.
    :return: list of the matched value strings (possibly empty).
    """
    import re

    return re.findall(r'"(?:一级意图|二级意图)"\s*:\s*"([^"]+)"', json_str)


def check_and_return(intention_result, parser):
    """Look up the slot-structure strings for an intent result.

    :param intention_result: sequence of intent labels; the first element is
        the level-1 key and the whole sequence, as a tuple, is the level-2
        key.
    :param parser: object exposing ``pydantic_dict_lv1`` (mapping keyed by
        the level-1 label) and ``pydantic_dict_lv2`` (iterable of iterables
        of mappings keyed by the label tuple).
    :return: ``[value_lv1, value_lv2]``; each entry is ``None`` when no entry
        is found or the entry found is shorter than 35 characters. If several
        level-2 mappings qualify, the last one wins.
    """
    # Entries shorter than this are ignored.
    # NOTE(review): presumably this filters out empty or placeholder
    # definitions; confirm the intent of the threshold.
    min_length = 35

    if not intention_result:
        # Nothing to look up (the original raised IndexError here).
        return [None, None]

    key = intention_result[0]
    intention_tuple = tuple(intention_result)
    value_lv1 = None
    value_lv2 = None

    # Level-1 lookup.
    if key in parser.pydantic_dict_lv1:
        candidate = parser.pydantic_dict_lv1[key]
        if len(candidate) >= min_length:
            value_lv1 = candidate

    # Level-2 lookup across every mapping of every group.
    for group in parser.pydantic_dict_lv2:
        for mapping in group:
            if intention_tuple in mapping:
                candidate = mapping[intention_tuple]
                if len(candidate) >= min_length:
                    value_lv2 = candidate

    return [value_lv1, value_lv2]


# ---------------------------------------------
# Other utilities
# ---------------------------------------------


def transform_json(input_file: str, output_file: str):
    """Read a JSON file, convert its format, and save the result.

    Each input item must provide "name", "software_name" and
    "works_category". Each output item has the shape
    ``{"name", "synonymous": [], "description": {"software_name",
    "works_category"}}``.

    Best-effort: any failure is printed rather than raised. The output file
    is only opened after the conversion has succeeded.

    :param input_file: path of the input JSON file (a list of objects).
    :param output_file: path of the output JSON file.
    """
    import json

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        transformed_data = [
            {
                "name": item["name"],
                "synonymous": [],
                "description": {
                    "software_name": item["software_name"],
                    "works_category": item["works_category"],
                },
            }
            for item in data
        ]

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(transformed_data, f, ensure_ascii=False, indent=2)

        print(f"转换完成,结果已保存至 {output_file}")
    except Exception as e:
        # Deliberate top-level boundary: report the failure and carry on.
        print(f"转换失败: {e}")


def extract_required_values(data, result=None):
    """Recursively collect the values of all keys containing "name_required".

    :param data: JSON-like data (dict or list, arbitrarily nested).
    :param result: accumulator list used by the recursion; callers normally
        omit it.
    :return: list of the matching values in traversal order. A matching
        value that is itself a dict or list is both collected and searched.
    """
    if result is None:
        result = []

    if isinstance(data, dict):
        for key, value in data.items():
            # Non-string keys cannot contain the marker and would make the
            # `in` test raise.
            if isinstance(key, str) and "name_required" in key:
                result.append(value)
            if isinstance(value, (dict, list)):
                extract_required_values(value, result)
    elif isinstance(data, list):
        for item in data:
            extract_required_values(item, result)

    return result