167 lines
5.3 KiB
Python
167 lines
5.3 KiB
Python
def extract_names_from_json(file_path):
    """Return the ``"name"`` value of every object in a JSON array file.

    Args:
        file_path: Path of a UTF-8 encoded JSON file whose top-level value
            is a list.

    Returns:
        A list holding the ``"name"`` value of each dict element that has
        that key, in file order. Elements that are not dicts are skipped.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    import json

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The file must hold a list at the top level.
    if not isinstance(data, list):
        raise ValueError("JSON 文件的格式应为包含对象的列表")

    # Only dicts can carry a "name" key. The isinstance guard keeps scalars
    # and None from raising TypeError on `in`, and keeps plain strings from
    # being substring-tested.
    return [
        item["name"]
        for item in data
        if isinstance(item, dict) and "name" in item
    ]
|
|
|
|
# list 生成 问题类别
|
|
def define_suffix(input_str, fields):
    """Classify ``input_str`` by whether it mentions any of the given suffixes.

    Args:
        input_str: Text to scan.
        fields: Iterable of suffix strings to look for (for example
            ``"dwg"`` or ``"bt1"``). Empty strings are ignored.

    Returns:
        ``['后缀名问题']`` if any suffix occurs in ``input_str``, otherwise
        ``['未知']``. With no usable suffixes the result is ``['未知']``.
    """
    import re

    # Drop empty entries: an empty alternative matches the empty string and
    # would make every input look like a suffix problem.
    alternatives = [re.escape(field) for field in fields if field]
    if not alternatives:
        return ['未知']

    # Optional leading dot, no word boundaries: a suffix may be embedded in
    # a longer token.
    pattern = r'(?:\.?)(' + '|'.join(alternatives) + r')'

    # re.IGNORECASE makes the match case-insensitive.
    if re.search(pattern, input_str, re.IGNORECASE):
        return ['后缀名问题']
    return ['未知']
|
|
|
|
def output_suffix(input_str, fields):
    """Return the first occurrence in ``input_str`` of any suffix in ``fields``.

    Matching is case-insensitive and the returned text keeps the casing it
    has in ``input_str``. An optional leading dot is matched but is not part
    of the returned value.

    Args:
        input_str: Text to scan.
        fields: Iterable of suffix strings to look for. If it is empty the
            pattern matches the empty string, so ``''`` is returned.

    Returns:
        The matched suffix text.

    Raises:
        IndexError: If no suffix from ``fields`` occurs in ``input_str``.
    """
    import re

    pattern = r'(?:\.?)(' + '|'.join(re.escape(field) for field in fields) + r')'

    # Only the first hit is needed, so stop at it instead of collecting all.
    match = re.search(pattern, input_str, re.IGNORECASE)
    if match is None:
        # Same exception type as indexing an empty result list, but with a
        # message that says what went wrong.
        raise IndexError(f"no known suffix found in {input_str!r}")
    return match.group(1)
|
|
|
|
# str提取后缀名str
|
|
def match_suffix(input_str, fields):
    """Return the first suffix from ``fields`` found in ``input_str``.

    Matching is case-insensitive and the returned text keeps the casing it
    has in ``input_str``. An optional leading dot is matched but is not part
    of the returned value.

    Args:
        input_str: Text to scan.
        fields: Iterable of suffix strings to look for. Empty strings are
            ignored.

    Returns:
        The matched suffix text, or ``'未知'`` when nothing matches or when
        ``fields`` contains no usable suffix.
    """
    import re

    # Drop empty entries: an empty alternative matches the empty string, so
    # the function would return '' instead of the '未知' fallback.
    alternatives = [re.escape(field) for field in fields if field]
    if not alternatives:
        return '未知'

    pattern = r'(?:\.?)(' + '|'.join(alternatives) + r')'

    # Only the first hit is needed, so stop at it instead of collecting all.
    match = re.search(pattern, input_str, re.IGNORECASE)
    return match.group(1) if match else '未知'
|
|
|
|
# 将pydantic的str类型转为model
|
|
def str_to_pydantic(class_str):
    """Execute Pydantic model source code and return the model class it defines.

    ``BaseModel`` and ``Field`` are pre-imported for the snippet.

    SECURITY: ``class_str`` is run with ``exec``. It must come from a trusted
    source; never pass user-controlled text.

    Args:
        class_str: Python source that defines a ``BaseModel`` subclass.

    Returns:
        The first ``BaseModel`` subclass defined by ``class_str``. If the
        snippet defines no such subclass, the first other name it defines is
        returned.

    Raises:
        IndexError: If ``class_str`` defines nothing at all.
    """
    from pydantic import BaseModel, Field

    # One dict serves as both globals and locals. With separate dicts, class
    # bodies cannot see names the snippet itself imports or defines earlier
    # (e.g. `Optional` or a nested model), because class bodies resolve names
    # through globals only.
    namespace = {"BaseModel": BaseModel, "Field": Field}
    exec(class_str, namespace)

    # Everything the snippet added, in definition order. The pre-seeded
    # helpers and the `__builtins__` entry that exec inserts are excluded.
    defined = [
        value
        for name, value in namespace.items()
        if name != "__builtins__" and value is not BaseModel and value is not Field
    ]

    # Prefer a real model over whatever happens to be defined first (the
    # snippet may start with imports).
    for value in defined:
        if isinstance(value, type) and issubclass(value, BaseModel):
            return value

    if not defined:
        raise IndexError("class_str did not define any name")
    return defined[0]
|
|
|
|
# 将pydantic的str类型转为template的input str
|
|
def parse_pydantic_fields(class_str):
    """Render the annotated field names of the classes in ``class_str`` as a template.

    The source is only parsed, never executed. Every annotated assignment
    with a plain name target (``name: type`` or ``name: type = value``)
    directly in a class body counts as a field.

    Args:
        class_str: Python source code containing class definitions.

    Returns:
        One line per distinct field name, in the form ``'<name>': ,`` and
        joined with newlines. Empty string when there are no such fields.

    Raises:
        SyntaxError: If ``class_str`` is not valid Python.
    """
    import ast

    tree = ast.parse(class_str)

    # A dict is used as an ordered set so that a name repeated across classes
    # is emitted once, at its first position.
    field_names = {}
    for node in ast.walk(tree):
        if not isinstance(node, ast.ClassDef):
            continue
        for statement in node.body:
            # Annotated targets such as `obj.attr: int` have no `.id`, so
            # only plain names are accepted.
            if isinstance(statement, ast.AnnAssign) and isinstance(statement.target, ast.Name):
                field_names[statement.target.id] = None

    return "\n".join(f"'{name}': ," for name in field_names)
|
|
|
|
# 后缀名chains 后处理
|
|
def extract_values(json_str):
    """Pull the ``一级意图`` and ``二级意图`` string values out of JSON-like text.

    The text is scanned with a regular expression rather than parsed, so it
    does not have to be valid JSON as a whole.

    Args:
        json_str: Text containing ``"一级意图": "..."`` and/or
            ``"二级意图": "..."`` pairs.

    Returns:
        The non-empty string values of those keys, in order of appearance.
        A value is cut at its first double quote, so values containing
        escaped quotes are truncated.
    """
    import re

    # `[^"]+` can never capture a double quote, so the matches need no
    # further quote replacement.
    return re.findall(r'"(?:一级意图|二级意图)"\s*:\s*"([^"]+)"', json_str)
|
|
|
|
# 意图 槽位结构str返回
|
|
def check_and_return(intention_result, parser, min_length=35):
    """Look up the level-1 and level-2 entries for an intention result.

    Args:
        intention_result: Sequence of intention labels. Its first element is
            the level-1 key; the whole sequence, as a tuple, is the level-2
            key.
        parser: Object exposing ``pydantic_dict_lv1`` (a mapping keyed by the
            level-1 label) and ``pydantic_dict_lv2`` (an iterable of
            iterables of mappings keyed by label tuples).
        min_length: Minimum ``len()`` an entry must have to be returned.
            NOTE(review): the default of 35 presumably filters out
            placeholder or empty definitions; confirm with the data owner.

    Returns:
        ``[value_lv1, value_lv2]``. Each slot is ``None`` when no entry was
        found or the entry is shorter than ``min_length``. If several
        level-2 mappings contain the key, the last qualifying one wins.
        An empty ``intention_result`` yields ``[None, None]``.
    """
    # Nothing to look up; this also avoids IndexError on `[0]` below.
    if len(intention_result) == 0:
        return [None, None]

    key = intention_result[0]
    # Loop-invariant, so it is built once rather than per inner iteration.
    intention_tuple = tuple(intention_result)

    value_lv1 = None
    value_lv2 = None

    # Level 1: keyed by the first label only.
    if key in parser.pydantic_dict_lv1:
        candidate_lv1 = parser.pydantic_dict_lv1[key]
        if len(candidate_lv1) >= min_length:
            value_lv1 = candidate_lv1

    # Level 2: keyed by the full label tuple. Later matches overwrite
    # earlier ones.
    for group in parser.pydantic_dict_lv2:
        for mapping in group:
            if intention_tuple in mapping:
                candidate_lv2 = mapping[intention_tuple]
                if len(candidate_lv2) >= min_length:
                    value_lv2 = candidate_lv2

    return [value_lv1, value_lv2]
|
|
|
|
|
|
|
|
|
|
""" --------------------------------------------- 其他工具 --------------------------------------------- """
|
|
|
|
def transform_json(input_file: str, output_file: str):
    """Read a JSON file, reshape each record, and save the result.

    Every input record must provide ``name``, ``software_name`` and
    ``works_category``. Each one is rewritten as::

        {"name": ..., "synonymous": [],
         "description": {"software_name": ..., "works_category": ...}}

    Any failure (unreadable file, invalid JSON, missing key, ...) is reported
    with ``print`` and not raised.

    :param input_file: Path of the input JSON file.
    :param output_file: Path of the output JSON file.
    """
    # The docstring has to be the first statement to be picked up as
    # `__doc__`, so the import comes after it.
    import json

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        # Build the full result before opening the output, so a bad record
        # does not leave a truncated output file behind.
        transformed_data = [
            {
                "name": item["name"],
                "synonymous": [],
                "description": {
                    "software_name": item["software_name"],
                    "works_category": item["works_category"],
                },
            }
            for item in data
        ]

        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(transformed_data, f, ensure_ascii=False, indent=2)

        print(f"转换完成,结果已保存至 {output_file}")
    except Exception as e:
        # Deliberate best-effort boundary: report the failure, do not raise.
        print(f"转换失败: {e}")
|
|
|
|
|
|
def extract_required_values(data, result=None):
    """Recursively collect the values of all keys containing ``"name_required"``.

    Dicts and lists are walked depth-first. A matching value is appended
    first and, if it is itself a dict or list, then searched as well.

    :param data: Input JSON data (dict or list); any other type yields no matches.
    :param result: Accumulator list used by the recursion. If given, it is
        extended in place and returned.
    :return: List of the values of all matching keys, in traversal order.
    """
    if result is None:
        result = []

    if isinstance(data, dict):
        for key, value in data.items():
            # Substring test only makes sense for string keys; `in` would
            # raise TypeError on a non-string key such as an int.
            if isinstance(key, str) and "name_required" in key:
                result.append(value)
            if isinstance(value, (dict, list)):
                extract_required_values(value, result)
    elif isinstance(data, list):
        for item in data:
            extract_required_values(item, result)

    return result