首次提交:上传本地文件夹

This commit is contained in:
ruxia
2025-03-31 17:28:23 +08:00
commit 0de349447c
439 changed files with 36643 additions and 0 deletions
+167
View File
@@ -0,0 +1,167 @@
def extract_names_from_json(file_path):
    """Return the ``"name"`` value of every object in a JSON array file.

    Args:
        file_path: Path to a UTF-8 encoded JSON file whose top-level value
            is a list.

    Returns:
        A list holding the ``"name"`` value of each dict entry that has that
        key, in file order. Entries that are not dicts are skipped.

    Raises:
        ValueError: If the top-level JSON value is not a list. Invalid JSON
            also surfaces as ``json.JSONDecodeError``, a ``ValueError``
            subclass.
        OSError: If the file cannot be opened.
    """
    import json

    with open(file_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    # The file must hold a list at the top level.
    if not isinstance(data, list):
        raise ValueError("JSON 文件的格式应为包含对象的列表")

    # Non-dict entries (numbers, strings, null, nested lists) cannot carry a
    # "name" key; skipping them avoids a TypeError/AttributeError on mixed
    # lists.
    return [
        item["name"]
        for item in data
        if isinstance(item, dict) and "name" in item
    ]
# list 生成 问题类别
def define_suffix(input_str, fields):
    """Classify ``input_str`` by whether it mentions any known suffix.

    The check is a case-insensitive substring search: a field counts as
    present wherever it occurs in ``input_str``, with or without a leading
    dot, and even when it is part of a longer token.

    Args:
        input_str: Text to inspect.
        fields: Iterable of suffix strings to look for (for example
            ``"xzwb"``, ``"bpz17"``, ``"dwg"``). Empty entries are ignored.

    Returns:
        ``['后缀名问题']`` if at least one field occurs in ``input_str``,
        otherwise ``['未知']`` (also when ``fields`` has no usable entry).
    """
    import re

    # Drop empty entries: an empty alternative matches at every position and
    # would make every input, even "", look like a suffix problem.
    candidates = [re.escape(field) for field in fields if field]
    if not candidates:
        return ['未知']

    # No \b anchors on purpose, so a field may be part of a larger string.
    pattern = r'(?:\.?)(' + '|'.join(candidates) + r')'
    if re.search(pattern, input_str, re.IGNORECASE):
        return ['后缀名问题']
    return ['未知']
def output_suffix(input_str, fields):
    """Return the first known suffix that occurs in ``input_str``.

    Matching is case-insensitive and not anchored to word boundaries. The
    returned text is taken from ``input_str`` (so it keeps the input's
    casing) and does not include the optional leading dot consumed by the
    pattern prefix.

    Args:
        input_str: Text to search.
        fields: Iterable of suffix strings to look for. Empty entries are
            ignored. When several fields could match at the same position,
            the one listed first wins (regex alternation order).

    Returns:
        The matched suffix text, or ``'未知'`` when nothing matches or
        ``fields`` has no usable entry.
    """
    import re

    # An empty alternative would match the empty string everywhere.
    candidates = [re.escape(field) for field in fields if field]
    if not candidates:
        return '未知'

    pattern = r'(?:\.?)(' + '|'.join(candidates) + r')'
    # Only the first hit is needed, so stop at it instead of collecting all.
    found = re.search(pattern, input_str, re.IGNORECASE)
    # Falling back to '未知' instead of raising IndexError mirrors the
    # "unknown" marker used by the other suffix helpers in this file.
    return found.group(1) if found else '未知'
# str提取后缀名str
def match_suffix(input_str, fields):
    """Extract the first known suffix found in ``input_str``.

    Matching is case-insensitive and not anchored to word boundaries. The
    returned text is taken from ``input_str`` (so it keeps the input's
    casing) and does not include the optional leading dot consumed by the
    pattern prefix.

    Args:
        input_str: Text to search.
        fields: Iterable of suffix strings to look for. Empty entries are
            ignored. When several fields could match at the same position,
            the one listed first wins (regex alternation order).

    Returns:
        The matched suffix text, or ``'未知'`` when nothing matches or
        ``fields`` has no usable entry.
    """
    import re

    # An empty alternative matches the empty string, which previously made
    # an empty ``fields`` return '' instead of the '未知' fallback.
    candidates = [re.escape(field) for field in fields if field]
    if not candidates:
        return '未知'

    pattern = r'(?:\.?)(' + '|'.join(candidates) + r')'
    # Only the first hit is needed, so stop at it instead of collecting all.
    found = re.search(pattern, input_str, re.IGNORECASE)
    return found.group(1) if found else '未知'
# 将pydantic的str类型转为model
def str_to_pydantic(class_str):
    """Build a pydantic model class from its Python source text.

    ``class_str`` is executed with ``BaseModel`` and ``Field`` already in
    scope, and the first ``BaseModel`` subclass it defines is returned.

    Args:
        class_str: Python source that defines at least one class.

    Returns:
        The first ``BaseModel`` subclass defined by ``class_str``. If the
        source defines no such class, the first object it defines is
        returned instead (the earlier behaviour of this function).

    Raises:
        IndexError: If ``class_str`` defines nothing at all.
    """
    from pydantic import BaseModel, Field

    # SECURITY: exec() runs arbitrary code. Only pass source text that comes
    # from a trusted place; never feed it unreviewed external/user input.
    preset = {"BaseModel": BaseModel, "Field": Field}
    namespace = dict(preset)
    # A single dict serves as both globals and locals. With two separate
    # dicts the source runs as if it were inside a class body, so a class in
    # ``class_str`` cannot see imports or sibling classes defined before it.
    exec(class_str, namespace)

    # Keep only what the source itself bound, in definition order. Re-binding
    # a preset name to the very same object (e.g. re-importing BaseModel)
    # does not count as a definition.
    defined = [
        obj
        for name, obj in namespace.items()
        if name != "__builtins__"
        and not (name in preset and obj is preset[name])
    ]

    for obj in defined:
        if (
            isinstance(obj, type)
            and issubclass(obj, BaseModel)
            and obj is not BaseModel
        ):
            return obj

    if defined:
        # No model class found: fall back to the first defined object.
        return defined[0]
    # IndexError is kept because that is what the previous implementation
    # raised for an empty definition.
    raise IndexError("class_str did not define any names")
# 将pydantic的str类型转为template的input str
def parse_pydantic_fields(class_str):
    """Render the annotated field names in ``class_str`` as template lines.

    The source is parsed (not executed). Every ``name: annotation``
    statement located directly in a class body counts as a field; fields of
    all classes in the source are merged, keeping first-seen order and
    dropping duplicate names.

    Args:
        class_str: Python source text containing class definitions.

    Returns:
        One line per field in the form ``'<name>': ,`` joined with newlines,
        or an empty string when no annotated field is found.

    Raises:
        SyntaxError: If ``class_str`` is not valid Python.
    """
    import ast

    # Dict keys give ordered de-duplication; the values are irrelevant.
    field_names = {}
    for node in ast.walk(ast.parse(class_str)):
        if not isinstance(node, ast.ClassDef):
            continue
        for statement in node.body:
            # Only plain ``name: type`` annotations are fields. Annotated
            # attribute/subscript targets have no ``.id`` and are skipped
            # instead of raising AttributeError.
            if isinstance(statement, ast.AnnAssign) and isinstance(
                statement.target, ast.Name
            ):
                field_names[statement.target.id] = None

    return "\n".join(f"'{name}': ," for name in field_names)
# 后缀名chains 后处理
def extract_values(json_str):
    """Extract the first- and second-level intent values from JSON-like text.

    A regular expression is used instead of a JSON parser, so the text does
    not have to be strictly valid JSON. Only non-empty string values of the
    keys ``"一级意图"`` and ``"二级意图"`` are picked up.

    Args:
        json_str: Text containing ``"一级意图": "..."`` and/or
            ``"二级意图": "..."`` pairs.

    Returns:
        The matched values in order of appearance; an empty list when the
        text contains none.
    """
    import re

    # The capture group excludes double quotes, so a value can never contain
    # one; no quote replacement is needed on the results.
    return re.findall(r'"(?:一级意图|二级意图)"\s*:\s*"([^"]+)"', json_str)
# 意图 槽位结构str返回
def check_and_return(intention_result, parser, min_length=35):
    """Look up the level-1 and level-2 entries registered for an intent.

    Args:
        intention_result: Sequence of recognised intent labels. Its first
            element is the level-1 key; the whole sequence, as a tuple, is
            the level-2 key.
        parser: Object exposing ``pydantic_dict_lv1`` (a mapping keyed by the
            level-1 label) and ``pydantic_dict_lv2`` (iterated here as an
            iterable of iterables of mappings keyed by intent tuples --
            presumably a list of lists of dicts; confirm against the parser
            class).
        min_length: Minimum ``len()`` an entry needs in order to be returned.
            NOTE(review): the default of 35 looks like a cut-off that filters
            out trivially short entries -- confirm its origin.

    Returns:
        ``[value_lv1, value_lv2]``. Each slot is ``None`` when no entry
        exists for the intent or the entry is shorter than ``min_length``.
        An empty ``intention_result`` yields ``[None, None]``.
    """
    # Nothing recognised means nothing to look up; this also avoids an
    # IndexError on the [0] access below.
    if not intention_result:
        return [None, None]

    value_lv1 = None
    value_lv2 = None

    first_level = intention_result[0]
    if first_level in parser.pydantic_dict_lv1:
        candidate = parser.pydantic_dict_lv1[first_level]
        if len(candidate) >= min_length:
            value_lv1 = candidate

    # Built once: the key does not change while scanning the level-2 data.
    intention_tuple = tuple(intention_result)
    for group in parser.pydantic_dict_lv2:
        for mapping in group:
            if intention_tuple in mapping:
                candidate = mapping[intention_tuple]
                if len(candidate) >= min_length:
                    # No break: when several mappings qualify, the last one
                    # wins.
                    value_lv2 = candidate

    return [value_lv1, value_lv2]
""" --------------------------------------------- 其他工具 --------------------------------------------- """
def transform_json(input_file: str, output_file: str) -> None:
    """Read a JSON list of records and save it in a reshaped layout.

    Every input record must provide the keys ``"name"``,
    ``"software_name"`` and ``"works_category"``. Each one is written out as::

        {
          "name": <name>,
          "synonymous": [],
          "description": {
            "software_name": <software_name>,
            "works_category": <works_category>
          }
        }

    Any other keys of the input records are dropped. The outcome (success or
    failure) is printed to stdout; failures are not raised.

    :param input_file: Path of the JSON file to read (UTF-8).
    :param output_file: Path of the JSON file to write (UTF-8, indented,
        non-ASCII characters kept as-is).
    """
    import json

    try:
        with open(input_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        transformed_data = [
            {
                "name": item["name"],
                "synonymous": [],
                "description": {
                    "software_name": item["software_name"],
                    "works_category": item["works_category"],
                },
            }
            for item in data
        ]

        # The output file is opened only after the whole transformation
        # succeeded, so a bad input record never leaves a partial file.
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(transformed_data, f, ensure_ascii=False, indent=2)
        print(f"转换完成,结果已保存至 {output_file}")
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and carry on
        # rather than propagate it to the caller.
        print(f"转换失败: {e}")
def extract_required_values(data, result=None):
    """Collect the values of every key that contains ``"name_required"``.

    The data is walked depth-first. A matching key contributes its value as
    is (even when that value is itself a dict or list), and dict/list values
    are then searched as well, whether or not their key matched.

    :param data: JSON-like data (dict or list; any other type yields
        nothing).
    :param result: List to append to. Used by the recursion; callers
        normally leave it as ``None`` to get a fresh list.
    :return: The list of collected values, in traversal order. This is
        ``result`` itself when one was passed in.
    """
    if result is None:
        result = []

    if isinstance(data, dict):
        for key, value in data.items():
            # JSON keys are always strings; the type check keeps a plain
            # Python dict with non-string keys from raising TypeError.
            if isinstance(key, str) and "name_required" in key:
                result.append(value)
            if isinstance(value, (dict, list)):
                extract_required_values(value, result)
    elif isinstance(data, list):
        for item in data:
            extract_required_values(item, result)

    return result