上传代码

This commit is contained in:
chentianrui
2025-09-08 17:58:02 +08:00
parent be848c3e78
commit f5f26c5cf8
76 changed files with 839039 additions and 2695 deletions
+89 -28
View File
@@ -1,41 +1,102 @@
import json
import os
import chardet
def detect_encoding(file_path):
    """Guess the text encoding of a file using chardet.

    The file is read in full as raw bytes and passed to ``chardet.detect``.
    The guess is printed to stdout and returned.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The ``"encoding"`` entry of chardet's result. The caller in this
        file treats a falsy value as "no guess available".
    """
    with open(file_path, "rb") as stream:
        guess = chardet.detect(stream.read())["encoding"]
    print(f"初步检测编码: {guess}")
    return guess
def convert_json_to_readable(input_folder, output_folder=None):
    """Re-save every JSON file in a folder as indented UTF-8.

    Files are selected by a case-insensitive ``.json`` suffix. Each file is
    decoded with the encoding reported by ``detect_encoding`` first, then
    with a fixed list of fallbacks (utf-8, gb18030, gbk, gb2312). The first
    encoding that both decodes and parses as JSON wins. The parsed document
    is written under the same file name with ``ensure_ascii=False`` and a
    4-space indent. Progress and a final summary are printed to stdout.

    Args:
        input_folder: Directory containing the JSON files to convert.
        output_folder: Directory to write the converted files to. When
            ``None``, the input folder is used, so files are overwritten
            in place.

    Returns:
        None. Per-file failures are reported in the printed summary rather
        than raised.
    """
    # isdir rather than exists: a path to a regular file must be rejected
    # here, otherwise os.listdir below raises NotADirectoryError.
    if not os.path.isdir(input_folder):
        print(f"错误:输入文件夹不存在: {input_folder}")
        return

    if output_folder is None:
        output_folder = input_folder
    os.makedirs(output_folder, exist_ok=True)

    json_files = [f for f in os.listdir(input_folder) if f.lower().endswith(".json")]
    if not json_files:
        print(f"警告:在 {input_folder} 中未找到任何 JSON 文件。")
        return

    processed_count = 0
    failed_files = []

    # Fallback encodings tried after the detected one, in this order.
    chinese_encodings = ["utf-8", "gb18030", "gbk", "gb2312"]

    for filename in json_files:
        input_file = os.path.join(input_folder, filename)
        output_file = os.path.join(output_folder, filename)

        try:
            print(f"正在处理: {filename}")

            detected_encoding = detect_encoding(input_file)

            # Detected encoding first, then the fallbacks. Names are compared
            # in lower case so a detection such as "GB2312" is not queued a
            # second time as "gb2312".
            # NOTE(review): a wrong guess of a permissive single-byte codec
            # would decode without error and yield mojibake; consider trying
            # strict UTF-8 before the detected encoding.
            encodings_to_try = []
            if detected_encoding:
                encodings_to_try.append(detected_encoding.lower())
            for enc in chinese_encodings:
                if enc not in encodings_to_try:
                    encodings_to_try.append(enc)

            # Success is tracked with an explicit flag: a JSON document that
            # is just `null` parses to None and must not count as a failure.
            parsed = False
            data = None
            for enc in encodings_to_try:
                try:
                    with open(input_file, "r", encoding=enc) as f:
                        data = json.loads(f.read())
                except UnicodeDecodeError:
                    print(f"  编码 '{enc}' 解码失败,尝试下一个...")
                except json.JSONDecodeError as je:
                    print(f"  编码 '{enc}' 解码成功,但 JSON 格式错误: {je}")
                except Exception as e:
                    # e.g. LookupError when the detected name is not a codec
                    # Python knows; keep going with the remaining candidates.
                    print(f"  使用编码 '{enc}' 失败: {e}")
                else:
                    print(f"  使用编码 '{enc}' 成功解码并解析 JSON")
                    parsed = True
                    break

            if not parsed:
                raise ValueError(f"所有编码尝试均失败: {encodings_to_try}")

            # Write the parsed document back as pretty-printed UTF-8.
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=4)

            print(f"✅ 转换成功: {output_file}")
            processed_count += 1

        except Exception as e:
            # Best-effort batch: record the failure and move on to the next file.
            print(f"❌ 转换失败: {filename} -> 错误: {str(e)}")
            failed_files.append(filename)

    print("\n--- 处理完成 ---")
    print(f"共处理 {len(json_files)} 个文件,成功 {processed_count} 个。")
    if failed_files:
        print(f"失败文件: {failed_files}")
if __name__ == "__main__":
    # Folder whose JSON files are converted; no output folder is passed,
    # so the converted files replace the originals.
    input_folder = r"project2json/outputs/json"

    # Run the batch conversion.
    convert_json_to_readable(input_folder)