103 lines
3.4 KiB
Python
103 lines
3.4 KiB
Python
import json
|
|
import os
|
|
import chardet
|
|
|
|
|
|
def detect_encoding(file_path):
    """Guess the text encoding of a file using chardet.

    The whole file is read as bytes and passed to ``chardet.detect``; the
    guessed encoding name is printed and then returned.

    Args:
        file_path: Path of the file to inspect.

    Returns:
        The value stored under the ``"encoding"`` key of chardet's result.
        NOTE(review): presumably ``None`` when chardet cannot decide —
        confirm against the chardet documentation.
    """
    with open(file_path, "rb") as stream:
        guess = chardet.detect(stream.read())

    detected = guess["encoding"]
    print(f"初步检测编码: {detected}")
    return detected
|
|
|
|
|
|
def _load_json_with_fallback(path, encodings):
    """Decode and parse ``path`` as JSON with the first encoding that works.

    Args:
        path: File to read.
        encodings: Encoding names to try, in order.

    Returns:
        The parsed JSON value. This may legitimately be ``None`` when the
        file contains the JSON literal ``null``.

    Raises:
        ValueError: If no encoding produces text that parses as JSON.
    """
    for enc in encodings:
        try:
            with open(path, "r", encoding=enc) as f:
                data = json.loads(f.read())
        except UnicodeDecodeError:
            print(f" 编码 '{enc}' 解码失败,尝试下一个...")
        except json.JSONDecodeError as je:
            print(f" 编码 '{enc}' 解码成功,但 JSON 格式错误: {je}")
        except Exception as e:
            # Anything else (e.g. a codec name Python does not know) is
            # logged and the remaining encodings are still tried.
            print(f" 使用编码 '{enc}' 失败: {e}")
        else:
            # Returning here, rather than testing the result against None,
            # keeps a file whose content is `null` from counting as a failure.
            print(f" 使用编码 '{enc}' 成功解码并解析 JSON")
            return data
    raise ValueError(f"所有编码尝试均失败: {encodings}")


def convert_json_to_readable(input_folder, output_folder=None):
    """Re-save every JSON file in a folder as indented UTF-8.

    Each file directly inside ``input_folder`` whose name ends in ``.json``
    (case-insensitive, no recursion) is decoded by trying the detected
    encoding first and then a fixed list of fallbacks (UTF-8, GB18030, GBK,
    GB2312). It is then parsed and written to ``output_folder`` under the
    same name with ``indent=4`` and ``ensure_ascii=False``. Progress and a
    final summary are printed; a file that fails is recorded and does not
    stop the batch.

    Args:
        input_folder: Directory that contains the JSON files.
        output_folder: Directory for the converted files; created if
            missing. When ``None`` the input folder is used, so the source
            files are overwritten in place.

    Returns:
        None. The function returns early (after printing a message) when
        ``input_folder`` is not a directory or contains no JSON files.
    """
    # isdir rather than exists: a path that points at a regular file is
    # reported here instead of crashing later in makedirs/listdir.
    if not os.path.isdir(input_folder):
        print(f"错误:输入文件夹不存在: {input_folder}")
        return

    if output_folder is None:
        output_folder = input_folder
    os.makedirs(output_folder, exist_ok=True)

    json_files = [f for f in os.listdir(input_folder) if f.lower().endswith(".json")]

    if not json_files:
        print(f"警告:在 {input_folder} 中未找到任何 JSON 文件。")
        return

    processed_count = 0
    failed_files = []

    # Fallback encodings, tried in this order after the detected one.
    chinese_encodings = ["utf-8", "gb18030", "gbk", "gb2312"]

    for filename in json_files:
        input_file = os.path.join(input_folder, filename)
        output_file = os.path.join(output_folder, filename)

        try:
            print(f"正在处理: {filename}")

            detected_encoding = detect_encoding(input_file)

            # Detected encoding first, then the fallbacks. Names are compared
            # in lowercase so a detector result such as "GB2312" does not
            # cause the same codec to be attempted twice.
            encodings_to_try = []
            if detected_encoding:
                encodings_to_try.append(detected_encoding.lower())
            for enc in chinese_encodings:
                if enc not in encodings_to_try:
                    encodings_to_try.append(enc)

            data = _load_json_with_fallback(input_file, encodings_to_try)

            # Always write the result as UTF-8, keeping non-ASCII characters
            # readable instead of \u-escaped.
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(data, f, ensure_ascii=False, indent=4)

            print(f"✅ 转换成功: {output_file}")
            processed_count += 1

        except Exception as e:
            # Per-file boundary: record the failure and continue the batch.
            print(f"❌ 转换失败: {filename} -> 错误: {str(e)}")
            failed_files.append(filename)

    print("\n--- 处理完成 ---")
    print(f"共处理 {len(json_files)} 个文件,成功 {processed_count} 个。")
    if failed_files:
        print(f"失败文件: {failed_files}")
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: convert the JSON files in the default directory.
    # No output folder is passed, so the converted files are written back
    # into the same directory.
    convert_json_to_readable(r"project2json/outputs/json")
|