import json
import os

try:
    import chardet
except ImportError:
    # chardet only supplies an extra first guess; the fixed candidate list
    # below is enough to decode UTF-8 and GB-family files without it.
    chardet = None

# Encodings tried for every file. "utf-8-sig" decodes plain UTF-8 as well as
# UTF-8 with a BOM, which json.loads() would otherwise reject.
_FALLBACK_ENCODINGS = ("utf-8-sig", "gb18030", "gbk", "gb2312")


def detect_encoding(file_path):
    """Guess the text encoding of a file with chardet.

    Args:
        file_path: Path of the file to inspect. The whole file is read in
            binary mode when chardet is available.

    Returns:
        The encoding name reported by chardet, or None when chardet reports
        no encoding or is not installed.
    """
    encoding = None
    if chardet is not None:
        with open(file_path, "rb") as f:
            raw_data = f.read()
        encoding = chardet.detect(raw_data)["encoding"]
    print(f"初步检测编码: {encoding}")
    return encoding


def _candidate_encodings(detected_encoding):
    """Return the ordered, duplicate-free list of encodings to try."""
    # Strict UTF-8 goes first: it rejects almost all non-UTF-8 byte sequences,
    # whereas a wrong single-byte guess from the detector would decode UTF-8
    # bytes "successfully" and silently produce mojibake.
    candidates = ["utf-8-sig"]
    if detected_encoding:
        detected = detected_encoding.lower()
        # Plain UTF-8 is already covered by the utf-8-sig attempt.
        candidates.append("utf-8-sig" if detected == "utf-8" else detected)
    candidates.extend(_FALLBACK_ENCODINGS)
    # dict.fromkeys drops repeats while keeping first-seen order.
    return list(dict.fromkeys(candidates))


def _load_json_with_fallback(input_file, encodings):
    """Parse input_file as JSON using the first encoding that works.

    Raises:
        ValueError: If no encoding yields text that parses as JSON.
    """
    # Read once; every attempt decodes the same bytes.
    with open(input_file, "rb") as f:
        raw_data = f.read()

    for enc in encodings:
        try:
            data = json.loads(raw_data.decode(enc))
        except UnicodeDecodeError:
            print(f" 编码 '{enc}' 解码失败,尝试下一个...")
        except json.JSONDecodeError as je:
            print(f" 编码 '{enc}' 解码成功,但 JSON 格式错误: {je}")
        except Exception as e:
            # e.g. LookupError for an encoding name Python does not know.
            print(f" 使用编码 '{enc}' 失败: {e}")
        else:
            print(f" 使用编码 '{enc}' 成功解码并解析 JSON")
            # Return from the success branch rather than testing the value:
            # a document consisting of `null` legitimately parses to None.
            return data

    raise ValueError(f"所有编码尝试均失败: {encodings}")


def _write_json_atomically(output_file, data):
    """Write data as indented UTF-8 JSON without risking a truncated target."""
    # The default output folder is the input folder, so the target is usually
    # the source file itself; write elsewhere first and swap it in only once
    # the write has fully succeeded.
    temp_file = f"{output_file}.tmp"
    try:
        with open(temp_file, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=4)
        os.replace(temp_file, output_file)
    finally:
        # After a successful replace the temp file no longer exists.
        if os.path.exists(temp_file):
            os.remove(temp_file)


def convert_json_to_readable(input_folder, output_folder=None):
    """Rewrite every JSON file in a folder as indented UTF-8.

    Each file is decoded with strict UTF-8 first, then with the encoding
    guessed by detect_encoding(), then with the GB-family fallbacks
    (gb18030, gbk, gb2312). Progress and a final summary are printed; a file
    that cannot be converted is reported and skipped.

    Args:
        input_folder: Folder whose ``*.json`` files (case-insensitive
            extension, non-recursive) are converted.
        output_folder: Destination folder, created if missing. When None the
            files are rewritten in place in input_folder.

    Returns:
        None.
    """
    if not os.path.exists(input_folder):
        print(f"错误:输入文件夹不存在: {input_folder}")
        return

    if output_folder is None:
        output_folder = input_folder
    os.makedirs(output_folder, exist_ok=True)

    json_files = [
        name
        for name in os.listdir(input_folder)
        if name.lower().endswith(".json")
        and os.path.isfile(os.path.join(input_folder, name))
    ]
    if not json_files:
        print(f"警告:在 {input_folder} 中未找到任何 JSON 文件。")
        return

    processed_count = 0
    failed_files = []

    for filename in json_files:
        input_file = os.path.join(input_folder, filename)
        output_file = os.path.join(output_folder, filename)
        # Boundary handler: one bad file must not abort the whole batch.
        try:
            print(f"正在处理: {filename}")
            encodings_to_try = _candidate_encodings(detect_encoding(input_file))
            data = _load_json_with_fallback(input_file, encodings_to_try)
            _write_json_atomically(output_file, data)
            print(f"✅ 转换成功: {output_file}")
            processed_count += 1
        except Exception as e:
            print(f"❌ 转换失败: {filename} -> 错误: {str(e)}")
            failed_files.append(filename)

    print("\n--- 处理完成 ---")
    print(f"共处理 {len(json_files)} 个文件,成功 {processed_count} 个。")
    if failed_files:
        print(f"失败文件: {failed_files}")


if __name__ == "__main__":
    input_folder = r"E:\文件\LLM_model\RAG\code\Engineering_data_KG-1\4、模版指标库\test"
    convert_json_to_readable(input_folder)