Files
QueryRewrite/rag2_0/demo/vectorize_save_noun.py
T

48 lines
1.3 KiB
Python
Executable File

#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
File: vectorize_save_noun.py
Date: 2025-05-15
Description: 专业名词向量化和保存的示例程序
"""
import os
import json
from dotenv import load_dotenv
import sys
from rag2_0.intent_recognition import ProfessionalNounVectorizer
import logging
# Load environment variables via python-dotenv as soon as this module is executed.
load_dotenv()
def main():
    """
    Entry point: build the professional-noun index and save it.

    Resolves the data locations relative to this script, passes the noun
    JSON file to ``ProfessionalNounVectorizer.vectorize_files_and_save``,
    and logs whether that call reported success. Returns ``None``.
    """
    # Every path is anchored at the directory that contains this script.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    output_dir = os.path.join(script_dir, "..", "..", "data", "nouns")
    source_file = os.path.join(script_dir, "..", "..", "data/nouns/merged_nouns.json")

    # output_dir is handed to the vectorizer at construction time.
    vectorizer = ProfessionalNounVectorizer(output_dir=output_dir)

    # Vectorize and save in one call; a falsy result is treated as failure.
    if not vectorizer.vectorize_files_and_save([source_file]):
        logging.error("✗ 索引创建失败")
        return

    # The reported location is output_dir plus a fixed index name.
    index_path = os.path.join(output_dir, 'professional_nouns_index')
    logging.info("✓ 索引创建和保存成功")
    logging.info(f" 索引保存路径: {index_path}")
if __name__ == "__main__":
    # Send log records at INFO and above to the console, message text only.
    logging.basicConfig(format='%(message)s', level=logging.INFO)
    main()