""" =================================== @Auther:WenZ @Company: BooWay @project:dify_lab =================================== """ import pandas as pd def read_title_column(csv_file: str) -> list: """ 读取CSV文件中的'title'列,并返回一个列表,使用UTF-8编码。 :param csv_file: CSV文件的路径 :return: 包含title列数据的列表 """ try: df = pd.read_csv(csv_file, encoding='utf-8') # 使用 UTF-8 编码读取 CSV if 'title' in df.columns: return df['title'].dropna().tolist() # 去除缺失值并转换为列表 else: raise ValueError("CSV文件中未找到'title'列") except Exception as e: print(f"读取文件时发生错误: {e}") return [] titles = read_title_column("info_data.csv") from langchain_huggingface import HuggingFaceEmbeddings embedding_path = "D:/迅雷下载/模型权重/bge-m3" embeddings = HuggingFaceEmbeddings(model_name=embedding_path) from langchain_community.vectorstores import Chroma chroma_archived = "chroma_titles" vectorstore_txt_chroma = Chroma.from_texts(titles, embeddings, persist_directory=chroma_archived)