import os
from langchain_community.vectorstores import FAISS
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.embeddings.base import Embeddings
from openai import OpenAI
import requests
import httpx
import logging
from extraction_info import info_data_txt, info_faiss_archived


class SiliconFlowEmbeddings(Embeddings):
    """Embedding model wrapper that calls a SiliconFlow HTTP embeddings endpoint.

    Texts are POSTed as JSON (``model``, ``input``, ``encoding_format``) and
    the vectors are read from ``response["data"][i]["embedding"]``.

    Args:
        api_key: Token sent in the ``Authorization: Bearer`` header.
        model: Model name placed in the request payload.
        url: Full URL of the embeddings endpoint.
        timeout: Seconds to wait for the server before ``requests`` raises
            a timeout error. Pass ``None`` to wait indefinitely.
    """

    def __init__(
        self,
        api_key: str,
        model: str = "bge-m3",
        *,
        url: str = "http://10.1.16.39:9995/v1/embeddings",
        timeout: float = 300.0,
    ):
        self.api_key = api_key
        self.model = model
        self.url = url
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _embed(self, input):
        """POST ``input`` (a list of strings) and return one vector per item.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no answer arrives within ``self.timeout``.
        """
        payload = {
            "model": self.model,
            "input": input,
            "encoding_format": "float",
        }
        # Without a timeout a stalled server would block this call forever.
        response = requests.post(
            self.url,
            json=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        data = response.json()
        # NOTE(review): assumes the server returns items in request order.
        return [item["embedding"] for item in data["data"]]

    def embed_documents(self, texts):
        """Return one embedding vector per entry of ``texts``."""
        if not texts:
            # Nothing to embed; avoid a pointless round trip to the server.
            return []
        return self._embed(texts)

    def embed_query(self, text):
        """Return the embedding vector for a single query string."""
        return self._embed([text])[0]


# Number of documents each top-k retriever returns.
_TOP_K = 3
# Size of the candidate pool MMR selects from. It must be >= _TOP_K, because
# MMR can never return more documents than it was given as candidates.
_MMR_FETCH_K = 20

# Alternative client kept for reference:
# embeddings = Embedding(url="http://10.1.16.39:9995/v1", api_key="xxx", model_name="bge-m3")
# NOTE(review): "xxx" looks like a placeholder key; confirm how the real key
# is supplied before deploying.
embeddings = SiliconFlowEmbeddings(api_key="xxx")

with open(info_data_txt, 'r', encoding='utf-8') as fh:
    # One document per line; blank lines carry no content and are skipped so
    # they are neither embedded nor returned as empty search hits.
    txt_list = [text for text in (line.strip() for line in fh) if text]

# Alternative: local HuggingFace model instead of the HTTP endpoint.
# embedding_path = "/data/Z_LLM_data/Embed_data/bge-m3"
# embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
# faiss_archived = "./data/faiss_data/data"

# Build the FAISS index from the loaded lines and persist it to disk.
vectorstore_txt_faiss = FAISS.from_texts(txt_list, embeddings)
vectorstore_txt_faiss.save_local(info_faiss_archived)

# Retriever 1: plain similarity search, top-k results.
retriever_txt_faiss1 = vectorstore_txt_faiss.as_retriever(
    search_kwargs={"k": _TOP_K},
)

# Retriever 2: maximal marginal relevance (MMR).
retriever_txt_faiss2 = vectorstore_txt_faiss.as_retriever(
    search_type="mmr",
    search_kwargs={
        "k": _TOP_K,  # number of results returned
        "fetch_k": _MMR_FETCH_K,  # number of candidates passed to MMR
        "lambda_mult": 0.5,  # balance factor: 1 = relevance, 0 = diversity
    },
)

# Retriever 3: similarity search filtered by a score threshold of 0.5.
retriever_txt_faiss3 = vectorstore_txt_faiss.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.5},
)


def _page_contents(retrieval_results):
    """Return the ``page_content`` of every retrieved document, in order."""
    return [doc.page_content for doc in retrieval_results]


def intersection_of_three_lists(input_str):
    """Return the texts that all three retrievers return for ``input_str``.

    The query is run through the similarity, MMR and score-threshold
    retrievers; only texts present in all three result lists are kept.

    Args:
        input_str: The query string passed to each retriever.

    Returns:
        A list of unique text strings, ordered as ranked by the first
        (plain similarity) retriever. Empty if nothing is shared.
    """
    primary = _page_contents(retriever_txt_faiss1.invoke(input_str))
    mmr_hits = set(_page_contents(retriever_txt_faiss2.invoke(input_str)))
    threshold_hits = set(_page_contents(retriever_txt_faiss3.invoke(input_str)))

    # dict.fromkeys de-duplicates while keeping the ranking order, so the
    # result is deterministic (plain set intersection has arbitrary order).
    return [
        text
        for text in dict.fromkeys(primary)
        if text in mmr_hits and text in threshold_hits
    ]