上传文件至 /
4.3更新
This commit is contained in:
@@ -120,6 +120,17 @@ def get_keywords_v3(input_str):
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_keywords_v4(input_str):
    """Return the contents of the first two 【...】 groups in ``input_str``.

    Args:
        input_str: Text to scan. ``None`` is tolerated and treated as
            "nothing found".

    Returns:
        A ``(first, second)`` tuple. Each element is the text found between
        a 【 and the nearest following 】 (possibly an empty string), or
        ``None`` when the input does not contain that many bracketed groups.
    """
    import re
    from itertools import islice

    # A missing input has no keywords; return the same shape as "no match"
    # instead of letting the regex engine raise TypeError.
    if input_str is None:
        return None, None

    # Only the first two groups are ever used, so stop scanning after them
    # rather than collecting every match in the string.
    leading = [
        match.group(1)
        for match in islice(re.finditer(r'【(.*?)】', input_str), 2)
    ]

    # Pad with None so the caller always receives exactly two values.
    leading.extend([None] * (2 - len(leading)))
    first, second = leading
    return first, second
|
||||
|
||||
|
||||
|
||||
def normalize_text(text: str, synonym_dict: dict) -> str:
|
||||
import re
|
||||
@@ -141,3 +152,5 @@ def normalize_text(text: str, synonym_dict: dict) -> str:
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
+40
-3
@@ -1,9 +1,44 @@
|
||||
import os
|
||||
from langchain_community.vectorstores import FAISS
|
||||
from langchain_huggingface import HuggingFaceEmbeddings
|
||||
# from langchain_huggingface import HuggingFaceEmbeddings
|
||||
|
||||
embedding_path = "/data/Z_LLM_data/Embed_data/bge-m3"
|
||||
embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
|
||||
# embedding_path = "/data/Z/Z_llm_dm/vector_data/bge-m3"
|
||||
# embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
|
||||
|
||||
|
||||
from typing import List
|
||||
import requests
|
||||
from langchain.embeddings.base import Embeddings
|
||||
|
||||
|
||||
class SiliconFlowEmbeddings(Embeddings):
    """Embeddings implementation backed by an HTTP ``/v1/embeddings`` endpoint.

    Each call POSTs a JSON payload with ``model``, ``input`` and
    ``encoding_format`` fields and reads the vectors from the
    ``data[i]["embedding"]`` entries of the JSON response.
    """

    def __init__(
        self,
        api_key: str,
        model: str = "bge-m3",
        url: str = "http://10.1.16.39:9995/v1/embeddings",
        timeout: float = 60.0,
    ):
        """Store connection settings and pre-build the request headers.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            model: Model name placed in the request payload.
            url: Endpoint that receives the embedding requests. Defaults to
                the address this class was originally hard-wired to.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error instead of blocking indefinitely.
        """
        self.api_key = api_key
        self.model = model
        self.url = url
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

    def _embed(self, input: List[str]) -> List[List[float]]:
        """Request embeddings for ``input`` and return one vector per item.

        Args:
            input: Texts to embed. (The name mirrors the payload field and is
                kept for compatibility with existing callers.)

        Returns:
            The ``"embedding"`` value of every entry in the response's
            ``"data"`` list, in the order the server returned them.

        Raises:
            requests.HTTPError: If the server answers with an error status.
            requests.Timeout: If no response arrives within ``self.timeout``.
        """
        # Nothing to embed: skip the network round trip entirely.
        if not input:
            return []

        payload = {
            "model": self.model,
            "input": input,
            "encoding_format": "float"
        }
        # Without an explicit timeout, requests waits forever on a stalled
        # server, which would hang every retrieval call.
        response = requests.post(
            self.url,
            json=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        data = response.json()
        return [item["embedding"] for item in data["data"]]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of documents with a single request."""
        return self._embed(texts)

    def embed_query(self, text: str) -> List[float]:
        """Embed one query string and return its vector."""
        return self._embed([text])[0]
|
||||
|
||||
# Module-level embedding client used when building the vector stores.
# NOTE(review): an API key is committed to source here. The environment
# variable SILICONFLOW_API_KEY now takes precedence; the literal is kept only
# as a fallback so existing deployments keep working. Rotate this key and
# remove the fallback once the variable is set everywhere.
embeddings = SiliconFlowEmbeddings(
    api_key=os.environ.get(
        "SILICONFLOW_API_KEY",
        "sk-ftnofbucchwnscojohyxwmfzgaykdxihafnlphohsinftkbr",
    )
)
|
||||
|
||||
def Mixed_retrieval(input_path):
|
||||
file_name = os.path.splitext(os.path.basename(input_path))[0]
|
||||
@@ -33,6 +68,8 @@ def Mixed_retrieval(input_path):
|
||||
|
||||
return retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3
|
||||
|
||||
|
||||
|
||||
def interface_search(input_str, retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3):
|
||||
index_keyword1 = []
|
||||
for i in retriever_txt_faiss1.invoke(input_str):
|
||||
|
||||
Reference in New Issue
Block a user