上传文件至 /
4.3更新
This commit is contained in:
@@ -120,6 +120,17 @@ def get_keywords_v3(input_str):
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_keywords_v4(input_str):
    """Return the contents of the first two 【…】 bracket pairs in *input_str*.

    Args:
        input_str: Text to scan. ``None`` is treated as text containing no
            bracket pairs instead of raising ``TypeError``.

    Returns:
        A ``(first, second)`` tuple with the text found inside the first and
        the second 【】 pair, in order of appearance. A slot is ``None`` when
        the corresponding pair does not exist.
    """
    import re
    from itertools import islice

    if input_str is None:
        return None, None

    # Non-greedy group so each match ends at the nearest closing bracket.
    # Only two results are ever used, so stop scanning after the second one.
    found = [
        m.group(1)
        for m in islice(re.finditer(r'【(.*?)】', input_str), 2)
    ]
    # Pad with None so the unpacking below always sees exactly two slots.
    found += [None] * (2 - len(found))
    first, second = found
    return first, second
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def normalize_text(text: str, synonym_dict: dict) -> str:
|
def normalize_text(text: str, synonym_dict: dict) -> str:
|
||||||
import re
|
import re
|
||||||
@@ -141,3 +152,5 @@ def normalize_text(text: str, synonym_dict: dict) -> str:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
+40
-3
@@ -1,9 +1,44 @@
|
|||||||
import os
|
import os
|
||||||
from langchain_community.vectorstores import FAISS
|
from langchain_community.vectorstores import FAISS
|
||||||
from langchain_huggingface import HuggingFaceEmbeddings
|
# from langchain_huggingface import HuggingFaceEmbeddings
|
||||||
|
|
||||||
embedding_path = "/data/Z_LLM_data/Embed_data/bge-m3"
|
# embedding_path = "/data/Z/Z_llm_dm/vector_data/bge-m3"
|
||||||
embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
|
# embeddings = HuggingFaceEmbeddings(model_name=embedding_path)
|
||||||
|
|
||||||
|
|
||||||
|
from typing import List
|
||||||
|
import requests
|
||||||
|
from langchain.embeddings.base import Embeddings
|
||||||
|
|
||||||
|
|
||||||
|
class SiliconFlowEmbeddings(Embeddings):
    """LangChain ``Embeddings`` adapter backed by an HTTP embeddings endpoint.

    Every request POSTs a JSON body with ``model``, ``input`` and
    ``encoding_format`` to ``self.url`` and reads the vectors from the
    ``data[*].embedding`` entries of the JSON response.
    """

    def __init__(
        self,
        api_key: str,
        model: str = "bge-m3",
        url: str = "http://10.1.16.39:9995/v1/embeddings",
        timeout: float = 60.0,
    ):
        """Store connection settings; no network traffic happens here.

        Args:
            api_key: Token sent as ``Authorization: Bearer <api_key>``.
            model: Model name placed in the request payload.
            url: Embeddings endpoint. Defaults to the previously hard-coded
                address so existing callers are unaffected.
            timeout: Seconds to wait for the server before ``requests``
                raises a timeout error.
        """
        self.api_key = api_key
        self.model = model
        self.url = url
        self.timeout = timeout
        self.headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json"
        }

    def _embed(self, texts: List[str]) -> List[List[float]]:
        """POST *texts* to the endpoint and return one vector per text.

        Raises:
            requests.HTTPError: The server answered with an error status.
            requests.Timeout: No response arrived within ``self.timeout``.
        """
        # Nothing to embed: skip the round trip entirely.
        if not texts:
            return []

        payload = {
            "model": self.model,
            "input": texts,
            "encoding_format": "float"
        }
        # Without a timeout a stalled server would block the caller forever.
        response = requests.post(
            self.url,
            json=payload,
            headers=self.headers,
            timeout=self.timeout,
        )
        response.raise_for_status()
        data = response.json()
        # NOTE(review): assumes the server returns items in input order — confirm.
        return [item["embedding"] for item in data["data"]]

    def embed_documents(self, texts: List[str]) -> List[List[float]]:
        """Embed a batch of documents, returning one vector per document."""
        return self._embed(texts)

    def embed_query(self, text: str) -> List[float]:
        """Embed a single query string and return its vector."""
        return self._embed([text])[0]
|
||||||
|
|
||||||
|
# NOTE(security): the fallback key below is committed in source. Prefer setting
# SILICONFLOW_API_KEY in the environment, then rotate and remove the literal.
embeddings = SiliconFlowEmbeddings(
    api_key=os.environ.get(
        "SILICONFLOW_API_KEY",
        "sk-ftnofbucchwnscojohyxwmfzgaykdxihafnlphohsinftkbr",
    )
)
|
||||||
|
|
||||||
def Mixed_retrieval(input_path):
|
def Mixed_retrieval(input_path):
|
||||||
file_name = os.path.splitext(os.path.basename(input_path))[0]
|
file_name = os.path.splitext(os.path.basename(input_path))[0]
|
||||||
@@ -33,6 +68,8 @@ def Mixed_retrieval(input_path):
|
|||||||
|
|
||||||
return retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3
|
return retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def interface_search(input_str, retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3):
|
def interface_search(input_str, retriever_txt_faiss1, retriever_txt_faiss2, retriever_txt_faiss3):
|
||||||
index_keyword1 = []
|
index_keyword1 = []
|
||||||
for i in retriever_txt_faiss1.invoke(input_str):
|
for i in retriever_txt_faiss1.invoke(input_str):
|
||||||
|
|||||||
Reference in New Issue
Block a user