125 lines
4.1 KiB
Python
125 lines
4.1 KiB
Python
# import requests
|
|
|
|
|
|
# class LLM:
|
|
# def __init__(self, model_uid, server_url):
|
|
|
|
# self.model_uid = model_uid
|
|
# self.server_url = server_url.rstrip("/")
|
|
|
|
# def generate(self, prompt, max_tokens=32000, temperature=0.2, **kwargs):
|
|
|
|
# url = f"{self.server_url}/v1/completions"
|
|
# headers = {"Content-Type": "application/json"}
|
|
# data = {
|
|
# "model": self.model_uid,
|
|
# "prompt": prompt,
|
|
# "max_tokens": max_tokens,
|
|
# "temperature": temperature,
|
|
# **kwargs,
|
|
# }
|
|
|
|
# try:
|
|
# response = requests.post(url, headers=headers, json=data)
|
|
# response.raise_for_status()
|
|
# result = response.json()
|
|
# return result["choices"][0]["text"]
|
|
# except requests.exceptions.RequestException as e:
|
|
# raise RuntimeError(f"请求失败: {e}")
|
|
# except KeyError:
|
|
# raise ValueError("响应格式错误,无法解析生成结果")
|
|
|
|
|
|
# llm = LLM(model_uid="QwQ-32b", server_url="http://172.20.0.145:9995")
|
|
|
|
|
|
class Embedding:
    """Minimal HTTP client for a text-embedding service.

    The client POSTs JSON to ``<server_url>/v1/embeddings`` and returns the
    ``embedding`` value of the first item in the response's ``data`` list.

    Args:
        model_uid: Model identifier sent as the ``model`` field of the request.
        server_url: Base URL of the service; trailing slashes are stripped.
        timeout: Seconds passed to ``requests.post`` as its ``timeout``.
            ``None`` waits indefinitely (the behaviour before this parameter
            existed).
    """

    def __init__(self, model_uid, server_url, timeout=60.0):
        self.model_uid = model_uid
        # Strip trailing slashes so the endpoint path can be appended with
        # exactly one "/" in embed().
        self.server_url = server_url.rstrip("/")
        self.timeout = timeout

    @staticmethod
    def _extract_embedding(result):
        """Return ``result["data"][0]["embedding"]`` or raise ``ValueError``."""
        try:
            return result["data"][0]["embedding"]
        except (KeyError, IndexError, TypeError) as e:
            # IndexError: "data" is an empty list; TypeError: the body (or
            # "data") is not subscriptable the way we expect. Both mean the
            # same thing to the caller as a missing key: a malformed response.
            raise ValueError("响应格式错误,无法解析嵌入结果") from e

    def embed(self, text):
        """Request an embedding for ``text`` and return it.

        Args:
            text: Value sent unchanged as the ``input`` field of the request.

        Returns:
            The ``embedding`` entry of the first element of the response's
            ``data`` list.

        Raises:
            RuntimeError: The HTTP request failed (connection error, timeout,
                non-2xx status, ...).
            ValueError: The response JSON does not have the expected
                ``data[0].embedding`` structure.
        """
        url = f"{self.server_url}/v1/embeddings"
        headers = {"Content-Type": "application/json"}
        data = {"model": self.model_uid, "input": text}

        try:
            # Without a timeout a stalled server would block the caller forever.
            response = requests.post(
                url, headers=headers, json=data, timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"请求失败: {e}") from e

        return self._extract_embedding(result)
|
|
|
|
|
|
# Module-level embedding client shared by importers of this module.
embedding = Embedding(
    server_url="http://10.1.16.39:9995",
    model_uid="bge-m3",
)
|
|
|
|
import os
from typing import Any, Dict, List, Optional

import requests
from langchain_community.llms.yi import YiLLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseLLM
from langchain_core.outputs import LLMResult, Generation
from langchain_openai import ChatOpenAI
from tenacity import retry, stop_after_attempt, wait_exponential
|
|
|
|
class SiliconFlowLLM(BaseLLM):
    """LangChain LLM wrapper that calls the SiliconFlow chat-completions API.

    Every prompt is sent as a single ``user`` message in its own HTTP POST to
    ``api_url``; the ``content`` of the first returned choice becomes the
    generation text.
    """

    # Full URL the chat-completion requests are POSTed to.
    api_url: str
    # Token sent as ``Authorization: Bearer <api_key>``.
    api_key: str
    # Model identifier placed in the ``model`` field of the payload.
    model: str
    # Seconds passed to ``requests.post`` as ``timeout``; ``None`` waits
    # indefinitely (the behaviour before this field existed).
    request_timeout: Optional[float] = 300.0

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Run one chat-completion request per prompt.

        Args:
            prompts: Prompt strings; each is sent as one ``user`` message.
            stop: Optional stop sequences, forwarded as the payload's ``stop``
                field when non-empty.
            run_manager: Accepted for interface compatibility; not used here.
            **kwargs: Accepted for interface compatibility; not forwarded.

        Returns:
            An ``LLMResult`` holding one single-element generation list per
            prompt, in the same order as ``prompts``.

        Raises:
            RuntimeError: The request or response parsing failed for a prompt;
                the original exception is attached as ``__cause__``.
        """
        # Headers are identical for every prompt, so build them once.
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        generations = []
        for prompt in prompts:
            payload: Dict[str, Any] = {
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
            }
            if stop:
                # Previously dropped silently, so callers' stop sequences
                # never reached the API.
                payload["stop"] = stop

            try:
                response = requests.post(
                    self.api_url,
                    json=payload,
                    headers=headers,
                    timeout=self.request_timeout,
                )
                response.raise_for_status()
                text = response.json()["choices"][0]["message"]["content"]
                generations.append([Generation(text=text)])
            except Exception as e:
                # Keep the original message format; chain the cause so the
                # underlying traceback is not lost.
                raise RuntimeError(f"调用硅基流动API失败: {str(e)}") from e

        return LLMResult(generations=generations)

    @property
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation."""
        return "siliconflow"
|
|
|
|
|
|
# Endpoint shared by all SiliconFlow-backed LLM instances below.
_SILICONFLOW_API_URL = "https://api.siliconflow.cn/v1/chat/completions"

# NOTE(security): the API key used to be hard-coded three times in this file.
# It is now read from the SILICONFLOW_API_KEY environment variable; the
# literal fallback is kept only so existing deployments keep working.
# TODO: rotate this key and remove the fallback from source control.
_SILICONFLOW_API_KEY = os.environ.get(
    "SILICONFLOW_API_KEY",
    "sk-bbeamiumkouptsrueilgufqqyuumelcsivxwjbdugqwsqhwj",
)

search_llm = SiliconFlowLLM(
    api_url=_SILICONFLOW_API_URL,
    api_key=_SILICONFLOW_API_KEY,
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
)

keyword_llm = SiliconFlowLLM(
    api_url=_SILICONFLOW_API_URL,
    api_key=_SILICONFLOW_API_KEY,
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
)

rewrite_llm = SiliconFlowLLM(
    api_url=_SILICONFLOW_API_URL,
    api_key=_SILICONFLOW_API_KEY,
    model="Qwen/Qwen2.5-72B-Instruct",
)
|