112 lines
3.5 KiB
Python
112 lines
3.5 KiB
Python
import logging
import os
from typing import Any, Dict, List, Optional

import httpx
import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseLLM
from langchain_core.outputs import LLMResult, Generation
from openai import OpenAI
|
|
|
|
|
|
# class OpenAiLLM:
#
#     def __init__(self, url, api_key, model_name):
#         self._api_key = api_key
#         self._url = url
#         self._model = model_name
#
#     def generate(self, prompt):
#
#         client = OpenAI(api_key=self._api_key, base_url=self._url)
#
#         try:
#             # Create the Completion request
#             completion = client.chat.completions.create(
#                 model=self._model,
#                 messages=[
#                     {"role": "system", "content": "You are a helpful assistant"},
#                     {"role": "user", "content": prompt}
#                 ],
#                 timeout=httpx.Timeout(300.0),
#                 temperature=0.7,
#             )
#             return completion.choices[0].message.content
#         except Exception as e:
#             logging.error(f"LLM调用出错: {e}")
#             return f"模型调用失败: {str(e)}"
#
#
# llm = OpenAiLLM(
#     url="http://172.20.0.145:9995/v1",
#     api_key="xxx",
#     model_name="deepseek-r1-distill-qwen2.5-32b",
# )
|
|
|
|
|
|
class Embedding:
    """Client for an embeddings endpoint reached through the OpenAI SDK.

    Holds the connection settings and requests one embedding per call to
    :meth:`embed`.
    """

    def __init__(self, url, api_key, model_name):
        """Store the endpoint settings.

        Args:
            url: Base URL handed to the OpenAI client as ``base_url``.
            api_key: API key handed to the OpenAI client.
            model_name: Model identifier sent with every embeddings request.
        """
        self._api_key = api_key
        self._url = url
        self._model = model_name

    def embed(self, text):
        """Return the embedding for *text*.

        Args:
            text: Value forwarded unchanged as the ``input`` of the request.

        Returns:
            The ``embedding`` attribute of the first item in the response data.

        Raises:
            RuntimeError: If the request or the response handling fails. The
                original exception is attached as ``__cause__``.
        """
        # The context manager closes the client's HTTP resources once the
        # call is finished, instead of leaving one open client per call.
        with OpenAI(api_key=self._api_key, base_url=self._url) as client:
            try:
                # Call the embeddings API with a 60 second timeout.
                response = client.embeddings.create(
                    model=self._model,
                    input=text,
                    timeout=httpx.Timeout(60.0),
                )
                # Hand back the embedding vector of the first result.
                return response.data[0].embedding
            except Exception as e:
                logging.error(f"嵌入模型调用出错: {e}")
                # Chain the cause so the underlying failure stays visible.
                raise RuntimeError(f"嵌入请求失败: {str(e)}") from e
|
|
|
|
|
|
# Module-level Embedding instance configured for the "bge-m3" model.
embedding = Embedding(
    url="http://172.20.0.145:9995/v1",
    api_key="xxx",
    model_name="bge-m3",
)
|
|
|
|
|
|
class SiliconFlowLLM(BaseLLM):
    """Custom LLM class that calls SiliconFlow models over HTTP."""

    # Full URL that receives the chat-completions POST request.
    api_url: str
    # Token sent as ``Authorization: Bearer <api_key>``.
    api_key: str
    # Model identifier placed in the request payload.
    model: str
    # Seconds to wait for the HTTP request before giving up; without a
    # timeout ``requests.post`` can block indefinitely.
    request_timeout: float = 300.0

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Send each prompt as a single user message and collect the replies.

        Args:
            prompts: Prompts to complete; one HTTP request is made per prompt.
            stop: Optional stop sequences, forwarded as ``stop`` in the
                request payload when provided.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            An ``LLMResult`` holding one single-item generation list per
            prompt, in prompt order.

        Raises:
            Exception: If the request fails, returns an error status, or the
                response lacks the expected ``choices[0].message.content``
                path. The original exception is attached as ``__cause__``.
        """
        # Headers are identical for every prompt, so build them once.
        headers = {"Authorization": f"Bearer {self.api_key}", "Content-Type": "application/json"}

        generations = []
        for prompt in prompts:
            try:
                payload: Dict[str, Any] = {
                    "model": self.model,
                    "messages": [{"role": "user", "content": prompt}],
                }
                if stop:
                    # Honour caller-supplied stop sequences instead of
                    # silently dropping them.
                    payload["stop"] = stop
                response = requests.post(
                    self.api_url,
                    json=payload,
                    headers=headers,
                    timeout=self.request_timeout,
                )
                response.raise_for_status()
                text = response.json()["choices"][0]["message"]["content"]
            except Exception as e:
                raise Exception(f"调用硅基流动API失败: {str(e)}") from e
            generations.append([Generation(text=text)])

        return LLMResult(generations=generations)

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM type."""
        return "siliconflow"
|
|
|
|
|
|
# NOTE(review): an API key is committed in source below. Supply the key via
# the SILICONFLOW_API_KEY environment variable instead, and consider rotating
# the committed key. The literal remains only as a fallback so existing
# setups that rely on it keep working.
llm = SiliconFlowLLM(
    api_url="https://api.siliconflow.cn/v1/chat/completions",
    api_key=os.environ.get(
        "SILICONFLOW_API_KEY",
        "sk-bbeamiumkouptsrueilgufqqyuumelcsivxwjbdugqwsqhwj",
    ),
    model="Qwen/Qwen2.5-72B-Instruct",
)
|