"""Model clients: an OpenAI-compatible embedding client and a SiliconFlow LLM wrapper."""

import logging
import os
from typing import Any, Dict, List, Optional

import httpx
import requests
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseLLM
from langchain_core.outputs import Generation, LLMResult
from openai import OpenAI

# Legacy OpenAI-client based LLM wrapper, kept commented out for reference.
# class OpenAiLLM:
#     def __init__(self, url, api_key, model_name):
#         self._api_key = api_key
#         self._url = url
#         self._model = model_name
#
#     def generate(self, prompt):
#         client = OpenAI(api_key=self._api_key, base_url=self._url)
#         try:
#             # Create the completion request
#             completion = client.chat.completions.create(
#                 model=self._model,
#                 messages=[
#                     {"role": "system", "content": "You are a helpful assistant"},
#                     {"role": "user", "content": prompt}
#                 ],
#                 timeout=httpx.Timeout(300.0),
#                 temperature=0.7,
#             )
#             return completion.choices[0].message.content
#         except Exception as e:
#             logging.error(f"LLM调用出错: {e}")
#             return f"模型调用失败: {str(e)}"
#
#
# llm = OpenAiLLM(
#     url="http://172.20.0.145:9995/v1",
#     api_key="xxx",
#     model_name="deepseek-r1-distill-qwen2.5-32b",
# )


class Embedding:
    """Thin wrapper around the embeddings endpoint of an OpenAI-style client.

    Args:
        url: Base URL passed to the ``OpenAI`` client as ``base_url``.
        api_key: API key passed to the ``OpenAI`` client.
        model_name: Model identifier sent with every embeddings request.
    """

    def __init__(self, url, api_key, model_name):
        self._api_key = api_key
        self._url = url
        self._model = model_name
        # Built lazily on first use so construction stays free of side effects
        # and the same client is reused across calls instead of being
        # re-created for every request.
        self._client = None

    def _get_client(self):
        """Return the shared ``OpenAI`` client, creating it on first use."""
        if self._client is None:
            self._client = OpenAI(api_key=self._api_key, base_url=self._url)
        return self._client

    def embed(self, text):
        """Request an embedding for ``text`` and return the first result.

        Args:
            text: Value forwarded unchanged as the ``input`` of the
                embeddings request.

        Returns:
            The ``embedding`` attribute of the first item in the response
            data. Only the first item is returned, whatever ``text`` holds.

        Raises:
            RuntimeError: If the request or response handling fails; the
                original exception is attached as ``__cause__``.
        """
        client = self._get_client()
        try:
            response = client.embeddings.create(
                model=self._model,
                input=text,
                timeout=httpx.Timeout(60.0),
            )
            return response.data[0].embedding
        except Exception as e:
            logging.error(f"嵌入模型调用出错: {e}")
            raise RuntimeError(f"嵌入请求失败: {str(e)}") from e


embedding = Embedding(url="http://10.1.16.39:9995/v1", api_key="xxx", model_name="bge-m3")


class SiliconFlowLLM(BaseLLM):
    """Custom LangChain LLM that calls a SiliconFlow chat-completions URL."""

    api_url: str
    api_key: str
    model: str
    # Seconds to wait for the HTTP response. Without a timeout a stalled
    # connection would block the caller indefinitely.
    request_timeout: float = 300.0

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Send each prompt as a single-turn user message and collect replies.

        Args:
            prompts: Prompts to send; one HTTP request is made per prompt.
            stop: Optional stop sequences; included in the request payload
                as ``stop`` when provided.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            An ``LLMResult`` holding one single-element generation list per
            prompt, in the order the prompts were given.

        Raises:
            RuntimeError: If the HTTP request fails, returns an error status,
                or the response lacks the expected
                ``choices[0].message.content`` entry. The original exception
                is attached as ``__cause__``.
        """
        # Headers are identical for every prompt, so build them once.
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        generations = []
        for prompt in prompts:
            payload: Dict[str, Any] = {
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
            }
            if stop:
                payload["stop"] = stop

            try:
                response = requests.post(
                    self.api_url,
                    json=payload,
                    headers=headers,
                    timeout=self.request_timeout,
                )
                response.raise_for_status()
                text = response.json()["choices"][0]["message"]["content"]
            except Exception as e:
                raise RuntimeError(f"调用硅基流动API失败: {str(e)}") from e

            generations.append([Generation(text=text)])

        return LLMResult(generations=generations)

    @property
    def _llm_type(self) -> str:
        """Identifier string for this LLM implementation."""
        return "siliconflow"


llm = SiliconFlowLLM(
    api_url="https://api.siliconflow.cn/v1/chat/completions",
    # SECURITY: an API key is committed in source. Set SILICONFLOW_API_KEY in
    # the environment to override it; the literal fallback is kept only so
    # existing deployments keep working and should be rotated and removed.
    api_key=os.environ.get(
        "SILICONFLOW_API_KEY",
        "sk-bbeamiumkouptsrueilgufqqyuumelcsivxwjbdugqwsqhwj",
    ),
    model="Qwen/Qwen2.5-72B-Instruct",
)