"""Model clients used by the application.

Defines a small HTTP client for an OpenAI-style embeddings endpoint and a
LangChain ``BaseLLM`` subclass that calls the SiliconFlow chat-completions
API, plus the module-level instances built from them.
"""

import os
from typing import Any, Dict, List, Optional

import requests
from langchain_community.llms.yi import YiLLM
from langchain_core.callbacks import CallbackManagerForLLMRun
from langchain_core.language_models import BaseLLM
from langchain_core.outputs import Generation, LLMResult
from langchain_openai import ChatOpenAI
from tenacity import retry, stop_after_attempt, wait_exponential

# NOTE: several of the imports above are not referenced in this module. They
# are kept because other modules may import these names from here.

# ---------------------------------------------------------------------------
# Legacy raw-completions client. Disabled; kept for reference only.
# ---------------------------------------------------------------------------
# import requests
# class LLM:
#     def __init__(self, model_uid, server_url):
#         self.model_uid = model_uid
#         self.server_url = server_url.rstrip("/")
#
#     def generate(self, prompt, max_tokens=32000, temperature=0.2, **kwargs):
#         url = f"{self.server_url}/v1/completions"
#         headers = {"Content-Type": "application/json"}
#         data = {
#             "model": self.model_uid,
#             "prompt": prompt,
#             "max_tokens": max_tokens,
#             "temperature": temperature,
#             **kwargs,
#         }
#         try:
#             response = requests.post(url, headers=headers, json=data)
#             response.raise_for_status()
#             result = response.json()
#             return result["choices"][0]["text"]
#         except requests.exceptions.RequestException as e:
#             raise RuntimeError(f"请求失败: {e}")
#         except KeyError:
#             raise ValueError("响应格式错误,无法解析生成结果")
#
# llm = LLM(model_uid="QwQ-32b", server_url="http://172.20.0.145:9995")


class Embedding:
    """HTTP client for a ``/v1/embeddings`` endpoint.

    Args:
        model_uid: Model identifier sent as the ``model`` field of the request.
        server_url: Base URL of the server; trailing slashes are stripped.
        timeout: Optional timeout in seconds passed to ``requests.post``.
            ``None`` (the default) means no timeout, matching the previous
            behaviour.
    """

    def __init__(self, model_uid, server_url, timeout=None):
        self.model_uid = model_uid
        self.server_url = server_url.rstrip("/")
        self.timeout = timeout

    def embed(self, text):
        """Request an embedding for ``text`` and return it.

        Args:
            text: Value sent as the ``input`` field of the request body.

        Returns:
            The ``embedding`` value of the first entry in the response's
            ``data`` list.

        Raises:
            RuntimeError: If the HTTP request fails or returns an error status.
            ValueError: If the response JSON does not have the expected
                ``data[0].embedding`` structure.
        """
        url = f"{self.server_url}/v1/embeddings"
        headers = {"Content-Type": "application/json"}
        data = {"model": self.model_uid, "input": text}

        try:
            response = requests.post(
                url, headers=headers, json=data, timeout=self.timeout
            )
            response.raise_for_status()
            result = response.json()
            return result["data"][0]["embedding"]
        except requests.exceptions.RequestException as e:
            raise RuntimeError(f"请求失败: {e}") from e
        except (KeyError, IndexError, TypeError) as e:
            # An empty ``data`` list or a non-dict body is just as much a
            # malformed response as a missing key.
            raise ValueError("响应格式错误,无法解析嵌入结果") from e


embedding = Embedding(model_uid="bge-m3", server_url="http://10.1.16.39:9995")


class SiliconFlowLLM(BaseLLM):
    """Custom LLM wrapper that calls a SiliconFlow chat-completions endpoint."""

    # Full URL of the chat-completions endpoint.
    api_url: str
    # Bearer token sent in the ``Authorization`` header.
    api_key: str
    # Model name sent as the ``model`` field of the request payload.
    model: str
    # Optional timeout in seconds for each HTTP request; ``None`` keeps the
    # previous behaviour of waiting indefinitely.
    request_timeout: Optional[float] = None

    def _generate(
        self,
        prompts: List[str],
        stop: Optional[List[str]] = None,
        run_manager: Optional[CallbackManagerForLLMRun] = None,
        **kwargs: Any,
    ) -> LLMResult:
        """Send each prompt as a single user message and collect the replies.

        Args:
            prompts: Prompts to complete; one HTTP request is made per prompt.
            stop: Optional stop sequences; forwarded as the ``stop`` field of
                the payload when provided.
            run_manager: Accepted for interface compatibility; not used.
            **kwargs: Accepted for interface compatibility; not used.

        Returns:
            An ``LLMResult`` holding one single-element generation list per
            prompt, in the same order as ``prompts``.

        Raises:
            Exception: If a request fails or its response cannot be parsed.
        """
        # The headers are identical for every prompt, so build them once.
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

        generations = []
        for prompt in prompts:
            payload: Dict[str, Any] = {
                "model": self.model,
                "messages": [{"role": "user", "content": prompt}],
            }
            if stop:
                # Previously the caller's stop sequences were silently dropped.
                payload["stop"] = stop

            try:
                response = requests.post(
                    self.api_url,
                    json=payload,
                    headers=headers,
                    timeout=self.request_timeout,
                )
                response.raise_for_status()
                text = response.json()["choices"][0]["message"]["content"]
            except Exception as e:
                raise Exception(f"调用硅基流动API失败: {str(e)}") from e

            generations.append([Generation(text=text)])

        return LLMResult(generations=generations)

    @property
    def _llm_type(self) -> str:
        """Return the identifier string for this LLM type."""
        return "siliconflow"


_SILICONFLOW_API_URL = "https://api.siliconflow.cn/v1/chat/completions"

# SECURITY NOTE(review): a live API key is committed in source. Prefer setting
# the SILICONFLOW_API_KEY environment variable; the literal below is kept only
# as a backward-compatible fallback and should be rotated and removed.
_SILICONFLOW_API_KEY = os.environ.get(
    "SILICONFLOW_API_KEY",
    "sk-bbeamiumkouptsrueilgufqqyuumelcsivxwjbdugqwsqhwj",
)

search_llm = SiliconFlowLLM(
    api_url=_SILICONFLOW_API_URL,
    api_key=_SILICONFLOW_API_KEY,
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
)

keyword_llm = SiliconFlowLLM(
    api_url=_SILICONFLOW_API_URL,
    api_key=_SILICONFLOW_API_KEY,
    model="deepseek-ai/DeepSeek-R1-Distill-Qwen-32B",
)

rewrite_llm = SiliconFlowLLM(
    api_url=_SILICONFLOW_API_URL,
    api_key=_SILICONFLOW_API_KEY,
    model="Qwen/Qwen2.5-72B-Instruct",
)