diff --git a/backend/.env.example b/backend/.env.example index 041d6b9..3eb0d8c 100644 --- a/backend/.env.example +++ b/backend/.env.example @@ -4,34 +4,48 @@ SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zj #SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2 SQLITE_DATABASE_URL=sqlite:///./source.db -DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60 -# The provider for the AI models to use. -MODEL_PROVIDER=dashscope -# The name of LLM model to use. -MODEL=qwen-max +# The number of similar embeddings to return when retrieving documents. +TOP_K=10 +#-------------------------- +# 是否启用混合检索 +HYBRID_ENABLED = false +# 混合检索阈值 +HYBRID_ALPHA = 0.6 # 是否启用检索重排功能 -ENABLE_RERANK=true -# Name of the embedding model to use. -EMBEDDING_MODEL=text-embedding-v2 +RERANK_ENABLED=true -# Dimension of the embedding model to use. +#---------- rerank- Xinference ---------------- +RERANK_PROVIDER=xinference +RERANK_MODEL=bge-reranker-v2-m3 +RERANK_BASE_URL=http://10.1.16.39:9995 +RERANK_TOP_N=5 +RERANK_THRESHOLD=0.3 + +#---------- model - Xinference ---------------- +#MODEL_PROVIDER=xinference +#OPENAI_API_KEY=xinference +#BASE_URL=http://172.20.0.145:9995 +#MODEL=Qwen2-72B-Instruct-GPTQ-Int8 +## Temperature for sampling from the model. +#LLM_TEMPERATURE=0.1 + +#---------- model - dashscope ---------------- +MODEL_PROVIDER=dashscope +DASHSCOPE_API_KEY=sk-221d2d202e104618a56002ce2e7dc0d0 +MODEL=qwen-max + + + +#---------- embedding - Xinference ---------------- +EMBEDDING_PROVIDER=xinference +EMBEDDING_MODEL=bge-m3 +EMBEDDING_BASE_URL=http://10.1.16.39:9995 EMBEDDING_DIM=1024 + # The questions to help users get started (multi-line). CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容? -# The OpenAI API key to use. -# OPENAI_API_KEY= - -# Temperature for sampling from the model. -# LLM_TEMPERATURE= - -# Maximum number of tokens to generate. -# LLM_MAX_TOKENS= - -# The number of similar embeddings to return when retrieving documents. -TOP_K=5 - # The time in milliseconds to wait for the stream to return a response. STREAM_TIMEOUT=60000 @@ -53,9 +67,8 @@ VECTOR_STORE_PATH=./storage_vector BM_RETRIEVER_PATH =./storage_bm - PHOENIX_API_KEY=123456 -PHOENIX_URL=http://localhost:6006/v1/traces +PHOENIX_URL=http://10.1.6.103:6006/v1/traces PHOENIX_PROJECT_NAME=ly_zjapp #OTEL_SERVICE_NAME=ly_zjapp #OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp @@ -82,4 +95,5 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath PRJTOJSON_URL = 'http://10.1.6.60:8092' PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!" + CHAT_UPLOAD_FILECACHE = "./output/uploaded" \ No newline at end of file diff --git a/backend/.env.xinference b/backend/.env.xinference index b71d658..a78ab4b 100644 --- a/backend/.env.xinference +++ b/backend/.env.xinference @@ -14,27 +14,28 @@ HYBRID_ALPHA = 0.6 #-------------------------- # 是否启用检索重排功能 RERANK_ENABLED=true -# Rerank model + +#---------- rerank- Xinference ---------------- +RERANK_PROVIDER=xinference RERANK_MODEL=bge-reranker-v2-m3 RERANK_BASE_URL=http://10.1.16.39:9995 RERANK_TOP_N=5 RERANK_THRESHOLD=0.3 -#---------- Xinference ---------------- -# The provider for the AI models to use. -MODEL_PROVIDER=xinference -# The OpenAI API key to use. -OPENAI_API_KEY=xinference + +#---------- model - Xinference ---------------- +MODEL_PROVIDER=xinference # The provider for the AI models to use. +OPENAI_API_KEY=xinference # The OpenAI API key to use. BASE_URL=http://10.1.0.142:9995 MODEL=Qwen2-72B-Instruct-GPTQ-Int8 -# Temperature for sampling from the model. -LLM_TEMPERATURE=0.1 -# Maximum number of tokens to generate. -#LLM_MAX_TOKENS= -# Name of the embedding model to use. +LLM_TEMPERATURE=0.1 # Temperature for sampling from the model. +#LLM_MAX_TOKENS= # Maximum number of tokens to generate. + + +#---------- embedding - Xinference ---------------- +EMBEDDING_PROVIDER=xinference EMBEDDING_MODEL=bge-m3 EMBEDDING_BASE_URL=http://10.1.16.39:9995 -# Dimension of the embedding model to use. -EMBEDDING_DIM=1024 +EMBEDDING_DIM=1024 # Dimension of the embedding model to use. ##---------- OpenAI ---------------- ## The provider for the AI models to use. diff --git a/backend/app/api/routers/app.py b/backend/app/api/routers/app.py index fa61dac..bf9f19d 100644 --- a/backend/app/api/routers/app.py +++ b/backend/app/api/routers/app.py @@ -24,14 +24,11 @@ from app.api.routers.services.fileServices import PrjFileLoadService,ChatFileSer from app.api.routers.services.suggestion import NextQuestionSuggestion import time from llama_index.core.settings import Settings -from llama_index.core.callbacks import CallbackManager logger = logging.getLogger("uvicorn") v1_router = v = APIRouter() -Settings.llm.callback_manager = CallbackManager() - gEvent_handler = None diff --git a/backend/app/settings.py b/backend/app/settings.py index 63b222f..0a0f65e 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -1,6 +1,6 @@ import os from typing import Dict - +from abc import abstractmethod from llama_index.core.constants import DEFAULT_TEMPERATURE from llama_index.core.settings import Settings from llama_index.llms.xinference import Xinference @@ -9,229 +9,322 @@ from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP from app.xinference.base import XinferenceEmbedding, XinferenceRerank from app.engine.loaders import getProjectInfos from app.api.routers.request.base import ProjectInfo +from util.register import * +from llama_index.core.callbacks import CallbackManager +from modelProvide.customDashScope import CustomDashScope +ModelPlateCategory = '模型平台' def get_node_postprocessors(): rerank_enabled = os.getenv("RERANK_ENABLED").title() if rerank_enabled is None or rerank_enabled == 'False': return [] - - rerank_model = os.getenv("RERANK_MODEL") - rerank_url = os.getenv("RERANK_BASE_URL") - rerank_top_n = os.getenv("RERANK_TOP_N") - rerank_threshold = os.getenv("RERANK_THRESHOLD") + + Rerank_provider = os.getenv("RERANK_PROVIDER") + modelPaltCls:ModelPlatform = ClsRegister.get(ModelPlateCategory,Rerank_provider) postprocess = None - if rerank_model is not None: - postprocess = [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)] + if modelPaltCls is not None: + modelPalt:ModelPlatform = modelPaltCls() + postprocess = modelPalt.rerank() + else: + raise ValueError(f"Invalid rerank provider: {Rerank_provider}") return postprocess def init_settings(): model_provider = os.getenv("MODEL_PROVIDER") - match model_provider: - case "openai": - init_openai() - case "dashscope": - init_dashscope() - case "groq": - init_groq() - case "ollama": - init_ollama() - case "anthropic": - init_anthropic() - case "gemini": - init_gemini() - case "mistral": - init_mistral() - case "azure-openai": - init_azure_openai() - case "t-systems": - from .llmhub import init_llmhub - init_llmhub() - case "xinference": - init_xinference() - case _: - raise ValueError(f"Invalid model provider: {model_provider}") + modelPaltCls:ModelPlatform = ClsRegister.get(ModelPlateCategory,model_provider) + if modelPaltCls is not None: + modelPalt:ModelPlatform = modelPaltCls() + Settings.llm = modelPalt.model() + else: + raise ValueError(f"Invalid model provider: {model_provider}") + + embedding_provider = os.getenv("EMBEDDING_PROVIDER") + modelPaltCls:ModelPlatform = ClsRegister.get(ModelPlateCategory,embedding_provider) + if modelPalt is not None: + modelPalt:ModelPlatform = modelPaltCls() + Settings.embed_model = modelPalt.embedding() + else: + raise ValueError(f"Invalid embedding provider: {embedding_provider}") + Settings.llm.callback_manager = CallbackManager() Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024")) Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20")) -def init_ollama(): - # from llama_index.embeddings.ollama import OllamaEmbedding - # from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama - # - # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" - # request_timeout = float( - # os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) - # ) - # Settings.embed_model = OllamaEmbedding( - # base_url=base_url, - # model_name=os.getenv("EMBEDDING_MODEL"), - # ) - # Settings.llm = Ollama( - # base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout - # ) - pass +class ModelPlatform: + @abstractmethod + def model(self): + pass -def init_xinference(): - base_url = os.getenv("BASE_URL") - model = os.getenv("MODEL") - max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None - temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP)) + @abstractmethod + def embedding(self): + pass - Settings.llm = Xinference(model, base_url, temperature, max_tokens) + @abstractmethod + def rerank(self): + pass - embedding_base_url = os.getenv("EMBEDDING_BASE_URL") - embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url +@register(ModelPlateCategory,'ollama') +class OllamaPlatform(ModelPlatform): + def model(self): + #from llama_index.embeddings.ollama import OllamaEmbedding + #from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama + # + # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" + # request_timeout = float( + # os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) + # ) + # Settings.llm = Ollama( + # base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout + # ) + pass - embed_model_name = os.getenv("EMBEDDING_MODEL") - dimensions = os.getenv("EMBEDDING_DIM") - dimensions = int(dimensions) if dimensions is not None else None - Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions) + def embedding(self): + #from llama_index.embeddings.ollama import OllamaEmbedding + # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" + # Settings.embed_model = OllamaEmbedding( + # base_url=base_url, + # model_name=os.getenv("EMBEDDING_MODEL"), + # ) + pass -def init_openai(): - from llama_index.core.constants import DEFAULT_TEMPERATURE - from llama_index.embeddings.openai import OpenAIEmbedding - from llama_index.llms.openai import OpenAI + def rerank(self): + pass - max_tokens = os.getenv("LLM_MAX_TOKENS") - config = { - "model": os.getenv("MODEL"), - "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - Settings.llm = OpenAI(**config) +@register(ModelPlateCategory,'xinference') +class XinferencePlatform(ModelPlatform): + def model(self): + base_url = os.getenv("BASE_URL") + model = os.getenv("MODEL") + max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None + temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP)) + return Xinference(model, base_url, temperature, max_tokens) + + def embedding(self): + base_url = os.getenv("BASE_URL") + embedding_base_url = os.getenv("EMBEDDING_BASE_URL") + embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url - dimensions = os.getenv("EMBEDDING_DIM") - config = { - "model": os.getenv("EMBEDDING_MODEL"), - "dimensions": int(dimensions) if dimensions is not None else None, - } - Settings.embed_model = OpenAIEmbedding(**config) + embed_model_name = os.getenv("EMBEDDING_MODEL") + dimensions = os.getenv("EMBEDDING_DIM") + dimensions = int(dimensions) if dimensions is not None else None + return XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions) + + def rerank(self): + rerank_model = os.getenv("RERANK_MODEL") + rerank_url = os.getenv("RERANK_BASE_URL") + rerank_top_n = os.getenv("RERANK_TOP_N") + rerank_threshold = os.getenv("RERANK_THRESHOLD") + postprocess = None + if rerank_model is not None: + postprocess = [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)] + return postprocess -def init_dashscope(): - from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels - from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeBatchTextEmbeddingModels,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels +@register(ModelPlateCategory,'openai') +class OpenAIPlatform(ModelPlatform): + def model(self): + from llama_index.core.constants import DEFAULT_TEMPERATURE + from llama_index.llms.openai import OpenAI - max_tokens = os.getenv("LLM_MAX_TOKENS") - config = { - "model": os.getenv("MODEL"), - "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)), - "max_tokens": int(max_tokens) if max_tokens is not None else None, - } - Settings.llm = llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX) + max_tokens = os.getenv("LLM_MAX_TOKENS") + config = { + "model": os.getenv("MODEL"), + "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)), + "max_tokens": int(max_tokens) if max_tokens is not None else None, + } + return OpenAI(**config) + + def embedding(self): + from llama_index.embeddings.openai import OpenAIEmbedding + dimensions = os.getenv("EMBEDDING_DIM") + config = { + "model": os.getenv("EMBEDDING_MODEL"), + "dimensions": int(dimensions) if dimensions is not None else None, + } + return OpenAIEmbedding(**config) + + def rerank(self): + pass - dimensions = os.getenv("EMBEDDING_DIM") - config = { - "model": os.getenv("EMBEDDING_MODEL"), - "dimensions": int(dimensions) if dimensions is not None else None, - } - Settings.embed_model = DashScopeEmbedding(model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2, - text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY) +@register(ModelPlateCategory,'dashscope') +class DashscopePlatform(ModelPlatform): + def model(self): + apikey = os.getenv('DASHSCOPE_API_KEY') + modelName = os.getenv('MODEL') + return CustomDashScope(model_name=modelName,api_key = apikey) -def init_azure_openai(): - # from llama_index.core.constants import DEFAULT_TEMPERATURE - # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding - # from llama_index.llms.azure_openai import AzureOpenAI - # - # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] - # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] - # max_tokens = os.getenv("LLM_MAX_TOKENS") - # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) - # dimensions = os.getenv("EMBEDDING_DIM") - # - # azure_config = { - # "api_key": os.environ["AZURE_OPENAI_KEY"], - # "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], - # "api_version": os.getenv("AZURE_OPENAI_API_VERSION") - # or os.getenv("OPENAI_API_VERSION"), - # } - # - # Settings.llm = AzureOpenAI( - # model=os.getenv("MODEL"), - # max_tokens=int(max_tokens) if max_tokens is not None else None, - # temperature=float(temperature), - # deployment_name=llm_deployment, - # **azure_config, - # ) - # - # Settings.embed_model = AzureOpenAIEmbedding( - # model=os.getenv("EMBEDDING_MODEL"), - # dimensions=int(dimensions) if dimensions is not None else None, - # deployment_name=embedding_deployment, - # **azure_config, - # ) - pass + def embedding(self): + from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels + api_key = os.getenv('DASHSCOPE_API_KEY') + modelName = os.getenv('EMBEDDING_MODEL') + return DashScopeEmbedding(model_name=modelName, + text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY,api_key = api_key) -def init_fastembed(): - """ - Use Qdrant Fastembed as the local embedding provider. - """ - # from llama_index.embeddings.fastembed import FastEmbedEmbedding - # - # embed_model_map: Dict[str, str] = { - # # Small and multilingual - # "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", - # # Large and multilingual - # "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501 - # } - # - # # This will download the model automatically if it is not already downloaded - # Settings.embed_model = FastEmbedEmbedding( - # model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")] - # ) - pass + def rerank(self): + pass +@register(ModelPlateCategory,'azure-openai') +class AzureOpenaiPlatform(ModelPlatform): + def model(self): + # from llama_index.core.constants import DEFAULT_TEMPERATURE + # from llama_index.llms.azure_openai import AzureOpenAI + # + # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] + # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] + # max_tokens = os.getenv("LLM_MAX_TOKENS") + # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) + # dimensions = os.getenv("EMBEDDING_DIM") + # + # azure_config = { + # "api_key": os.environ["AZURE_OPENAI_KEY"], + # "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], + # "api_version": os.getenv("AZURE_OPENAI_API_VERSION") + # or os.getenv("OPENAI_API_VERSION"), + # } + # + # return AzureOpenAI( + # model=os.getenv("MODEL"), + # max_tokens=int(max_tokens) if max_tokens is not None else None, + # temperature=float(temperature), + # deployment_name=llm_deployment, + # **azure_config, + # ) + pass -def init_groq(): - # from llama_index.llms.groq import Groq - # - # model_map: Dict[str, str] = { - # "llama3-8b": "llama3-8b-8192", - # "llama3-70b": "llama3-70b-8192", - # "mixtral-8x7b": "mixtral-8x7b-32768", - # } - # - # Settings.llm = Groq(model=model_map[os.getenv("MODEL")]) - # # Groq does not provide embeddings, so we use FastEmbed instead - # init_fastembed() - pass + def embedding(self): + # from llama_index.core.constants import DEFAULT_TEMPERATURE + # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding + # + # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] + # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] + # max_tokens = os.getenv("LLM_MAX_TOKENS") + # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) + # dimensions = os.getenv("EMBEDDING_DIM") + # + # azure_config = { + # "api_key": os.environ["AZURE_OPENAI_KEY"], + # "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], + # "api_version": os.getenv("AZURE_OPENAI_API_VERSION") + # or os.getenv("OPENAI_API_VERSION"), + # } + # return AzureOpenAIEmbedding( + # model=os.getenv("EMBEDDING_MODEL"), + # dimensions=int(dimensions) if dimensions is not None else None, + # deployment_name=embedding_deployment, + # **azure_config, + # ) + pass + def rerank(self): + pass -def init_anthropic(): - # from llama_index.llms.anthropic import Anthropic - # - # model_map: Dict[str, str] = { - # "claude-3-opus": "claude-3-opus-20240229", - # "claude-3-sonnet": "claude-3-sonnet-20240229", - # "claude-3-haiku": "claude-3-haiku-20240307", - # "claude-2.1": "claude-2.1", - # "claude-instant-1.2": "claude-instant-1.2", - # } - # - # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")]) - # # Anthropic does not provide embeddings, so we use FastEmbed instead - # init_fastembed() - pass +@register(ModelPlateCategory,'fastembed') +class FastembedPlatform(ModelPlatform): + @abstractmethod + def model(self): + pass + @abstractmethod + def embedding(self): + # from llama_index.embeddings.fastembed import FastEmbedEmbedding + # + # embed_model_map: Dict[str, str] = { + # # Small and multilingual + # "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + # # Large and multilingual + # "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501 + # } + # + # # This will download the model automatically if it is not already downloaded + # Settings.embed_model = FastEmbedEmbedding( + # model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")] + # ) + pass -def init_gemini(): - # from llama_index.embeddings.gemini import GeminiEmbedding - # from llama_index.llms.gemini import Gemini - # - # model_name = f"models/{os.getenv('MODEL')}" - # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" - # - # Settings.llm = Gemini(model=model_name) - # Settings.embed_model = GeminiEmbedding(model_name=embed_model_name) - pass + @abstractmethod + def rerank(self): + pass -def init_mistral(): - # from llama_index.embeddings.mistralai import MistralAIEmbedding - # from llama_index.llms.mistralai import MistralAI - # - # Settings.llm = MistralAI(model=os.getenv("MODEL")) - # Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) - pass +@register(ModelPlateCategory,'groq') +class GroqPlatform(ModelPlatform): + @abstractmethod + def model(self): + # from llama_index.llms.groq import Groq + # + # model_map: Dict[str, str] = { + # "llama3-8b": "llama3-8b-8192", + # "llama3-70b": "llama3-70b-8192", + # "mixtral-8x7b": "mixtral-8x7b-32768", + # } + # + # Settings.llm = Groq(model=model_map[os.getenv("MODEL")]) + # # Groq does not provide embeddings, so we use FastEmbed instead + # init_fastembed() + pass + + @abstractmethod + def embedding(self): + pass + + @abstractmethod + def rerank(self): + pass + +@register(ModelPlateCategory,'anthropic') +class AnthropicPlatform(ModelPlatform): + def model(self): + # from llama_index.llms.anthropic import Anthropic + # + # model_map: Dict[str, str] = { + # "claude-3-opus": "claude-3-opus-20240229", + # "claude-3-sonnet": "claude-3-sonnet-20240229", + # "claude-3-haiku": "claude-3-haiku-20240307", + # "claude-2.1": "claude-2.1", + # "claude-instant-1.2": "claude-instant-1.2", + # } + # + # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")]) + # # Anthropic does not provide embeddings, so we use FastEmbed instead + # init_fastembed() + pass + + def embedding(self): + pass + + def rerank(self): + pass + +@register(ModelPlateCategory,'gemini') +class GeminiPlatform(ModelPlatform): + def model(self): + # from llama_index.llms.gemini import Gemini + # model_name = f"models/{os.getenv('MODEL')}" + # return Gemini(model=model_name) + pass + + def embedding(self): + # from llama_index.embeddings.gemini import GeminiEmbedding + # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" + # return GeminiEmbedding(model_name=embed_model_name) + pass + + def rerank(self): + pass + +@register(ModelPlateCategory,'mistral') +class MistralPlatform(ModelPlatform): + def model(self): + # from llama_index.llms.mistralai import MistralAI + # return MistralAI(model=os.getenv("MODEL")) + pass + + def embedding(self): + # from llama_index.embeddings.mistralai import MistralAIEmbedding + # return MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) + pass + + def rerank(self): + pass def init_ProjectInfo(): prjObj = ProjectInfo() @@ -239,3 +332,6 @@ def init_ProjectInfo(): for prjInfo in prjInfos: prjObj.add(prjInfo['name'],prjInfo['flag']) + + + diff --git a/backend/modelProvide/customDashScope.py b/backend/modelProvide/customDashScope.py new file mode 100644 index 0000000..6eddd20 --- /dev/null +++ b/backend/modelProvide/customDashScope.py @@ -0,0 +1,58 @@ +from llama_index.llms.dashscope import DashScope +from llama_index.core.base.llms.types import LLMMetadata + +class DashScopeGenerationModels: + """DashScope Qwen serial models.""" + + QWEN_TURBO = "qwen-turbo" + QWEN_PLUS = "qwen-plus" + QWEN_MAX = "qwen-max" + QWEN_MAX_1201 = "qwen-max-1201" + QWEN_MAX_LONGCONTEXT = "qwen-max-longcontext" + QWEN2_MATH_72B_INSTRUCT = 'qwen2-math-72b-instruct' + +DASHSCOPE_MODEL_META = { + DashScopeGenerationModels.QWEN_TURBO: { + "context_window": 1024 * 8, + "num_output": 1024 * 8, + "is_chat_model": True, + }, + DashScopeGenerationModels.QWEN_PLUS: { + "context_window": 1024 * 32, + "num_output": 1024 * 32, + "is_chat_model": True, + }, + DashScopeGenerationModels.QWEN_MAX: { + "context_window": 1024 * 8, + "num_output": 1024 * 8, + "is_chat_model": True, + }, + DashScopeGenerationModels.QWEN_MAX_1201: { + "context_window": 1024 * 8, + "num_output": 1024 * 8, + "is_chat_model": True, + }, + DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT: { + "context_window": 1024 * 30, + "num_output": 1024 * 30, + "is_chat_model": True, + }, + DashScopeGenerationModels.QWEN2_MATH_72B_INSTRUCT: { + "context_window": 1024 * 8, + "num_output": 1024 * 8, + "is_chat_model": True, + }, +} + + +class CustomDashScope(DashScope): + @property + def metadata(self) -> LLMMetadata: + DASHSCOPE_MODEL_META[self.model_name]["num_output"] = ( + self.max_tokens or DASHSCOPE_MODEL_META[self.model_name]["num_output"] + ) + return LLMMetadata( + model_name=self.model_name, **DASHSCOPE_MODEL_META[self.model_name] + ) + + diff --git a/backend/util/register.py b/backend/util/register.py new file mode 100644 index 0000000..c914ec1 --- /dev/null +++ b/backend/util/register.py @@ -0,0 +1,43 @@ +from typing import Dict, List + +class ClsRegister: + clsLst:Dict[str,Dict[str,str]] = {} + + @classmethod + def add(cls,catalog,name,obj) -> None: + if catalog in cls.clsLst: + registry = cls.clsLst[catalog] + registry[name] = obj + else: + registry:Dict[str,str] = {} + registry[name] = obj + cls.clsLst[catalog] = registry + + @classmethod + def get(cls,catalog,name,fuzzy:bool=False) -> None: + if catalog in cls.clsLst: + registry = cls.clsLst[catalog] + for key,value in registry.items(): + if fuzzy: + if key in name: + return value + else: + if key == name: + return value + return None + + @classmethod + def getClsList(cls,catalog) -> None: + res_Lst = [] + if catalog in cls.clsLst: + registry = cls.clsLst[catalog] + for key,value in registry.items(): + res_Lst.append(value) + return res_Lst + + +def register(catalog,name): + def decorator(className): + ClsRegister.add(catalog,name,className) + return className + return decorator \ No newline at end of file