dev #5
+21
-1
@@ -3,6 +3,10 @@ from typing import Dict
|
|||||||
|
|
||||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||||
from llama_index.core.settings import Settings
|
from llama_index.core.settings import Settings
|
||||||
|
from llama_index.llms.xinference import Xinference
|
||||||
|
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
|
||||||
|
|
||||||
|
from app.xinference.base import XinferenceEmbedding
|
||||||
|
|
||||||
|
|
||||||
def init_settings():
|
def init_settings():
|
||||||
@@ -26,8 +30,9 @@ def init_settings():
|
|||||||
init_azure_openai()
|
init_azure_openai()
|
||||||
case "t-systems":
|
case "t-systems":
|
||||||
from .llmhub import init_llmhub
|
from .llmhub import init_llmhub
|
||||||
|
|
||||||
init_llmhub()
|
init_llmhub()
|
||||||
|
case "xinference":
|
||||||
|
init_xinference()
|
||||||
case _:
|
case _:
|
||||||
raise ValueError(f"Invalid model provider: {model_provider}")
|
raise ValueError(f"Invalid model provider: {model_provider}")
|
||||||
|
|
||||||
@@ -52,6 +57,21 @@ def init_ollama():
|
|||||||
# )
|
# )
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def init_xinference():
    """Point the global llama-index ``Settings`` at an Xinference server.

    Environment variables read:

    * ``BASE_URL`` and ``MODEL`` - passed to ``Xinference`` as the endpoint
      and the model identifier.
    * ``LLM_MAX_TOKENS`` - optional integer; unset or blank means ``None``.
    * ``LLM_TEMPERATURE`` - optional float; unset or blank falls back to
      ``DEFAULT_XINFERENCE_TEMP``.
    * ``EMBEDDING_BASE_URL`` - optional; unset or blank reuses ``BASE_URL``.
    * ``EMBEDDING_MODEL`` - model name passed to ``XinferenceEmbedding``.

    Raises:
        ValueError: if ``LLM_MAX_TOKENS`` or ``LLM_TEMPERATURE`` is set to a
            non-blank value that is not numeric.
    """
    base_url = os.getenv("BASE_URL")
    model = os.getenv("MODEL")

    # Optional keys may be present but blank (e.g. ``LLM_MAX_TOKENS=`` in a
    # .env file); treat blank the same as unset instead of letting
    # int("") / float("") raise.
    raw_max_tokens = os.getenv("LLM_MAX_TOKENS")
    max_tokens = int(raw_max_tokens) if raw_max_tokens else None
    temperature = float(os.getenv("LLM_TEMPERATURE") or DEFAULT_XINFERENCE_TEMP)

    Settings.llm = Xinference(model, base_url, temperature, max_tokens)

    # Embeddings are served from the LLM endpoint unless a dedicated,
    # non-empty embedding endpoint is configured.
    embedding_base_url = os.getenv("EMBEDDING_BASE_URL") or base_url
    embed_model_name = os.getenv("EMBEDDING_MODEL")

    # EMBEDDING_DIM is deliberately not read here: XinferenceEmbedding is
    # constructed without a dimensions argument, so parsing it could only
    # fail on a value that is never used.
    Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url)
|
||||||
|
|
||||||
def init_openai():
|
def init_openai():
|
||||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||||
|
|||||||
@@ -23,6 +23,9 @@ llama-index-callbacks-arize-phoenix = "^0.1.4"
|
|||||||
llama-index-llms-dashscope = "^0.1.2"
|
llama-index-llms-dashscope = "^0.1.2"
|
||||||
llama-index-embeddings-dashscope = "^0.1.4"
|
llama-index-embeddings-dashscope = "^0.1.4"
|
||||||
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
|
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
|
||||||
|
#xinference = "^0.14.1"
|
||||||
|
xinference.client = "^0.14.1"
|
||||||
|
llama-index-llms-xinference = "^0.1.2"
|
||||||
qdrant-client="^1.10.1"
|
qdrant-client="^1.10.1"
|
||||||
llama-index-vector-stores-qdrant = "^0.2.14"
|
llama-index-vector-stores-qdrant = "^0.2.14"
|
||||||
chroma="^0.5.5"
|
chroma="^0.5.5"
|
||||||
|
|||||||
Reference in New Issue
Block a user