Compare commits
1 Commits
d3df62f454
..
dev-db
| Author | SHA1 | Date | |
|---|---|---|---|
| 7e58a1a223 |
@@ -1,80 +0,0 @@
|
||||
# The Llama Cloud API key.
|
||||
# LLAMA_CLOUD_API_KEY=
|
||||
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
|
||||
|
||||
DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
|
||||
# The provider for the AI models to use.
|
||||
MODEL_PROVIDER=dashscope
|
||||
# The name of the LLM model to use.
|
||||
MODEL=qwen-max
|
||||
# 是否启用检索重排功能
|
||||
ENABLE_RERANK=true
|
||||
# Name of the embedding model to use.
|
||||
EMBEDDING_MODEL=text-embedding-v2
|
||||
|
||||
# Dimension of the embedding model to use.
|
||||
EMBEDDING_DIM=1024
|
||||
|
||||
# The questions to help users get started (multi-line).
|
||||
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
|
||||
|
||||
# The OpenAI API key to use.
|
||||
# OPENAI_API_KEY=
|
||||
|
||||
# Temperature for sampling from the model.
|
||||
# LLM_TEMPERATURE=
|
||||
|
||||
# Maximum number of tokens to generate.
|
||||
# LLM_MAX_TOKENS=
|
||||
|
||||
# The number of similar embeddings to return when retrieving documents.
|
||||
TOP_K=5
|
||||
|
||||
# The time in milliseconds to wait for the stream to return a response.
|
||||
STREAM_TIMEOUT=60000
|
||||
|
||||
# 向量存储数据库类型,目前可选:chroma、qdrant
|
||||
VECTOR_STORE_TYPE=chroma
|
||||
# The name of the collection in your vector database
|
||||
VECTOR_STORE_COLLECTION=default
|
||||
|
||||
# The API endpoint for your vector database
|
||||
# VECTOR_STORE_HOST=
|
||||
|
||||
# The port for your vector database
|
||||
# VECTOR_STORE_PORT=
|
||||
|
||||
# The local path to the vector database.
|
||||
# Specify this if you are using a local vector database.
|
||||
# Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
|
||||
VECTOR_STORE_PATH=./storage_vector
|
||||
|
||||
|
||||
|
||||
PHOENIX_API_KEY=123456
|
||||
PHOENIX_URL=http://localhost:6006/v1/traces
|
||||
PHOENIX_PROJECT_NAME=ly_zjapp
|
||||
#OTEL_SERVICE_NAME=ly_zjapp
|
||||
#OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
|
||||
# The address to start the backend app.
|
||||
APP_HOST=0.0.0.0
|
||||
|
||||
# The port to start the backend app.
|
||||
APP_PORT=8000
|
||||
|
||||
FILESERVER_URL_PREFIX=/api/files
|
||||
|
||||
# E2B_API_KEY is required to run the code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
|
||||
# E2B_API_KEY=
|
||||
|
||||
# The system prompt for the AI model.
|
||||
SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
|
||||
-You are a Python interpreter that can run any python code in a secure environment.
|
||||
- The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell.
|
||||
- You are given tasks to complete and you run python code to solve them.
|
||||
- It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
|
||||
- Plot visualizations using matplotlib or any other visualization library directly in the notebook.
|
||||
- You can install any pip package (if it exists) by running a cell with pip install.
|
||||
"
|
||||
|
||||
@@ -1,103 +0,0 @@
|
||||
# The Llama Cloud API key.
|
||||
# LLAMA_CLOUD_API_KEY=
|
||||
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
|
||||
|
||||
#---------- Xinference ----------------
|
||||
# The provider for the AI models to use.
|
||||
MODEL_PROVIDER=xinference
|
||||
# The OpenAI API key to use.
|
||||
OPENAI_API_KEY=xinference
|
||||
BASE_URL=http://10.1.0.142:9995
|
||||
MODEL=Qwen2-72B-Instruct-GPTQ-Int8
|
||||
# Temperature for sampling from the model.
|
||||
LLM_TEMPERATURE=0.1
|
||||
# Maximum number of tokens to generate.
|
||||
#LLM_MAX_TOKENS=
|
||||
# Name of the embedding model to use.
|
||||
EMBEDDING_MODEL=bge-m3
|
||||
EMBEDDING_BASE_URL=http://10.1.16.39:9995
|
||||
# Dimension of the embedding model to use.
|
||||
EMBEDDING_DIM=1024
|
||||
##---------- OpenAI ----------------
|
||||
## The provider for the AI models to use.
|
||||
#MODEL_PROVIDER=openai
|
||||
## The OpenAI API key to use.
|
||||
#OPENAI_API_KEY=xinference
|
||||
#BASE_URL=http://10.1.0.142:9995/v1
|
||||
#MODEL=Qwen2-72B-Instruct-GPTQ-Int4
|
||||
## Temperature for sampling from the model.
|
||||
#LLM_TEMPERATURE=0.1
|
||||
## Maximum number of tokens to generate.
|
||||
##LLM_MAX_TOKENS=
|
||||
## Name of the embedding model to use.
|
||||
#EMBEDDING_MODEL=text-embedding-v2
|
||||
## Dimension of the embedding model to use.
|
||||
#EMBEDDING_DIM=1024
|
||||
#---------- DashScope ----------------
|
||||
#DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
|
||||
## The provider for the AI models to use.
|
||||
#MODEL_PROVIDER=dashscope
|
||||
## The name of the LLM model to use.
|
||||
#MODEL=qwen-max
|
||||
## Name of the embedding model to use.
|
||||
#EMBEDDING_MODEL=text-embedding-v2
|
||||
|
||||
#--------------------------
|
||||
# 是否启用检索重排功能
|
||||
ENABLE_RERANK=true
|
||||
|
||||
|
||||
# The questions to help users get started (multi-line).
|
||||
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
|
||||
|
||||
# The number of similar embeddings to return when retrieving documents.
|
||||
TOP_K=5
|
||||
|
||||
# The time in milliseconds to wait for the stream to return a response.
|
||||
STREAM_TIMEOUT=60000
|
||||
|
||||
# 向量存储数据库类型,目前可选:chroma、qdrant
|
||||
VECTOR_STORE_TYPE=chroma
|
||||
# The name of the collection in your vector database
|
||||
VECTOR_STORE_COLLECTION=default
|
||||
|
||||
# The API endpoint for your vector database
|
||||
# VECTOR_STORE_HOST=
|
||||
|
||||
# The port for your vector database
|
||||
# VECTOR_STORE_PORT=
|
||||
|
||||
# The local path to the vector database.
|
||||
# Specify this if you are using a local vector database.
|
||||
# Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
|
||||
VECTOR_STORE_PATH=./storage_vector
|
||||
|
||||
|
||||
|
||||
PHOENIX_API_KEY=123456
|
||||
PHOENIX_URL=http://localhost:6006/v1/traces
|
||||
PHOENIX_PROJECT_NAME=ly_zjapp
|
||||
#OTEL_SERVICE_NAME=ly_zjapp
|
||||
#OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
|
||||
# The address to start the backend app.
|
||||
APP_HOST=0.0.0.0
|
||||
|
||||
# The port to start the backend app.
|
||||
APP_PORT=8000
|
||||
|
||||
FILESERVER_URL_PREFIX=/api/files
|
||||
|
||||
# E2B_API_KEY is required to run the code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
|
||||
# E2B_API_KEY=
|
||||
|
||||
# The system prompt for the AI model.
|
||||
SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
|
||||
-You are a Python interpreter that can run any python code in a secure environment.
|
||||
- The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell.
|
||||
- You are given tasks to complete and you run python code to solve them.
|
||||
- It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
|
||||
- Plot visualizations using matplotlib or any other visualization library directly in the notebook.
|
||||
- You can install any pip package (if it exists) by running a cell with pip install.
|
||||
"
|
||||
|
||||
@@ -2,6 +2,3 @@ __pycache__
|
||||
storage
|
||||
.env
|
||||
output
|
||||
/storage_vector/
|
||||
/.idea/
|
||||
/.python-version
|
||||
|
||||
@@ -124,7 +124,7 @@ async def chat_config() -> ChatConfig:
|
||||
starter_questions = None
|
||||
conversation_starters = os.getenv("CONVERSATION_STARTERS")
|
||||
if conversation_starters and conversation_starters.strip():
|
||||
starter_questions = conversation_starters.strip().split("\\n")
|
||||
starter_questions = conversation_starters.strip().split("\n")
|
||||
return ChatConfig(starter_questions=starter_questions)
|
||||
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Literal, Optional, Set
|
||||
|
||||
from llama_index.core.llms import ChatMessage, MessageRole
|
||||
from llama_index.core.schema import NodeWithScore
|
||||
from pydantic import BaseModel, Field, validator, field_validator
|
||||
from pydantic import BaseModel, Field, validator
|
||||
from pydantic.alias_generators import to_camel
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
@@ -89,7 +89,7 @@ class ChatData(BaseModel):
|
||||
}
|
||||
}
|
||||
|
||||
@field_validator("messages")
|
||||
@validator("messages")
|
||||
def messages_must_not_be_empty(cls, v):
|
||||
if len(v) == 0:
|
||||
raise ValueError("Messages must not be empty")
|
||||
@@ -173,8 +173,7 @@ class SourceNodes(BaseModel):
|
||||
def from_source_node(cls, source_node: NodeWithScore):
|
||||
metadata = source_node.node.metadata
|
||||
url = cls.get_url_from_metadata(metadata)
|
||||
#text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
|
||||
text = source_node.node.text
|
||||
text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
|
||||
return cls(
|
||||
id=source_node.node.node_id,
|
||||
metadata=metadata,
|
||||
|
||||
@@ -87,7 +87,9 @@ class PrivateFileService:
|
||||
nodes = pipeline.run(documents=documents)
|
||||
|
||||
# Add the nodes to the index and persist it
|
||||
current_index = get_index()
|
||||
indexs = get_index()
|
||||
if len(indexs) > 0:
|
||||
current_index = list(indexs.values())[0]
|
||||
|
||||
# Insert the documents into the index
|
||||
if isinstance(current_index, LlamaCloudIndex):
|
||||
|
||||
@@ -6,10 +6,10 @@ from llama_index.core.settings import Settings
|
||||
from pydantic import BaseModel
|
||||
|
||||
NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
|
||||
"你是一个乐于助人的助手!你的任务是对用户可能会问的下一个问题给出建议。 "
|
||||
"\n这是对话历史记录"
|
||||
"You're a helpful assistant! Your task is to suggest the next question that user might ask. "
|
||||
"\nHere is the conversation history"
|
||||
"\n---------------------\n{conversation}\n---------------------"
|
||||
"考虑到对话历史记录,仅限于现在知识库已有内容, 请给我 $number_of_questions 个你接下来可能会问题的问题!"
|
||||
"Given the conversation history, please give me $number_of_questions questions that you might ask next!"
|
||||
)
|
||||
N_QUESTION_TO_GENERATE = 3
|
||||
|
||||
|
||||
@@ -43,7 +43,9 @@ def get_chat_engine(filters=None, params=None):
|
||||
description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具")
|
||||
|
||||
# Add query tool if index exists
|
||||
index = get_index()
|
||||
indexs = get_index()
|
||||
if len(indexs) > 0:
|
||||
index = list(indexs.values())[0]
|
||||
if index is not None:
|
||||
summary_index = SummaryIndex(index.vector_store.get_nodes(node_ids=None))
|
||||
summary_query_engine = summary_index.as_query_engine()
|
||||
|
||||
@@ -5,7 +5,7 @@ load_dotenv()
|
||||
import logging
|
||||
import os
|
||||
|
||||
from app.engine.loaders import get_documents
|
||||
from app.engine.loaders import get_document_Types, get_documents
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.settings import init_settings
|
||||
from llama_index.core.ingestion import IngestionPipeline
|
||||
@@ -19,17 +19,16 @@ logger = logging.getLogger()
|
||||
|
||||
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||
|
||||
|
||||
def get_doc_store():
|
||||
def get_doc_store(docType:str):
|
||||
|
||||
# If the storage directory is there, load the document store from it.
|
||||
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
|
||||
if os.path.exists(STORAGE_DIR):
|
||||
return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
|
||||
storeDir = os.path.join(STORAGE_DIR,docType)
|
||||
if os.path.exists(storeDir):
|
||||
return SimpleDocumentStore.from_persist_dir(storeDir)
|
||||
else:
|
||||
return SimpleDocumentStore()
|
||||
|
||||
|
||||
def run_pipeline(docstore, vector_store, documents):
|
||||
pipeline = IngestionPipeline(
|
||||
transformations=[
|
||||
@@ -49,7 +48,6 @@ def run_pipeline(docstore, vector_store, documents):
|
||||
|
||||
return nodes
|
||||
|
||||
|
||||
def persist_storage(docstore, vector_store):
|
||||
storage_context = StorageContext.from_defaults(
|
||||
docstore=docstore,
|
||||
@@ -57,18 +55,19 @@ def persist_storage(docstore, vector_store):
|
||||
)
|
||||
storage_context.persist(STORAGE_DIR)
|
||||
|
||||
|
||||
def generate_datasource():
|
||||
init_settings()
|
||||
logger.info("Generate index for the provided data")
|
||||
|
||||
# Get the stores and documents or create new ones
|
||||
documents = get_documents()
|
||||
docTypes = get_document_Types()
|
||||
for docType in docTypes:
|
||||
documents = get_documents(docType)
|
||||
# Set private=false to mark the document as public (required for filtering)
|
||||
for doc in documents:
|
||||
doc.metadata["private"] = "false"
|
||||
docstore = get_doc_store()
|
||||
vector_store = get_vector_store()
|
||||
docstore = get_doc_store(docType)
|
||||
vector_store = get_vector_store(docType)
|
||||
|
||||
# Run the ingestion pipeline
|
||||
_ = run_pipeline(docstore, vector_store, documents)
|
||||
@@ -78,7 +77,6 @@ def generate_datasource():
|
||||
|
||||
logger.info("Finished generating the index")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
from phoenix.trace import using_project
|
||||
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
|
||||
|
||||
@@ -1,22 +1,23 @@
|
||||
import logging
|
||||
from llama_index.core.indices import VectorStoreIndex
|
||||
from app.engine.vectordb import get_vector_store
|
||||
|
||||
from app.engine.generate import get_document_Types
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
index = None
|
||||
indexs = {}
|
||||
|
||||
def get_index(params=None):
|
||||
global index
|
||||
if index is None:
|
||||
global indexs
|
||||
if len(index) <= 0:
|
||||
logger.info("Connecting vector store...")
|
||||
|
||||
store = get_vector_store()
|
||||
docTypes = get_document_Types()
|
||||
for docType in docTypes:
|
||||
store = get_vector_store(docType)
|
||||
# Load the index from the vector store
|
||||
# If you are using a vector store that doesn't store text,
|
||||
# you must load the index from both the vector store and the document store
|
||||
index = VectorStoreIndex.from_vector_store(store)
|
||||
logger.info("Finished load index from vector store.")
|
||||
|
||||
return index
|
||||
indexs[docType] = index
|
||||
return indexs
|
||||
|
||||
@@ -13,8 +13,48 @@ def load_configs():
|
||||
configs = yaml.safe_load(f)
|
||||
return configs
|
||||
|
||||
def path_difference(path1:str, path2:str):
|
||||
import os
|
||||
path1 = os.path.abspath(path1)
|
||||
path2 = os.path.abspath(path2)
|
||||
|
||||
def get_documents():
|
||||
path1_parts = path1.split(os.path.sep)
|
||||
path2_parts = path2.split(os.path.sep)
|
||||
|
||||
for i, part in enumerate(path1_parts):
|
||||
if part != path2_parts[i]:
|
||||
break
|
||||
else:
|
||||
i += 1
|
||||
|
||||
pathKey = ''
|
||||
for j in range(i,len(path2_parts)):
|
||||
pathKey+=path2_parts[j] + '_'
|
||||
return pathKey[0:-1]
|
||||
|
||||
def get_document_Types():
|
||||
import os
|
||||
rootPath = 'data'
|
||||
configs = load_configs()
|
||||
if configs is not None and len(configs.items()) > 0:
|
||||
for loader_type, loader_config in configs.items():
|
||||
if loader_type == "file":
|
||||
rootPath = FileLoaderConfig(**loader_config).data_dir
|
||||
break
|
||||
|
||||
types = []
|
||||
dirStack = [rootPath]
|
||||
while len(dirStack) > 0:
|
||||
curDir = dirStack.pop()
|
||||
dirs = [os.path.join(curDir, d) for d in os.listdir(curDir) if os.path.isdir(os.path.join(curDir, d))]
|
||||
if len(dirs) > 0:
|
||||
for dir in dirs:
|
||||
dirStack.append(dir)
|
||||
else:
|
||||
types.append(path_difference(rootPath,curDir))
|
||||
return types
|
||||
|
||||
def get_documents(docType:str):
|
||||
documents = []
|
||||
config = load_configs()
|
||||
if config is None or len(config.items()) == 0:
|
||||
@@ -28,7 +68,7 @@ def get_documents():
|
||||
loader_config = loader_config or []
|
||||
match loader_type:
|
||||
case "file":
|
||||
document = get_file_documents(FileLoaderConfig(**loader_config))
|
||||
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
|
||||
case "web":
|
||||
document = get_web_documents(WebLoaderConfig(**loader_config))
|
||||
case "db":
|
||||
|
||||
@@ -20,7 +20,6 @@ class FileLoaderConfig(BaseModel):
|
||||
raise ValueError(f"Directory '{v}' does not exist")
|
||||
return v
|
||||
|
||||
|
||||
def llama_parse_parser():
|
||||
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
||||
raise ValueError(
|
||||
@@ -35,7 +34,6 @@ def llama_parse_parser():
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||
from llama_parse.utils import SUPPORTED_FILE_TYPES
|
||||
|
||||
@@ -45,8 +43,7 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||
def llama_local_extractor() -> Dict[str, BaseReader]:
|
||||
return {"json" : JSONReader}
|
||||
|
||||
|
||||
def get_file_documents(config: FileLoaderConfig):
|
||||
def get_file_documents(config: FileLoaderConfig, childPath: str):
|
||||
from llama_index.core.readers import SimpleDirectoryReader
|
||||
|
||||
try:
|
||||
@@ -63,7 +60,7 @@ def get_file_documents(config: FileLoaderConfig):
|
||||
file_extractor = llama_local_extractor()
|
||||
|
||||
reader = SimpleDirectoryReader(
|
||||
config.data_dir,
|
||||
os.path.join(config.data_dir,childPath.replace('_','\\')),
|
||||
recursive=True,
|
||||
filename_as_id=True,
|
||||
raise_on_error=True,
|
||||
|
||||
@@ -5,12 +5,14 @@ from qdrant_client import qdrant_client
|
||||
|
||||
qclient = None
|
||||
|
||||
def get_qdrant_vector_store():
|
||||
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
||||
def get_qdrant_vector_store(docType:str):
|
||||
collection_name = docType
|
||||
#collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
||||
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
||||
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
|
||||
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
|
||||
|
||||
vector_store_path =os.path.join(vector_store_path,docType)
|
||||
if not vector_store_path or not host:
|
||||
raise ValueError(
|
||||
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
|
||||
@@ -32,9 +34,11 @@ def get_qdrant_vector_store():
|
||||
vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
|
||||
return vector_store
|
||||
|
||||
def get_chroma_vector_store():
|
||||
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
||||
def get_chroma_vector_store(docType:str):
|
||||
#collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
||||
collection_name = docType
|
||||
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
||||
vector_store_path =os.path.join(vector_store_path,docType)
|
||||
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
|
||||
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
|
||||
if vector_store_path:
|
||||
@@ -55,16 +59,16 @@ def get_chroma_vector_store():
|
||||
)
|
||||
return store
|
||||
|
||||
def get_vector_store():
|
||||
def get_vector_store(docType:str):
|
||||
store_type=os.getenv("VECTOR_STORE_TYPE")
|
||||
|
||||
store = None
|
||||
|
||||
match store_type:
|
||||
case "chroma":
|
||||
store = get_chroma_vector_store()
|
||||
store = get_chroma_vector_store(docType)
|
||||
case "qdrant":
|
||||
store = get_qdrant_vector_store()
|
||||
store = get_qdrant_vector_store(docType)
|
||||
case _:
|
||||
raise ValueError(f"Invalid vector store type: {store_type}")
|
||||
|
||||
|
||||
+1
-21
@@ -3,10 +3,6 @@ from typing import Dict
|
||||
|
||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
from llama_index.core.settings import Settings
|
||||
from llama_index.llms.xinference import Xinference
|
||||
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
|
||||
|
||||
from app.xinference.base import XinferenceEmbedding
|
||||
|
||||
|
||||
def init_settings():
|
||||
@@ -30,9 +26,8 @@ def init_settings():
|
||||
init_azure_openai()
|
||||
case "t-systems":
|
||||
from .llmhub import init_llmhub
|
||||
|
||||
init_llmhub()
|
||||
case "xinference":
|
||||
init_xinference()
|
||||
case _:
|
||||
raise ValueError(f"Invalid model provider: {model_provider}")
|
||||
|
||||
@@ -57,21 +52,6 @@ def init_ollama():
|
||||
# )
|
||||
pass
|
||||
|
||||
def init_xinference():
|
||||
base_url = os.getenv("BASE_URL")
|
||||
model = os.getenv("MODEL")
|
||||
max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None
|
||||
temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
|
||||
|
||||
Settings.llm = Xinference(model, base_url, temperature, max_tokens)
|
||||
|
||||
embedding_base_url = os.getenv("EMBEDDING_BASE_URL")
|
||||
embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url
|
||||
|
||||
embed_model_name = os.getenv("EMBEDDING_MODEL")
|
||||
dimensions = os.getenv("EMBEDDING_DIM")
|
||||
dimensions = int(dimensions) if dimensions is not None else None
|
||||
Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url)
|
||||
|
||||
def init_openai():
|
||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
|
||||
@@ -1,272 +0,0 @@
|
||||
"""Xinference embeddings file."""
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from http import HTTPStatus
|
||||
from typing import Any, Dict, List, Optional, Union, Tuple
|
||||
|
||||
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
|
||||
from llama_index.core.bridge.pydantic import PrivateAttr
|
||||
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
|
||||
from llama_index.core.schema import ImageType
|
||||
from pydantic import Field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
# class XinferenceTextEmbeddingType(str, Enum):
|
||||
# """DashScope TextEmbedding text_type."""
|
||||
#
|
||||
# TEXT_TYPE_QUERY = "query"
|
||||
# TEXT_TYPE_DOCUMENT = "document"
|
||||
#
|
||||
#
|
||||
# class DashScopeTextEmbeddingModels(str, Enum):
|
||||
# """DashScope TextEmbedding models."""
|
||||
#
|
||||
# TEXT_EMBEDDING_V1 = "text-embedding-v1"
|
||||
# TEXT_EMBEDDING_V2 = "text-embedding-v2"
|
||||
# TEXT_EMBEDDING_V3 = "text-embedding-v3"
|
||||
#
|
||||
#
|
||||
# class DashScopeBatchTextEmbeddingModels(str, Enum):
|
||||
# """DashScope TextEmbedding models."""
|
||||
#
|
||||
# TEXT_EMBEDDING_ASYNC_V1 = "text-embedding-async-v1"
|
||||
# TEXT_EMBEDDING_ASYNC_V2 = "text-embedding-async-v2"
|
||||
# TEXT_EMBEDDING_ASYNC_V3 = "text-embedding-async-v3"
|
||||
|
||||
|
||||
EMBED_MAX_INPUT_LENGTH = 2048
|
||||
EMBED_MAX_BATCH_SIZE = 1
|
||||
|
||||
|
||||
# class DashScopeMultiModalEmbeddingModels(str, Enum):
|
||||
# """DashScope MultiModalEmbedding models."""
|
||||
#
|
||||
# MULTIMODAL_EMBEDDING_ONE_PEACE_V1 = "multimodal-embedding-one-peace-v1"
|
||||
|
||||
|
||||
# def get_text_embedding(
|
||||
# model: str,
|
||||
# text: Union[str, List[str]],
|
||||
# api_key: Optional[str] = None,
|
||||
# **kwargs: Any,
|
||||
# ) -> List[List[float]]:
|
||||
# """Call DashScope text embedding.
|
||||
# ref: https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-api-details.
|
||||
#
|
||||
# Args:
|
||||
# model (str): The `DashScopeTextEmbeddingModels`
|
||||
# text (Union[str, List[str]]): A single text or a list of texts to embed.
|
||||
#
|
||||
# Raises:
|
||||
# ImportError: need import dashscope
|
||||
#
|
||||
# Returns:
|
||||
# List[List[float]]: The embedding results, one per input text; if the call fails, every entry is None.
|
||||
# If some texts produce no output, the corresponding entries of the result are None.
|
||||
# """
|
||||
# try:
|
||||
# import dashscope
|
||||
# except ImportError:
|
||||
# raise ImportError("DashScope requires `pip install dashscope")
|
||||
# if isinstance(text, str):
|
||||
# text = [text]
|
||||
# response = dashscope.TextEmbedding.call(
|
||||
# model=model, input=text, api_key=api_key, kwargs=kwargs
|
||||
# )
|
||||
# embedding_results = [None] * len(text)
|
||||
# if response.status_code == HTTPStatus.OK:
|
||||
# for emb in response.output["embeddings"]:
|
||||
# embedding_results[emb["text_index"]] = emb["embedding"]
|
||||
# else:
|
||||
# logger.error("Calling TextEmbedding failed, details: %s" % response)
|
||||
#
|
||||
# return embedding_results
|
||||
#
|
||||
#
|
||||
# def get_batch_text_embedding(
|
||||
# model: str, url: str, api_key: Optional[str] = None, **kwargs: Any
|
||||
# ) -> Optional[str]:
|
||||
# """Call DashScope batch text embedding.
|
||||
#
|
||||
# Args:
|
||||
# model (str): The `DashScopeBatchTextEmbeddingModels`
|
||||
# url (str): The URL of a file containing the lines of text to embed.
|
||||
#
|
||||
# Raises:
|
||||
# ImportError: Need install dashscope package.
|
||||
#
|
||||
# Returns:
|
||||
# str: The url of the embedding result, format ref:
|
||||
# https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details
|
||||
# """
|
||||
# try:
|
||||
# import dashscope
|
||||
# except ImportError:
|
||||
# raise ImportError("DashScope requires `pip install dashscope")
|
||||
# response = dashscope.BatchTextEmbedding.call(
|
||||
# model=model, url=url, api_key=api_key, kwargs=kwargs
|
||||
# )
|
||||
# if response.status_code == HTTPStatus.OK:
|
||||
# return response.output["url"]
|
||||
# else:
|
||||
# logger.error("Calling BatchTextEmbedding failed, details: %s" % response)
|
||||
# return None
|
||||
|
||||
|
||||
# def get_multimodal_embedding(
|
||||
# model: str, input: list, api_key: Optional[str] = None, **kwargs: Any
|
||||
# ) -> List[float]:
|
||||
# """Call DashScope multimodal embedding.
|
||||
# ref: https://help.aliyun.com/zh/dashscope/developer-reference/one-peace-multimodal-embedding-api-details.
|
||||
#
|
||||
# Args:
|
||||
# model (str): The `DashScopeMultiModalEmbeddingModels`
|
||||
# input (list): The input of the embedding, e.g.:
|
||||
# [{'factor': 1, 'text': '你好'},
|
||||
# {'factor': 2, 'audio': 'https://dashscope.oss-cn-beijing.aliyuncs.com/audios/cow.flac'},
|
||||
# {'factor': 3, 'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png'}]
|
||||
#
|
||||
# Raises:
|
||||
# ImportError: Need install dashscope package.
|
||||
#
|
||||
# Returns:
|
||||
# List[float]: Embedding result, if failed return empty list.
|
||||
# """
|
||||
# try:
|
||||
# import dashscope
|
||||
# except ImportError:
|
||||
# raise ImportError("DashScope requires `pip install dashscope")
|
||||
# response = dashscope.MultiModalEmbedding.call(
|
||||
# model=model, input=input, api_key=api_key, kwargs=kwargs
|
||||
# )
|
||||
# if response.status_code == HTTPStatus.OK:
|
||||
# return response.output["embedding"]
|
||||
# else:
|
||||
# logger.error("Calling MultiModalEmbedding failed, details: %s" % response)
|
||||
# return []
|
||||
|
||||
class XinferenceEmbedding(BaseEmbedding):
|
||||
"""Xinference class for text embedding.
|
||||
|
||||
"""
|
||||
model_description: Dict[str, Any] = Field(
|
||||
description="The model description from Xinference."
|
||||
)
|
||||
_generator: Any = PrivateAttr()
|
||||
_model_uid: str = Field(description="The Xinference model to use.")
|
||||
_endpoint: str = Field(description="The Xinference endpoint URL to use.")
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model_uid: str,
|
||||
endpoint: str,
|
||||
embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
|
||||
dimensions: Optional[int] = None,
|
||||
additional_kwargs: Optional[Dict[str, Any]] = None,
|
||||
api_key: Optional[str] = None,
|
||||
api_base: Optional[str] = None,
|
||||
api_version: Optional[str] = None,
|
||||
max_retries: int = 10,
|
||||
# timeout: float = 60.0,
|
||||
# reuse_client: bool = True,
|
||||
# callback_manager: Optional[CallbackManager] = None,
|
||||
# default_headers: Optional[Dict[str, str]] = None,
|
||||
# http_client: Optional[httpx.Client] = None,
|
||||
# async_http_client: Optional[httpx.AsyncClient] = None,
|
||||
# num_workers: Optional[int] = None,
|
||||
**kwargs: Any,
|
||||
) -> None:
|
||||
generator, model_description = self.load_model(
|
||||
model_uid, endpoint
|
||||
)
|
||||
self._generator = generator
|
||||
#self._model_uid = model_uid
|
||||
#self._endpoint = endpoint
|
||||
super().__init__(
|
||||
embed_batch_size=embed_batch_size,
|
||||
dimensions=dimensions,
|
||||
#callback_manager=callback_manager,
|
||||
model_name=model_uid,
|
||||
additional_kwargs=additional_kwargs,
|
||||
api_key=api_key,
|
||||
api_base=api_base,
|
||||
api_version=api_version,
|
||||
max_retries=max_retries,
|
||||
# reuse_client=reuse_client,
|
||||
# timeout=timeout,
|
||||
# default_headers=default_headers,
|
||||
# num_workers=num_workers,
|
||||
**kwargs,
|
||||
)
|
||||
|
||||
def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, int, dict]:
|
||||
try:
|
||||
from xinference.client import RESTfulClient
|
||||
except ImportError:
|
||||
raise ImportError(
|
||||
"Could not import Xinference library."
|
||||
'Please install Xinference with `pip install "xinference[all]"`'
|
||||
)
|
||||
|
||||
client = RESTfulClient(endpoint)
|
||||
|
||||
try:
|
||||
assert isinstance(client, RESTfulClient)
|
||||
except AssertionError:
|
||||
raise RuntimeError(
|
||||
"Could not create RESTfulClient instance."
|
||||
"Please make sure Xinference endpoint is running at the correct port."
|
||||
)
|
||||
|
||||
generator = client.get_model(model_uid)
|
||||
model_description = client.list_models()[model_uid]
|
||||
|
||||
try:
|
||||
assert generator is not None
|
||||
assert model_description is not None
|
||||
except AssertionError:
|
||||
raise RuntimeError(
|
||||
"Could not get model from endpoint."
|
||||
"Please make sure Xinference endpoint is running at the correct port."
|
||||
)
|
||||
|
||||
model = model_description["model_name"]
|
||||
|
||||
return generator, model_description
|
||||
|
||||
@classmethod
|
||||
def class_name(cls) -> str:
|
||||
return "XinferenceEmbedding"
|
||||
|
||||
def _get_text_embedding(self, text: str) -> Embedding:
|
||||
"""
|
||||
Embed the input text synchronously.
|
||||
|
||||
Subclasses should implement this method. Reference get_text_embedding's
|
||||
docstring for more information.
|
||||
"""
|
||||
assert self._generator is not None
|
||||
|
||||
response = self._generator.create_embedding(input=text)
|
||||
return response['data'][0]['embedding']
|
||||
|
||||
def _get_query_embedding(self, query: str) -> Embedding:
|
||||
"""
|
||||
Embed the input query synchronously.
|
||||
|
||||
Subclasses should implement this method. Reference get_query_embedding's
|
||||
docstring for more information.
|
||||
"""
|
||||
return self._get_text_embedding(query)
|
||||
|
||||
async def _aget_query_embedding(self, query: str) -> Embedding:
|
||||
"""
|
||||
Embed the input query asynchronously.
|
||||
|
||||
Subclasses should implement this method. Reference get_query_embedding's
|
||||
docstring for more information.
|
||||
"""
|
||||
return self._get_query_embedding(query)
|
||||
Generated
-3979
File diff suppressed because it is too large
Load Diff
@@ -23,9 +23,6 @@ llama-index-callbacks-arize-phoenix = "^0.1.4"
|
||||
llama-index-llms-dashscope = "^0.1.2"
|
||||
llama-index-embeddings-dashscope = "^0.1.4"
|
||||
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
|
||||
#xinference = "^0.14.1"
|
||||
xinference.client = "^0.14.1"
|
||||
llama-index-llms-xinference = "^0.1.2"
|
||||
qdrant-client="^1.10.1"
|
||||
llama-index-vector-stores-qdrant = "^0.2.14"
|
||||
chroma="^0.5.5"
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
rmdir /S /Q storage_vector
|
||||
rmdir /S /Q storage
|
||||
|
||||
python tests/query.py
|
||||
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python tests/query.py
|
||||
+1
-1
@@ -1 +1 @@
|
||||
python main.py
|
||||
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python main.py
|
||||
@@ -19,7 +19,9 @@ def main():
|
||||
init_settings()
|
||||
init_observability()
|
||||
|
||||
index = get_index()
|
||||
indexs = get_index()
|
||||
if len(indexs) > 0:
|
||||
index = list(indexs.values())[0]
|
||||
|
||||
top_k = 5
|
||||
filters = generate_filters([])
|
||||
|
||||
@@ -1,10 +0,0 @@
|
||||
# The backend API for chat endpoint.
|
||||
#NEXT_PUBLIC_CHAT_API=http://localhost:8000/api/chat
|
||||
NEXT_PUBLIC_CHAT_API=http://10.1.6.41:8000/api/chat
|
||||
|
||||
#PHOENIX_SERVER_URL=http://localhost:6006/
|
||||
PHOENIX_SERVER_URL=http://10.1.6.41:6006/
|
||||
|
||||
# Lets the user change indexes in LlamaCloud projects
|
||||
NEXT_PUBLIC_USE_LLAMACLOUD=false
|
||||
|
||||
@@ -4,7 +4,7 @@ const phoenixUrl = process.env.PHOENIX_SERVER_URL;
|
||||
|
||||
export default function Header() {
|
||||
return (
|
||||
<div className="z-10 w-full items-center justify-between font-mono text-sm lg:flex">
|
||||
<div className="z-10 max-w-5xl w-full items-center justify-between font-mono text-sm lg:flex">
|
||||
<p className="fixed left-0 top-0 flex w-full justify-center border-b border-gray-300 bg-gradient-to-b from-zinc-200 pb-6 pt-8 backdrop-blur-2xl dark:border-neutral-800 dark:bg-zinc-800/30 dark:from-inherit lg:static lg:w-auto lg:rounded-xl lg:border lg:bg-gray-200 lg:p-4 lg:dark:bg-zinc-800/30">
|
||||
<code className="font-mono font-bold"><a href="javascript:location.reload();">清空当前会话</a></code>
|
||||
</p>
|
||||
|
||||
@@ -99,8 +99,9 @@ export default function ChatInput(
|
||||
</div>
|
||||
)}
|
||||
<div className="flex w-full items-start justify-between gap-4 ">
|
||||
<Input
|
||||
<textarea
|
||||
autoFocus
|
||||
rows={2}
|
||||
name="message"
|
||||
placeholder="请输入消息"
|
||||
className="flex-1"
|
||||
|
||||
@@ -127,26 +127,9 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
|
||||
}
|
||||
|
||||
// node generated by unknown loader, implement renderer by analyzing logged out metadata
|
||||
// return (
|
||||
// <p>
|
||||
// 对不起, 未知文件类型. 无法打开当前的来源文件。
|
||||
// </p>
|
||||
// );
|
||||
return (
|
||||
<div className="flex items-center my-2">
|
||||
<span>{nodeInfo.text}</span>
|
||||
<Button
|
||||
onClick={() => copyToClipboard(nodeInfo.url!)}
|
||||
size="icon"
|
||||
variant="ghost"
|
||||
className="h-12 w-12 shrink-0"
|
||||
>
|
||||
{isCopied ? (
|
||||
<Check className="h-4 w-4" />
|
||||
) : (
|
||||
<Copy className="h-4 w-4" />
|
||||
)}
|
||||
</Button>
|
||||
</div>
|
||||
<p>
|
||||
对不起, 未知文件类型. 无法打开当前的来源文件。
|
||||
</p>
|
||||
);
|
||||
}
|
||||
|
||||
@@ -10,7 +10,7 @@ export interface ChatHandler {
|
||||
data?: any;
|
||||
},
|
||||
) => void;
|
||||
handleInputChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
|
||||
handleInputChange: (e: React.ChangeEvent<HTMLTextAreaElement>) => void;
|
||||
reload?: () => void;
|
||||
stop?: () => void;
|
||||
onFileUpload?: (file: File) => Promise<void>;
|
||||
|
||||
Generated
-16885
File diff suppressed because it is too large
Load Diff
@@ -1,3 +0,0 @@
|
||||
ENV_PHOENIX_HOST=0.0.0.0
|
||||
ENV_PHOENIX_PORT=6006
|
||||
PHOENIX_HOST_ROOT_PATH=./.phoenix/
|
||||
@@ -2,4 +2,4 @@ SET ENV_PHOENIX_HOST=0.0.0.0
|
||||
SET ENV_PHOENIX_PORT=6006
|
||||
SET PHOENIX_HOST_ROOT_PATH=./.phoenix/
|
||||
|
||||
python phoenixserver.py
|
||||
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-pCyqx0Uo-py3.11\Scripts\python phoenixserver.py
|
||||
Reference in New Issue
Block a user