1 Commits

Author SHA1 Message Date
wanyaokun 7e58a1a223 实现多工程数据存储支持 2024-08-13 13:11:17 +08:00
48 changed files with 116 additions and 23326 deletions
-80
View File
@@ -1,80 +0,0 @@
# The Llama Cloud API key.
# LLAMA_CLOUD_API_KEY=
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
# The provider for the AI models to use.
MODEL_PROVIDER=dashscope
# The name of LLM model to use.
MODEL=qwen-max
# 是否启用检索重排功能
ENABLE_RERANK=true
# Name of the embedding model to use.
EMBEDDING_MODEL=text-embedding-v2
# Dimension of the embedding model to use.
EMBEDDING_DIM=1024
# The questions to help users get started (multi-line).
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
# The OpenAI API key to use.
# OPENAI_API_KEY=
# Temperature for sampling from the model.
# LLM_TEMPERATURE=
# Maximum number of tokens to generate.
# LLM_MAX_TOKENS=
# The number of similar embeddings to return when retrieving documents.
TOP_K=5
# The time in milliseconds to wait for the stream to return a response.
STREAM_TIMEOUT=60000
# 向量存储数据库类型,目前可选:chroma、qdrant
VECTOR_STORE_TYPE=chroma
# The name of the collection in your vector database
VECTOR_STORE_COLLECTION=default
# The API endpoint for your vector database
# VECTOR_STORE_HOST=
# The port for your vector database
# VECTOR_STORE_PORT=
# The local path to the vector database.
# Specify this if you are using a local vector database.
# Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
VECTOR_STORE_PATH=./storage_vector
PHOENIX_API_KEY=123456
PHOENIX_URL=http://localhost:6006/v1/traces
PHOENIX_PROJECT_NAME=ly_zjapp
#OTEL_SERVICE_NAME=ly_zjapp
#OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
# The address to start the backend app.
APP_HOST=0.0.0.0
# The port to start the backend app.
APP_PORT=8000
FILESERVER_URL_PREFIX=/api/files
# E2B_API_KEY key is required to run code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
# E2B_API_KEY=
# The system prompt for the AI model.
SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
-You are a Python interpreter that can run any python code in a secure environment.
- The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell.
- You are given tasks to complete and you run python code to solve them.
- It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
- Plot visualizations using matplotlib or any other visualization library directly in the notebook.
- You can install any pip package (if it exists) by running a cell with pip install.
"
-103
View File
@@ -1,103 +0,0 @@
# The Llama Cloud API key.
# LLAMA_CLOUD_API_KEY=
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
#---------- Xinference ----------------
# The provider for the AI models to use.
MODEL_PROVIDER=xinference
# The OpenAI API key to use.
OPENAI_API_KEY=xinference
BASE_URL=http://10.1.0.142:9995
MODEL=Qwen2-72B-Instruct-GPTQ-Int8
# Temperature for sampling from the model.
LLM_TEMPERATURE=0.1
# Maximum number of tokens to generate.
#LLM_MAX_TOKENS=
# Name of the embedding model to use.
EMBEDDING_MODEL=bge-m3
EMBEDDING_BASE_URL=http://10.1.16.39:9995
# Dimension of the embedding model to use.
EMBEDDING_DIM=1024
##---------- OpenAI ----------------
## The provider for the AI models to use.
#MODEL_PROVIDER=openai
## The OpenAI API key to use.
#OPENAI_API_KEY=xinference
#BASE_URL=http://10.1.0.142:9995/v1
#MODEL=Qwen2-72B-Instruct-GPTQ-Int4
## Temperature for sampling from the model.
#LLM_TEMPERATURE=0.1
## Maximum number of tokens to generate.
##LLM_MAX_TOKENS=
## Name of the embedding model to use.
#EMBEDDING_MODEL=text-embedding-v2
## Dimension of the embedding model to use.
#EMBEDDING_DIM=1024
#---------- DashScope ----------------
#DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
## The provider for the AI models to use.
#MODEL_PROVIDER=dashscope
## The name of LLM model to use.
#MODEL=qwen-max
## Name of the embedding model to use.
#EMBEDDING_MODEL=text-embedding-v2
#--------------------------
# 是否启用检索重排功能
ENABLE_RERANK=true
# The questions to help users get started (multi-line).
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
# The number of similar embeddings to return when retrieving documents.
TOP_K=5
# The time in milliseconds to wait for the stream to return a response.
STREAM_TIMEOUT=60000
# 向量存储数据库类型,目前可选:chroma、qdrant
VECTOR_STORE_TYPE=chroma
# The name of the collection in your vector database
VECTOR_STORE_COLLECTION=default
# The API endpoint for your vector database
# VECTOR_STORE_HOST=
# The port for your vector database
# VECTOR_STORE_PORT=
# The local path to the vector database.
# Specify this if you are using a local vector database.
# Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
VECTOR_STORE_PATH=./storage_vector
PHOENIX_API_KEY=123456
PHOENIX_URL=http://localhost:6006/v1/traces
PHOENIX_PROJECT_NAME=ly_zjapp
#OTEL_SERVICE_NAME=ly_zjapp
#OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
# The address to start the backend app.
APP_HOST=0.0.0.0
# The port to start the backend app.
APP_PORT=8000
FILESERVER_URL_PREFIX=/api/files
# E2B_API_KEY key is required to run code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
# E2B_API_KEY=
# The system prompt for the AI model.
SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
-You are a Python interpreter that can run any python code in a secure environment.
- The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell.
- You are given tasks to complete and you run python code to solve them.
- It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
- Plot visualizations using matplotlib or any other visualization library directly in the notebook.
- You can install any pip package (if it exists) by running a cell with pip install.
"
-3
View File
@@ -2,6 +2,3 @@ __pycache__
storage
.env
output
/storage_vector/
/.idea/
/.python-version
+1 -1
View File
@@ -124,7 +124,7 @@ async def chat_config() -> ChatConfig:
starter_questions = None
conversation_starters = os.getenv("CONVERSATION_STARTERS")
if conversation_starters and conversation_starters.strip():
starter_questions = conversation_starters.strip().split("\\n")
starter_questions = conversation_starters.strip().split("\n")
return ChatConfig(starter_questions=starter_questions)
+3 -4
View File
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Literal, Optional, Set
from llama_index.core.llms import ChatMessage, MessageRole
from llama_index.core.schema import NodeWithScore
from pydantic import BaseModel, Field, validator, field_validator
from pydantic import BaseModel, Field, validator
from pydantic.alias_generators import to_camel
logger = logging.getLogger("uvicorn")
@@ -89,7 +89,7 @@ class ChatData(BaseModel):
}
}
@field_validator("messages")
@validator("messages")
def messages_must_not_be_empty(cls, v):
if len(v) == 0:
raise ValueError("Messages must not be empty")
@@ -173,8 +173,7 @@ class SourceNodes(BaseModel):
def from_source_node(cls, source_node: NodeWithScore):
metadata = source_node.node.metadata
url = cls.get_url_from_metadata(metadata)
#text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
text = source_node.node.text
text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
return cls(
id=source_node.node.node_id,
metadata=metadata,
+3 -1
View File
@@ -87,7 +87,9 @@ class PrivateFileService:
nodes = pipeline.run(documents=documents)
# Add the nodes to the index and persist it
current_index = get_index()
indexs = get_index()
if len(indexs) > 0:
current_index = list(indexs.values())[0]
# Insert the documents into the index
if isinstance(current_index, LlamaCloudIndex):
+3 -3
View File
@@ -6,10 +6,10 @@ from llama_index.core.settings import Settings
from pydantic import BaseModel
NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
"你是一个乐于助人的助手!你的任务是对用户可能会问的下一个问题给出建议。 "
"\n这是对话历史记录"
"You're a helpful assistant! Your task is to suggest the next question that user might ask. "
"\nHere is the conversation history"
"\n---------------------\n{conversation}\n---------------------"
"考虑到对话历史记录,仅限于现在知识库已有内容, 请给我 $number_of_questions 个你接下来可能会问题的问题!"
"Given the conversation history, please give me $number_of_questions questions that you might ask next!"
)
N_QUESTION_TO_GENERATE = 3
+3 -1
View File
@@ -43,7 +43,9 @@ def get_chat_engine(filters=None, params=None):
description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具")
# Add query tool if index exists
index = get_index()
indexs = get_index()
if len(indexs) > 0:
index = list(indexs.values())[0]
if index is not None:
summary_index = SummaryIndex(index.vector_store.get_nodes(node_ids=None))
summary_query_engine = summary_index.as_query_engine()
+17 -19
View File
@@ -5,7 +5,7 @@ load_dotenv()
import logging
import os
from app.engine.loaders import get_documents
from app.engine.loaders import get_document_Types, get_documents
from app.engine.vectordb import get_vector_store
from app.settings import init_settings
from llama_index.core.ingestion import IngestionPipeline
@@ -19,17 +19,16 @@ logger = logging.getLogger()
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
def get_doc_store(docType:str):
    """Return the document store belonging to one document type.

    Args:
        docType: Key of the document type; it is used as the name of the
            sub-directory of ``STORAGE_DIR`` that holds the persisted store.

    Returns:
        The store loaded from ``STORAGE_DIR/<docType>`` when that directory
        exists, otherwise a new, empty in-memory ``SimpleDocumentStore``.
    """
    storeDir = os.path.join(STORAGE_DIR, docType)
    if not os.path.exists(storeDir):
        # Nothing has been persisted for this type yet, and a store cannot be
        # loaded from a directory that does not exist.
        return SimpleDocumentStore()
    return SimpleDocumentStore.from_persist_dir(storeDir)
def run_pipeline(docstore, vector_store, documents):
pipeline = IngestionPipeline(
transformations=[
@@ -49,7 +48,6 @@ def run_pipeline(docstore, vector_store, documents):
return nodes
def persist_storage(docstore, vector_store):
storage_context = StorageContext.from_defaults(
docstore=docstore,
@@ -57,28 +55,28 @@ def persist_storage(docstore, vector_store):
)
storage_context.persist(STORAGE_DIR)
def generate_datasource():
    """Build and persist one index per document type found in the data directory.

    For every type returned by ``get_document_Types()`` the documents are
    loaded, marked as public, run through the ingestion pipeline and persisted.

    The storage context is persisted to ``STORAGE_DIR/<docType>``, the same
    directory ``get_doc_store(docType)`` loads from. Persisting every type to
    the shared ``STORAGE_DIR`` root would make each type overwrite the previous
    one and the saved document store would never be found on the next run.
    """
    init_settings()
    logger.info("Generate index for the provided data")

    for docType in get_document_Types():
        # Get the stores and documents or create new ones
        documents = get_documents(docType)
        # Set private=false to mark the document as public (required for filtering)
        for doc in documents:
            doc.metadata["private"] = "false"
        docstore = get_doc_store(docType)
        vector_store = get_vector_store(docType)

        # Run the ingestion pipeline
        _ = run_pipeline(docstore, vector_store, documents)

        # Build the index and persist storage in the per-type directory
        storage_context = StorageContext.from_defaults(
            docstore=docstore,
            vector_store=vector_store,
        )
        storage_context.persist(os.path.join(STORAGE_DIR, docType))

    logger.info("Finished generating the index")
if __name__ == "__main__":
from phoenix.trace import using_project
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
+14 -13
View File
@@ -1,22 +1,23 @@
import logging
from llama_index.core.indices import VectorStoreIndex
from app.engine.vectordb import get_vector_store
from app.engine.generate import get_document_Types
logger = logging.getLogger("uvicorn")
index = None
indexs = {}
def get_index(params=None):
    """Return the cached mapping of document type to vector-store index.

    On the first call the mapping is filled with one ``VectorStoreIndex`` per
    document type reported by ``get_document_Types()``; later calls return the
    same module-level dictionary.

    Args:
        params: Not used by this function; kept for caller compatibility.

    Returns:
        dict: document type key -> ``VectorStoreIndex``. Empty when no
        document type exists.
    """
    global indexs
    # The cache is the module-level ``indexs`` dict. Checking the loop variable
    # ``index`` here instead raised UnboundLocalError on every call.
    if not indexs:
        logger.info("Connecting vector store...")
        for docType in get_document_Types():
            store = get_vector_store(docType)
            # Load the index from the vector store
            # If you are using a vector store that doesn't store text,
            # you must load the index from both the vector store and the document store
            index = VectorStoreIndex.from_vector_store(store)
            logger.info("Finished load index from vector store.")
            indexs[docType] = index
    return indexs
+42 -2
View File
@@ -13,8 +13,48 @@ def load_configs():
configs = yaml.safe_load(f)
return configs
def path_difference(path1:str, path2:str):
    """Return the part of ``path2`` that lies below its common prefix with ``path1``.

    Both paths are made absolute, split into components, and the leading
    components they share are dropped from ``path2``. The remaining
    components are joined with ``'_'``.

    Args:
        path1: Base path.
        path2: Path to express relative to the shared prefix.

    Returns:
        str: Remaining components of ``path2`` joined by ``'_'``; an empty
        string when nothing remains (for example when both paths are equal,
        or when ``path2`` is an ancestor of ``path1``).
    """
    import os

    base_parts = os.path.abspath(path1).split(os.path.sep)
    target_parts = os.path.abspath(path2).split(os.path.sep)

    # zip() stops at the shorter path, so a ``path2`` with fewer components
    # than ``path1`` no longer causes an IndexError.
    shared = 0
    for base_part, target_part in zip(base_parts, target_parts):
        if base_part != target_part:
            break
        shared += 1

    return '_'.join(target_parts[shared:])
def get_document_Types():
    """List the document type keys found under the data directory.

    The root directory is ``'data'`` unless the loader configuration has a
    ``file`` entry, in which case that entry's ``data_dir`` is used. Every
    directory below the root that has no sub-directories yields one key,
    built by ``path_difference(root, directory)``. A root without
    sub-directories therefore yields a single empty-string key.

    Returns:
        list[str]: The keys in sorted order. ``os.listdir`` gives no ordering
        guarantee, so sorting keeps the result stable for callers that pick
        the first entry.
    """
    import os

    rootPath = 'data'
    configs = load_configs()
    if configs is not None and len(configs.items()) > 0:
        for loader_type, loader_config in configs.items():
            if loader_type == "file":
                # An empty "file:" entry is parsed as None; treat it as "no options".
                rootPath = FileLoaderConfig(**(loader_config or {})).data_dir
                break

    types = []
    pending = [rootPath]
    while pending:
        curDir = pending.pop()
        subDirs = [
            os.path.join(curDir, entry)
            for entry in os.listdir(curDir)
            if os.path.isdir(os.path.join(curDir, entry))
        ]
        if subDirs:
            pending.extend(subDirs)
        else:
            # A directory without sub-directories is one document type.
            types.append(path_difference(rootPath, curDir))
    return sorted(types)
def get_documents(docType:str):
documents = []
config = load_configs()
if config is None or len(config.items()) == 0:
@@ -28,7 +68,7 @@ def get_documents():
loader_config = loader_config or []
match loader_type:
case "file":
document = get_file_documents(FileLoaderConfig(**loader_config))
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
case "web":
document = get_web_documents(WebLoaderConfig(**loader_config))
case "db":
+2 -5
View File
@@ -20,7 +20,6 @@ class FileLoaderConfig(BaseModel):
raise ValueError(f"Directory '{v}' does not exist")
return v
def llama_parse_parser():
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
raise ValueError(
@@ -35,7 +34,6 @@ def llama_parse_parser():
)
return parser
def llama_parse_extractor() -> Dict[str, LlamaParse]:
from llama_parse.utils import SUPPORTED_FILE_TYPES
@@ -45,8 +43,7 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
def llama_local_extractor() -> Dict[str, BaseReader]:
    """Return the file extractors used for local parsing: ``"json"`` mapped to ``JSONReader``."""
    extractors = {"json" : JSONReader}
    return extractors
def get_file_documents(config: FileLoaderConfig):
def get_file_documents(config: FileLoaderConfig, childPath: str):
from llama_index.core.readers import SimpleDirectoryReader
try:
@@ -63,7 +60,7 @@ def get_file_documents(config: FileLoaderConfig):
file_extractor = llama_local_extractor()
reader = SimpleDirectoryReader(
config.data_dir,
os.path.join(config.data_dir,childPath.replace('_','\\')),
recursive=True,
filename_as_id=True,
raise_on_error=True,
+12 -8
View File
@@ -5,12 +5,14 @@ from qdrant_client import qdrant_client
qclient = None
def get_qdrant_vector_store():
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
def get_qdrant_vector_store(docType:str):
collection_name = docType
#collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
vector_store_path = os.getenv("VECTOR_STORE_PATH")
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
vector_store_path =os.path.join(vector_store_path,docType)
if not vector_store_path or not host:
raise ValueError(
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
@@ -32,9 +34,11 @@ def get_qdrant_vector_store():
vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
return vector_store
def get_chroma_vector_store():
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
def get_chroma_vector_store(docType:str):
#collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
collection_name = docType
vector_store_path = os.getenv("VECTOR_STORE_PATH")
vector_store_path =os.path.join(vector_store_path,docType)
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
if vector_store_path:
@@ -55,16 +59,16 @@ def get_chroma_vector_store():
)
return store
def get_vector_store():
def get_vector_store(docType:str):
store_type=os.getenv("VECTOR_STORE_TYPE")
store = None
match store_type:
case "chroma":
store = get_chroma_vector_store()
store = get_chroma_vector_store(docType)
case "qdrant":
store = get_qdrant_vector_store()
store = get_qdrant_vector_store(docType)
case _:
raise ValueError(f"Invalid vector store type: {store_type}")
+1 -21
View File
@@ -3,10 +3,6 @@ from typing import Dict
from llama_index.core.constants import DEFAULT_TEMPERATURE
from llama_index.core.settings import Settings
from llama_index.llms.xinference import Xinference
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
from app.xinference.base import XinferenceEmbedding
def init_settings():
@@ -30,9 +26,8 @@ def init_settings():
init_azure_openai()
case "t-systems":
from .llmhub import init_llmhub
init_llmhub()
case "xinference":
init_xinference()
case _:
raise ValueError(f"Invalid model provider: {model_provider}")
@@ -57,21 +52,6 @@ def init_ollama():
# )
pass
def init_xinference():
    """Configure ``Settings.llm`` and ``Settings.embed_model`` for Xinference.

    Environment variables read:
        BASE_URL: Endpoint of the Xinference LLM service.
        MODEL: Model uid of the LLM.
        LLM_MAX_TOKENS: Optional maximum number of tokens to generate.
        LLM_TEMPERATURE: Optional sampling temperature; falls back to
            ``DEFAULT_XINFERENCE_TEMP``.
        EMBEDDING_BASE_URL: Optional endpoint of the embedding service;
            falls back to ``BASE_URL``.
        EMBEDDING_MODEL: Model uid of the embedding model.
        EMBEDDING_DIM: Optional embedding dimension.

    A variable that is set but empty (for example ``LLM_MAX_TOKENS=``) is
    treated as unset instead of failing in ``int()`` / ``float()``.
    """
    base_url = os.getenv("BASE_URL")
    model = os.getenv("MODEL")

    raw_max_tokens = os.getenv("LLM_MAX_TOKENS")
    max_tokens = int(raw_max_tokens) if raw_max_tokens else None
    raw_temperature = os.getenv("LLM_TEMPERATURE")
    temperature = float(raw_temperature) if raw_temperature else float(DEFAULT_XINFERENCE_TEMP)
    Settings.llm = Xinference(model, base_url, temperature, max_tokens)

    # The embedding model may be served by a different endpoint than the LLM.
    embedding_base_url = os.getenv("EMBEDDING_BASE_URL") or base_url
    embed_model_name = os.getenv("EMBEDDING_MODEL")
    raw_dimensions = os.getenv("EMBEDDING_DIM")
    dimensions = int(raw_dimensions) if raw_dimensions else None
    # EMBEDDING_DIM was parsed but never handed to the embedding model before.
    Settings.embed_model = XinferenceEmbedding(
        embed_model_name, embedding_base_url, dimensions=dimensions
    )
def init_openai():
from llama_index.core.constants import DEFAULT_TEMPERATURE
View File
-272
View File
@@ -1,272 +0,0 @@
"""Xinference embeddings file."""
import logging
from enum import Enum
from http import HTTPStatus
from typing import Any, Dict, List, Optional, Union, Tuple
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
from llama_index.core.schema import ImageType
from pydantic import Field
logger = logging.getLogger(__name__)
# class XinferenceTextEmbeddingType(str, Enum):
# """DashScope TextEmbedding text_type."""
#
# TEXT_TYPE_QUERY = "query"
# TEXT_TYPE_DOCUMENT = "document"
#
#
# class DashScopeTextEmbeddingModels(str, Enum):
# """DashScope TextEmbedding models."""
#
# TEXT_EMBEDDING_V1 = "text-embedding-v1"
# TEXT_EMBEDDING_V2 = "text-embedding-v2"
# TEXT_EMBEDDING_V3 = "text-embedding-v3"
#
#
# class DashScopeBatchTextEmbeddingModels(str, Enum):
# """DashScope TextEmbedding models."""
#
# TEXT_EMBEDDING_ASYNC_V1 = "text-embedding-async-v1"
# TEXT_EMBEDDING_ASYNC_V2 = "text-embedding-async-v2"
# TEXT_EMBEDDING_ASYNC_V3 = "text-embedding-async-v3"
EMBED_MAX_INPUT_LENGTH = 2048
EMBED_MAX_BATCH_SIZE = 1
# class DashScopeMultiModalEmbeddingModels(str, Enum):
# """DashScope MultiModalEmbedding models."""
#
# MULTIMODAL_EMBEDDING_ONE_PEACE_V1 = "multimodal-embedding-one-peace-v1"
# def get_text_embedding(
# model: str,
# text: Union[str, List[str]],
# api_key: Optional[str] = None,
# **kwargs: Any,
# ) -> List[List[float]]:
# """Call DashScope text embedding.
# ref: https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-api-details.
#
# Args:
# model (str): The `DashScopeTextEmbeddingModels`
# text (Union[str, List[str]]): text or list text to embedding.
#
# Raises:
# ImportError: need import dashscope
#
# Returns:
# List[List[float]]: The list of embedding result, if failed return empty list.
# if some of test no output, the correspond index of output is None.
# """
# try:
# import dashscope
# except ImportError:
# raise ImportError("DashScope requires `pip install dashscope")
# if isinstance(text, str):
# text = [text]
# response = dashscope.TextEmbedding.call(
# model=model, input=text, api_key=api_key, kwargs=kwargs
# )
# embedding_results = [None] * len(text)
# if response.status_code == HTTPStatus.OK:
# for emb in response.output["embeddings"]:
# embedding_results[emb["text_index"]] = emb["embedding"]
# else:
# logger.error("Calling TextEmbedding failed, details: %s" % response)
#
# return embedding_results
#
#
# def get_batch_text_embedding(
# model: str, url: str, api_key: Optional[str] = None, **kwargs: Any
# ) -> Optional[str]:
# """Call DashScope batch text embedding.
#
# Args:
# model (str): The `DashScopeMultiModalEmbeddingModels`
# url (str): The url of the file to embedding which with lines of text to embedding.
#
# Raises:
# ImportError: Need install dashscope package.
#
# Returns:
# str: The url of the embedding result, format ref:
# https://help.aliyun.com/zh/dashscope/developer-reference/text-embedding-async-api-details
# """
# try:
# import dashscope
# except ImportError:
# raise ImportError("DashScope requires `pip install dashscope")
# response = dashscope.BatchTextEmbedding.call(
# model=model, url=url, api_key=api_key, kwargs=kwargs
# )
# if response.status_code == HTTPStatus.OK:
# return response.output["url"]
# else:
# logger.error("Calling BatchTextEmbedding failed, details: %s" % response)
# return None
# def get_multimodal_embedding(
# model: str, input: list, api_key: Optional[str] = None, **kwargs: Any
# ) -> List[float]:
# """Call DashScope multimodal embedding.
# ref: https://help.aliyun.com/zh/dashscope/developer-reference/one-peace-multimodal-embedding-api-details.
#
# Args:
# model (str): The `DashScopeBatchTextEmbeddingModels`
# input (str): The input of the embedding, eg:
# [{'factor': 1, 'text': '你好'},
# {'factor': 2, 'audio': 'https://dashscope.oss-cn-beijing.aliyuncs.com/audios/cow.flac'},
# {'factor': 3, 'image': 'https://dashscope.oss-cn-beijing.aliyuncs.com/images/256_1.png'}]
#
# Raises:
# ImportError: Need install dashscope package.
#
# Returns:
# List[float]: Embedding result, if failed return empty list.
# """
# try:
# import dashscope
# except ImportError:
# raise ImportError("DashScope requires `pip install dashscope")
# response = dashscope.MultiModalEmbedding.call(
# model=model, input=input, api_key=api_key, kwargs=kwargs
# )
# if response.status_code == HTTPStatus.OK:
# return response.output["embedding"]
# else:
# logger.error("Calling MultiModalEmbedding failed, details: %s" % response)
# return []
class XinferenceEmbedding(BaseEmbedding):
    """Text embedding model backed by a Xinference endpoint.

    The constructor connects to the endpoint and fetches a handle for the
    requested model; that handle is used for every embedding request.
    """

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    _generator: Any = PrivateAttr()
    _model_uid: str = Field(description="The Xinference model to use.")
    _endpoint: str = Field(description="The Xinference endpoint URL to use.")

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
        dimensions: Optional[int] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        max_retries: int = 10,
        # timeout: float = 60.0,
        # reuse_client: bool = True,
        # callback_manager: Optional[CallbackManager] = None,
        # default_headers: Optional[Dict[str, str]] = None,
        # http_client: Optional[httpx.Client] = None,
        # async_http_client: Optional[httpx.AsyncClient] = None,
        # num_workers: Optional[int] = None,
        **kwargs: Any,
    ) -> None:
        """Connect to ``endpoint`` and bind this instance to ``model_uid``.

        Args:
            model_uid: Uid of the model launched in Xinference; also passed
                to the base class as ``model_name``.
            endpoint: URL of the Xinference service.
            embed_batch_size: Batch size forwarded to the base class.
            dimensions, additional_kwargs, api_key, api_base, api_version,
                max_retries, **kwargs: Forwarded unchanged to the base class
                initializer.

        Raises:
            ImportError: If the ``xinference`` client library is missing.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        # NOTE(review): the model description returned here is not forwarded
        # to the base initializer, so the ``model_description`` field is not
        # populated from it -- confirm whether that is intended.
        generator, model_description = self.load_model(
            model_uid, endpoint
        )
        self._generator = generator
        #self._model_uid = model_uid
        #self._endpoint = endpoint

        super().__init__(
            embed_batch_size=embed_batch_size,
            dimensions=dimensions,
            #callback_manager=callback_manager,
            model_name=model_uid,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            # reuse_client=reuse_client,
            # timeout=timeout,
            # default_headers=default_headers,
            # num_workers=num_workers,
            **kwargs,
        )

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """Fetch the model handle and its description from Xinference.

        Args:
            model_uid: Uid of the model to look up.
            endpoint: URL of the Xinference service.

        Returns:
            A ``(generator, model_description)`` pair: the model handle
            returned by ``RESTfulClient.get_model`` and the entry for
            ``model_uid`` from ``RESTfulClient.list_models``.

        Raises:
            ImportError: If the ``xinference`` client library is missing.
            RuntimeError: If the endpoint has no handle or description for
                ``model_uid``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            raise ImportError(
                "Could not import Xinference library. "
                'Please install Xinference with `pip install "xinference[all]"`'
            )

        client = RESTfulClient(endpoint)

        generator = client.get_model(model_uid)
        # .get() so that an unknown uid ends in the RuntimeError below rather
        # than a bare KeyError.
        model_description = client.list_models().get(model_uid)

        # Explicit checks instead of ``assert``, which is removed under ``python -O``.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint. "
                "Please make sure Xinference endpoint is running at the correct port."
            )

        return generator, model_description

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier ``"XinferenceEmbedding"``."""
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """Embed ``text`` synchronously.

        Calls ``create_embedding`` on the model handle and returns the
        ``embedding`` of the first item in the response's ``data`` list.
        """
        assert self._generator is not None
        response = self._generator.create_embedding(input=text)
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` synchronously; queries are embedded exactly like texts."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` from async code.

        Delegates to the synchronous implementation, so the request itself is
        not awaited.
        """
        return self._get_query_embedding(query)
-3979
View File
File diff suppressed because it is too large Load Diff
+2 -6
View File
@@ -11,25 +11,21 @@ generate = "app.engine.generate:generate_datasource"
[tool.poetry.dependencies]
python = "^3.11,<3.12"
fastapi = "^0.110.3"
fastapi = "^0.112.0"
python-dotenv = "^1.0.0"
aiostream = "^0.6.2"
llama-index = "0.10.63"
cachetools = "^5.3.3"
protobuf = "4.25.4"
#arize-phoenix = "^4.12.0"
openinference-instrumentation-llama-index="2.2.3"
llama-index-callbacks-arize-phoenix = "^0.1.4"
llama-index-llms-dashscope = "^0.1.2"
llama-index-embeddings-dashscope = "^0.1.4"
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
xinference = "^0.14.1"
xinference-client = "^0.14.1"
llama-index-llms-xinference = "^0.1.2"
qdrant-client="^1.10.1"
llama-index-vector-stores-qdrant = "^0.2.14"
chroma="^0.2.0"
chroma="^0.5.5"
llama-index-vector-stores-chroma = "^0.1.10"
llama-index-readers-json = "^0.1.5"
+1 -1
View File
@@ -1,4 +1,4 @@
rmdir /S /Q storage_vector
rmdir /S /Q storage
python tests/query.py
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python tests/query.py
+1 -1
View File
@@ -1 +1 @@
python main.py
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python main.py
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "人工费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是6.3500000000"
},
{
"question": "乙供装置性材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "甲供装置性材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "夜间施工增加费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "装置性材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "冬雨季施工增加费的费率是多少?",
"answer": "费率是3.5700000000"
},
{
"question": "材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "机械价差的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "规费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接工程费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "企业管理费的费率是多少?",
"answer": "费率是35.7600000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "合计的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "直接工程费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "社会保险费的费率是多少?",
"answer": "费率是15.0000000000"
},
{
"question": "间接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "合计的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "利润的费率是多少?",
"answer": "费率是5.2400000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "社会保险费的费率是多少?",
"answer": "费率是15.0000000000"
},
{
"question": "直接工程费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "乙供设备不含税价的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "企业管理费的费率是多少?",
"answer": "费率是17.1300000000"
},
{
"question": "企业管理费的费率是多少?",
"answer": "费率是35.7600000000"
},
{
"question": "夜间施工增加费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "夜间施工增加费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "甲供设备含税价的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "施工机械使用费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "定额直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "主材费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "施工企业配合调试费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "施工机械使用费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是6.3500000000"
},
{
"question": "施工工具用具使用费的费率是多少?",
"answer": "费率是3.8200000000"
},
{
"question": "措施费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "材料价差的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "措施费的费率是多少?",
"answer": "费率是100.0000000000"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "前期工作管理费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "特种设备安全监测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "工程监理费的金额是多少?",
"answer": "金额是131009.9200000000"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "生产准备费的金额是多少?",
"answer": "金额是472373669.4635599852"
},
{
"question": "电力工程技术经济标准编制费的金额是多少?",
"answer": "金额是84352440.9756360054"
},
{
"question": "项目建设技术服务费的金额是多少?",
"answer": "金额是16855957065.4302005768"
},
{
"question": "工程保险费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "其他的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "施工图文件评审费的金额是多少?",
"answer": "金额是24940.0000000000"
},
{
"question": "节能评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "项目前期工作费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "其他的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "项目法人管理费的金额是多少?",
"answer": "金额是986923559.4149370193"
},
{
"question": "专业爆破服务费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "节能评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "用地预审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设备材料监造费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设备材料监造费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "勘察费的金额是多少?",
"answer": "金额是12122154260.0000000000"
},
{
"question": "项目法人管理费的金额是多少?",
"answer": "金额是986923559.4149370193"
},
{
"question": "社会稳定风险评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "勘察费的金额是多少?",
"answer": "金额是12122154260.0000000000"
},
{
"question": "环境影响评价费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "使用林地可行性研究费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设计费的金额是多少?",
"answer": "金额是4042055949.4299998283"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "建设场地征用及清理费的金额是多少?",
"answer": "金额是16831284.2287110016"
},
{
"question": "施工图文件评审费的金额是多少?",
"answer": "金额是24940.0000000000"
},
{
"question": "项目后评价费的金额是多少?",
"answer": "金额是421762204.8781780005"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "勘察设计费的金额是多少?",
"answer": "金额是16164210209.4300003052"
},
{
"question": "前期工作管理费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "节能评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "初步设计文件评审费的金额是多少?",
"answer": "金额是18560.0000000000"
},
{
"question": "特种设备安全监测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "初步设计文件评审费的金额是多少?",
"answer": "金额是18560.0000000000"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "矿产压覆评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设计费的金额是多少?",
"answer": "金额是4042055949.4299998283"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "电力工程技术经济标准编制费的金额是多少?",
"answer": "金额是84352440.9756360054"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "矿产压覆评估费用的金额是多少?",
"answer": "金额是0E-10"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "预制基础的合价是多少?",
"answer": "合价是40567.2639480000"
},
{
"question": "绝缘子串及金具安装的合价是多少?",
"answer": "合价是2897171.9878110001"
},
{
"question": "杆塔工程材料工地运输的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础防护的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "(1)拆除后能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "地基处理的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "灌注桩基础的合价是多少?",
"answer": "合价是43466660.0544390008"
},
{
"question": "(1)拆除后能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "悬垂绝缘子串及金具安装的合价是多少?",
"answer": "合价是1251465.0340440001"
},
{
"question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "附件安装工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "导地线跨越架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "辅助工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "绝缘子串及金具安装的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "护坡、挡土墙及排洪沟砌筑的合价是多少?",
"answer": "合价是709931.9013930000"
},
{
"question": "锚杆基础的合价是多少?",
"answer": "合价是15344967.9002950005"
},
{
"question": "建筑工程的合价是多少?",
"answer": "合价是25411.2790780000"
},
{
"question": "辅助工程的合价是多少?",
"answer": "合价是1046253.4135240000"
},
{
"question": "导地线跨越架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "电缆工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "输、送电线路试运的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础土石方工程的合价是多少?",
"answer": "合价是32872843180.7429008484"
},
{
"question": "基础永久性围堰的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础永久性围堰的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "混凝土及钢筋混凝土结构的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "输、送电线路试运的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "混合结构的合价是多少?",
"answer": "合价是16967.5193850000"
},
{
"question": "杆塔组立的合价是多少?",
"answer": "合价是2253906.0859830002"
},
{
"question": "附件安装工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "接地工程材料工地运输的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是27148.0310160000"
},
{
"question": "导地线架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "护坡、挡土墙及排洪沟的合价是多少?",
"answer": "合价是709931.9013930000"
},
{
"question": "(1)拆除后能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础永久性围堰砌筑的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "(2)拆除后不能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "安装工程的合价是多少?",
"answer": "合价是65324.9496330000"
},
{
"question": "尖峰、施工基面土石方工程的合价是多少?",
"answer": "合价是325205.4178770000"
},
{
"question": "架线工程的合价是多少?",
"answer": "合价是4844399648.0778598785"
},
{
"question": "杆塔组立的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "架线工程材料工地运输的合价是多少?",
"answer": "合价是2088570123.2409000397"
},
{
"question": "导地线架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "耐张绝缘子串及金具安装的合价是多少?",
"answer": "合价是1645706.9537680000"
},
{
"question": "架线工程材料工地运输的合价是多少?",
"answer": "合价是2088570123.2409000397"
},
{
"question": "其他基础的合价是多少?",
"answer": "合价是3839666.7656879998"
},
{
"question": "架线工程材料工地运输的合价是多少?",
"answer": "合价是0E-10"
}
]
@@ -1,202 +0,0 @@
[
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是440877984.9458540082"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是1086586.9018659999"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是51486.7898090000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是3321.8139230000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是78005.0340730000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是3535892767.0972299576"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是24045.2334060000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是336253.7482950000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是142270.1346780000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是61049.8665780000"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是933061.7795919999"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是182949.5997350000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(余物清理)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是21220645.1637400016"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是933061.7795919999"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是2501470269.7231497765"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是51486.7898090000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是55265.9111100000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是442897633.6273120046"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是1057484.3306960000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是442897633.6273120046"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是21220645.1637400016"
},
{
"question": "线路取费表(余物清理)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是336253.7482950000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是61049.8665780000"
},
{
"question": "线路取费表(余物清理)(1)的直接费是多少?",
"answer": "直接费是61049.8665780000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是24045.2334060000"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(余物清理)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是659466.5955000001"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是2501470269.7231497765"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "降阻剂_数量的属性值是多少?",
"answer": "属性值是f"
},
{
"question": "导线2_单位单价的属性值是多少?",
"answer": "属性值是9"
},
{
"question": "导线_单公里用量的属性值是多少?",
"answer": "属性值是36"
},
{
"question": "线路参数_导地线防震措施的属性值是多少?",
"answer": "属性值是457"
},
{
"question": "合成绝缘子_数量的属性值是多少?",
"answer": "属性值是5"
},
{
"question": "基础垫层的属性值是多少?",
"answer": "属性值是"
},
{
"question": "其中:基础护壁用量的属性值是多少?",
"answer": "属性值是74394.212"
},
{
"question": "铺石加混凝土的属性值是多少?",
"answer": "属性值是0.0"
},
{
"question": "导线用量(西北)的属性值是多少?",
"answer": "属性值是-795976.0855"
},
{
"question": "导线单公里用量(西北)的属性值是多少?",
"answer": "属性值是-159195.2171"
},
{
"question": "灰土垫层单公里用量(西北)的属性值是多少?",
"answer": "属性值是8.0"
},
{
"question": "地线瓷绝缘子单公里用量(西北)的属性值是多少?",
"answer": "属性值是738.253"
},
{
"question": "地形条件_高山的属性值是多少?",
"answer": "属性值是7"
},
{
"question": "流砂坑比例的属性值是多少?",
"answer": "属性值是0.001"
},
{
"question": "碎石_数量的属性值是多少?",
"answer": "属性值是12"
},
{
"question": "线路参数_导地线防震措施的属性值是多少?",
"answer": "属性值是457"
},
{
"question": "灰土垫层的属性值是多少?",
"answer": "属性值是40.0"
},
{
"question": "交叉跨越_弱电线路的属性值是多少?",
"answer": "属性值是45"
},
{
"question": "地线1_根数的属性值是多少?",
"answer": "属性值是12"
},
{
"question": "土质比例_岩石(人凿)的属性值是多少?",
"answer": "属性值是49"
},
{
"question": "耐张混凝土杆基数的属性值是多少?",
"answer": "属性值是26.0"
},
{
"question": "设计单位的属性值是多少?",
"answer": "属性值是3"
},
{
"question": "接地钢的属性值是多少?",
"answer": "属性值是"
},
{
"question": "间隔棒_单公里用量的属性值是多少?",
"answer": "属性值是r"
},
{
"question": "导线其中:跳线和导线弧垂单公里用量(西北)的属性值是多少?",
"answer": "属性值是159203.0171"
},
{
"question": "桩基础的属性值是多少?",
"answer": "属性值是310.0"
},
{
"question": "降阻剂的属性值是多少?",
"answer": "属性值是"
},
{
"question": "可抵扣增值税(万元)的属性值是多少?",
"answer": "属性值是2005241.808822"
},
{
"question": "主要技术经济指标2的属性值是多少?",
"answer": "属性值是"
},
{
"question": "合成绝缘子_数量的属性值是多少?",
"answer": "属性值是5"
},
{
"question": "土质比例_水坑的属性值是多少?",
"answer": "属性值是47"
},
{
"question": "基础_插入式的属性值是多少?",
"answer": "属性值是3"
},
{
"question": "耐张角钢塔比例的属性值是多少?",
"answer": "属性值是250%"
},
{
"question": "地线的属性值是多少?",
"answer": "属性值是"
},
{
"question": "回路数的属性值是多少?",
"answer": "属性值是三回"
},
{
"question": "导线其中:跳线和导线弧垂用量的属性值是多少?",
"answer": "属性值是796015.0855"
},
{
"question": "OPGW用量(西北)的属性值是多少?",
"answer": "属性值是2904.737"
},
{
"question": "现浇混凝土_单公里用量的属性值是多少?",
"answer": "属性值是22"
},
{
"question": "架线工程费用(万元)(含价差)的属性值是多少?",
"answer": "属性值是3203726.0"
},
{
"question": "耐张钢管塔比例的属性值是多少?",
"answer": "属性值是300%"
},
{
"question": "单公里土石方量_基面的属性值是多少?",
"answer": "属性值是8*8"
},
{
"question": "地线2的属性值是多少?",
"answer": "属性值是"
},
{
"question": "降阻剂的属性值是多少?",
"answer": "属性值是"
},
{
"question": "土质比例的属性值是多少?",
"answer": "属性值是"
},
{
"question": "地线1_单位单价的属性值是多少?",
"answer": "属性值是113"
},
{
"question": "绝缘子串型式_悬垂串的属性值是多少?",
"answer": "属性值是48"
},
{
"question": "基坑土石方量(西北)的属性值是多少?",
"answer": "属性值是405403506.156"
},
{
"question": "基坑坚土的属性值是多少?",
"answer": "属性值是25585167.713"
},
{
"question": "基坑普通土的属性值是多少?",
"answer": "属性值是313873965.334"
},
{
"question": "瓷绝缘子单公里用量(西北)的属性值是多少?",
"answer": "属性值是201.0"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 水坑 坑深2.0m以内的编码是多少?",
"answer": "编码是YX2-72"
},
{
"question": "钢筋加工及制作的编码是多少?",
"answer": "编码是YX3-43"
},
{
"question": "船舶运输 线材 每件重400kg以内 运输的编码是多少?",
"answer": "编码是YX1-132"
},
{
"question": "船舶运输 钢管塔材 运输的编码是多少?",
"answer": "编码是YX1-152"
},
{
"question": "碎石的编码是多少?",
"answer": "编码是C10020103"
},
{
"question": "混凝土(保护帽)的编码是多少?",
"answer": "编码是ZH1001"
},
{
"question": "船舶运输 金具、绝缘子、零星钢材 运输的编码是多少?",
"answer": "编码是YX1-144"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "船舶运输 线材 每件重1000kg以内 运输的编码是多少?",
"answer": "编码是YX1-136"
},
{
"question": "混凝土搅拌及浇制 每基基础联系梁混凝土量20m³以内的编码是多少?",
"answer": "编码是YX3-69"
},
{
"question": "索道运输 循环式 塔材 荷载1t以内 装卸的编码是多少?",
"answer": "编码是YX1-185"
},
{
"question": "人力运输 混凝土预制品 每件重100kg以内的编码是多少?",
"answer": "编码是YX1-6"
},
{
"question": "船舶运输 混凝土杆 每件重1500kg以上 运输的编码是多少?",
"answer": "编码是YX1-118"
},
{
"question": "碎石的编码是多少?",
"answer": "编码是C10020103"
},
{
"question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 泥水 坑深8.0m以上的编码是多少?",
"answer": "编码是YX2-55"
},
{
"question": "机械施工土方 场地平整的编码是多少?",
"answer": "编码是GT1-1"
},
{
"question": "汽车运输 混凝土预制品 每件重100kg以内 装卸的编码是多少?",
"answer": "编码是YX1-69"
},
{
"question": "汽车运输 其他建筑安装材料 运输的编码是多少?",
"answer": "编码是YX1-108"
},
{
"question": "钻孔灌注桩基础 混凝土搅拌及浇制 孔深10m以内的编码是多少?",
"answer": "编码是YX3-171"
},
{
"question": "线路复测及分坑 直线双杆及拉线塔的编码是多少?",
"answer": "编码是YX2-3"
},
{
"question": "氧化锌避雷器安装 35kV的编码是多少?",
"answer": "编码是YX7-32"
},
{
"question": "混凝土(保护帽)的编码是多少?",
"answer": "编码是ZH1002"
},
{
"question": "汽车运输 其他建筑安装材料 装卸的编码是多少?",
"answer": "编码是YX1-107"
},
{
"question": "船舶运输 混凝土杆 每件重500kg以内 装卸的编码是多少?",
"answer": "编码是YX1-109"
},
{
"question": "混凝土(保护帽)的编码是多少?",
"answer": "编码是ZH1001"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "普通硅酸盐水泥的编码是多少?",
"answer": "编码是C09010102"
},
{
"question": "拖拉机运输 钢管塔材 运输的编码是多少?",
"answer": "编码是YX1-44"
},
{
"question": "尖峰及施工基面挖方(或爆破) 普通土的编码是多少?",
"answer": "编码是YX2-226"
},
{
"question": "汽车运输 角钢塔材 装卸的编码是多少?",
"answer": "编码是YX1-103"
},
{
"question": "接地槽挖方(或爆破)及回填 普通土的编码是多少?",
"answer": "编码是YX2-213"
},
{
"question": "水的编码是多少?",
"answer": "编码是C21010101"
},
{
"question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV 针式单联串(悬垂串)的编码是多少?",
"answer": "编码是YX6-21"
},
{
"question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV I型双联串(悬垂串)的编码是多少?",
"answer": "编码是YX6-22"
},
{
"question": "钻孔灌注桩基础 机械推钻成孔 砂砾石 孔深20m以内 孔径1.0m以内的编码是多少?",
"answer": "编码是YX3-117"
},
{
"question": "线路复测及分坑 直线自立塔的编码是多少?",
"answer": "编码是YX2-6"
},
{
"question": "钻孔灌注桩基础 凿桩头 桩径0.8m以上的编码是多少?",
"answer": "编码是YX3-180"
},
{
"question": "线路复测及分坑 耐张(转角)单杆的编码是多少?",
"answer": "编码是YX2-2"
},
{
"question": "中砂的编码是多少?",
"answer": "编码是C10010101"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "带电跨越电力线 被跨线电压等级 35kV的编码是多少?",
"answer": "编码是YX5-186"
},
{
"question": "人工挖土方 普土 深2m以内的编码是多少?",
"answer": "编码是YT1-1"
},
{
"question": "混凝土杆的编码是多少?",
"answer": "编码是"
},
{
"question": "接地模块安装的编码是多少?",
"answer": "编码是YX3-213"
},
{
"question": "拖拉机运输 线材 每件重400kg以内 运输的编码是多少?",
"answer": "编码是YX1-34"
},
{
"question": "拖拉机运输 其他建筑安装材料 装卸的编码是多少?",
"answer": "编码是YX1-45"
},
{
"question": "普通硅酸盐水泥的编码是多少?",
"answer": "编码是C09010102"
},
{
"question": "船舶运输 线材 每件重4000kg以内 装卸的编码是多少?",
"answer": "编码是YX1-139"
},
{
"question": "水的编码是多少?",
"answer": "编码是C21010101"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "架空输电线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "价差预备费的金额是多少?",
"answer": "金额是22731130869.6655998230"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "建设期贷款利息的金额是多少?",
"answer": "金额是38515024887.5095977783"
},
{
"question": "特殊项目的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "动态费用的金额是多少?",
"answer": "金额是61246155757.1752014160"
},
{
"question": "动态费用的金额是多少?",
"answer": "金额是61246155757.1752014160"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "其他费用的金额是多少?",
"answer": "金额是210942912572.8689880371"
},
{
"question": "基本预备费的金额是多少?",
"answer": "金额是14020310849.7332000732"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "其中:特种设备安全监测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "架空输电线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "基本预备费的金额是多少?",
"answer": "金额是14020310849.7332000732"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "编制基准期价差的金额是多少?",
"answer": "金额是29246752707.1180000305"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "其他费用的金额是多少?",
"answer": "金额是210942912572.8689880371"
},
{
"question": "特殊项目的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "编制基准期价差的金额是多少?",
"answer": "金额是29246752707.1180000305"
},
{
"question": "特殊项目的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "其中:建设场地征用及清理费的金额是多少?",
"answer": "金额是16831284.2287110016"
},
{
"question": "其中:可抵扣增值税额的金额是多少?",
"answer": "金额是20069645492.2888984680"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "动态费用的金额是多少?",
"answer": "金额是61246155757.1752014160"
},
{
"question": "建设期贷款利息的金额是多少?",
"answer": "金额是38515024887.5095977783"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "其中:建设场地征用及清理费的金额是多少?",
"answer": "金额是16831284.2287110016"
},
{
"question": "建设期贷款利息的金额是多少?",
"answer": "金额是38515024887.5095977783"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "架空输电线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "其中:可抵扣增值税额的金额是多少?",
"answer": "金额是20069645492.2888984680"
},
{
"question": "价差预备费的金额是多少?",
"answer": "金额是22731130869.6655998230"
},
{
"question": "一般线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "基本预备费的金额是多少?",
"answer": "金额是14020310849.7332000732"
},
{
"question": "设备购置费的金额是多少?",
"answer": "金额是2567934636.3574500084"
}
]
View File
-19
View File
@@ -1,19 +0,0 @@
import chromadb
# Dump the contents of the "default" ChromaDB collection to a text file.
# Create a persistent ChromaDB client for the on-disk store at this path.
chroma_client = chromadb.PersistentClient(path="/home/bw/ctr/zjdataai-app/backend/storage_vector-1/")
# Fetch the already existing "default" collection.
collection = chroma_client.get_collection(name="default")
# Retrieve all data stored in the collection.
results = collection.get(
include=['documents', 'metadatas', 'embeddings'] # only request the allowed include options
)
# Convert the result to a string and save it to a txt file.
with open('/home/bw/ctr/zjdataai-app/backend/test1/query_results-1.txt', 'w', encoding='utf-8') as file:
file.write(str(results))
# Print a completion message.
# NOTE(review): the message names query_results.txt, but the file written above is query_results-1.txt.
print("查询结果已保存到 query_results.txt 文件中。")
-28
View File
@@ -1,28 +0,0 @@
[]: ?
: 9.0000000000
: 9
: ?
: 3.5700000000
:
: ?
: 15.0000000000
: 15
: ?
: 9.0000000000
: 9
: ?
: 3.5700000000
: 3
: ?
: 9.0000000000
: 9
: ?
: 3.5700000000
:
-1
View File
@@ -1 +0,0 @@
-12
View File
@@ -1,12 +0,0 @@
TOP_K: 5
LLM_TEMPERATURE: 0.1
similarity_top_k: 5.0
: ?
: SQL: 9.0%9.0%
: 9.0000000000
: ?
: SQL: "冬雨季施工增加费"
: 3.5700000000
View File
View File
-200
View File
@@ -1,200 +0,0 @@
import re
import os
import sys
import json
from sqlalchemy import create_engine
from llama_index.core import VectorStoreIndex, SQLDatabase
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
from app.api.routers.chat import generate_filters
from app.engine import get_index, makeDescriptionByEngine
from app.engine.loaders.db import CustomDatabaseReader
from app.engine.vectordb import get_vector_store
from app.observability import init_observability
from app.settings import init_settings
from dotenv import load_dotenv
load_dotenv()
# Load question/answer pairs from a JSON file.
# The file is parsed as a JSON list of objects; each object's "question" and
# "answer" values are stripped, and a pair is kept only when both are non-empty.
# Returns a list of (question, answer) tuples.
def read_questions_and_answers(file_path):
questions_and_answers = []
with open(file_path, 'r', encoding='utf-8') as file:
data = json.load(file) # parse the JSON data
for entry in data:
question = entry.get("question", "").strip() # read the question text
answer = entry.get("answer", "").strip() # take the answer text as-is rather than extracting a number from it
if question and answer:
questions_and_answers.append((question, answer))
return questions_and_answers
# Append one graded result to file_path as a single JSON line.
# The record holds the question, the query result converted with str(), and the
# expected answer; non-ASCII text is written unescaped (ensure_ascii=False).
def save_results_to_file(question, query_result, correct_answer, file_path):
# Keep the raw query result.
result_data = {
"问题": question,
"查询结果": str(query_result), # keep the raw query result
"正确答案": correct_answer
}
with open(file_path, 'a', encoding='utf-8') as file:
json.dump(result_data, file, ensure_ascii=False)
file.write('\n') # newline between result entries
# Append one incorrectly answered question to log_file_path as a single JSON line.
# The record holds the question, the expected answer, and the query result
# converted with str(); non-ASCII text is written unescaped (ensure_ascii=False).
def log_incorrect_answers(question, correct_answer, query_result, log_file_path):
# Keep the raw query result.
incorrect_data = {
"错误问题": question,
"正确答案": correct_answer,
"查询结果": str(query_result) # keep the raw query result
}
with open(log_file_path, 'a', encoding='utf-8') as file:
json.dump(incorrect_data, file, ensure_ascii=False)
file.write('\n') # newline between result entries
# Extract every number from a string.
def extract_all_numbers_from_result(result_str):
    """Return all numeric literals found in ``result_str`` as a list of strings.

    Recognised forms, tried in this order at each position:
      * comma-grouped numbers with an optional leading minus (``1,234,567.89``);
      * the zero literal in scientific notation (``0E-10``);
      * plain integers and decimals (``42``, ``3.14``).

    Commas are removed from every returned number. An input without any number
    yields an empty list.
    """
    # Groups are non-capturing on purpose: with capturing groups re.findall
    # returns tuples of the groups instead of the matched text, and the
    # .replace() call below would then fail.
    pattern = r"-?\d+(?:,\d+)+(?:\.\d+)?|0E-\d+|\d+(?:\.\d+)?"
    return [number.replace(',', '') for number in re.findall(pattern, result_str)]
# Check whether two floating-point numbers are close to each other.
def is_close_enough(val1, val2, epsilon=1e-5):
"""Return True when val1 and val2 differ by strictly less than epsilon."""
return abs(val1 - val2) < epsilon
# Grade a query result against the expected answer.
# One value (number or code) is extracted from each string with
# extract_number_or_code_from_result. When both parse as floats they are compared
# after rounding to 5 decimal places; otherwise the extracted strings are compared.
# Returns False when either extraction yields nothing.
def is_answer_correct(query_result_str, correct_answer_str):
"""Check whether the query result matches the correct answer."""
# Extract the number or code from the query result.
query_result_value = extract_number_or_code_from_result(query_result_str)
# Extract the number or code from the correct answer.
correct_answer_value = extract_number_or_code_from_result(correct_answer_str)
# Compare the extracted numbers or codes.
if query_result_value and correct_answer_value:
try:
# Remove commas and convert to float.
query_result_float = float(query_result_value.replace(',', ''))
correct_answer_float = float(correct_answer_value.replace(',', ''))
# Handle zero values written in scientific notation.
if query_result_float == 0.0 and correct_answer_float == 0.0:
return True
# Round to 5 decimal places.
rounded_query_result = round(query_result_float, 5)
rounded_correct_answer = round(correct_answer_float, 5)
# Compare the rounded float values.
return rounded_query_result == rounded_correct_answer
except ValueError:
# If conversion to float is not possible, compare the strings directly.
return query_result_value == correct_answer_value
return False # if either side is empty, treat it as a mismatch
def extract_number_or_code_from_result(result_str):
    """Extract the first number, or failing that the first code, from ``result_str``.

    Numbers may carry a leading minus sign, thousands separators, a decimal
    part and an exponent (``131009.92``, ``-795976.0855``, ``1,086,586.90``,
    ``0E-10``). Commas are removed from the returned number.

    Digits that belong to an ASCII alphanumeric token are not treated as
    numbers, so a code such as ``YX2-72`` or ``C10020103`` is returned whole and
    a quantity with a unit suffix such as ``35kV`` is skipped.

    Codes are ASCII tokens that start with an uppercase letter and continue
    with letters, digits or hyphens. A token containing a digit is preferred
    over a plain word; if no token has a digit the first token is returned.

    Returns the extracted text, or ``None`` when nothing matches.
    """
    # ASCII-only lookarounds are used instead of \b because \b treats CJK
    # characters as word characters, so "是YX2-72" would have no boundary
    # before the code.
    number_match = re.search(
        r"(?<![A-Za-z\d_-])"
        r"-?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?(?:[eE][-+]?\d+)?"
        r"(?![A-Za-z\d_]|\.\d)",
        result_str,
    )
    if number_match:
        return number_match.group(0).replace(',', '')

    # No number found: collect every candidate code token.
    potential_codes = re.findall(
        r"(?<![A-Za-z\d_])[A-Z][A-Za-z\d-]*[A-Za-z\d](?![A-Za-z\d_])",
        result_str,
    )
    for code in potential_codes:
        if any(ch.isdigit() for ch in code):
            return code
    return potential_codes[0] if potential_codes else None
def main(questions_file, query_type):
    """Run every question in ``questions_file`` through one engine and grade the answers.

    Args:
        questions_file: name of a JSON file, resolved relative to this script's
            directory, holding a list of {"question", "answer"} objects. An
            empty JSON list is created if the file does not exist.
        query_type: "vector" to query the vector index, "sql" to query the SQL
            table-retriever engine. Any other value prints an error and exits
            with status 1 before any engine is initialised.

    Correctly answered questions are appended to query_results.json and the
    others to incorrect_answers_log.json, both next to this script.
    """
    # Reject an unknown query type before doing any expensive initialisation.
    if query_type not in ("vector", "sql"):
        print("无效的查询类型,请选择 'vector' 或 'sql'")
        sys.exit(1)

    # Directory containing this script; all files are resolved against it.
    script_dir = os.path.dirname(os.path.abspath(__file__))
    questions_file_path = os.path.join(script_dir, questions_file)
    results_file_path = os.path.join(script_dir, "query_results.json")
    log_file_path = os.path.join(script_dir, "incorrect_answers_log.json")

    # If the questions file does not exist, create it with an empty JSON array.
    if not os.path.exists(questions_file_path):
        with open(questions_file_path, 'w', encoding='utf-8') as file:
            json.dump([], file)

    # Update the environment variables read below.
    os.environ['TOP_K'] = str(5)  # TOP_K for the vector query
    os.environ['LLM_TEMPERATURE'] = str(0.1)  # temperature
    os.environ['similarity_top_k'] = str(5)  # TOP_K for the SQL table retriever

    init_settings()
    init_observability()
    index = get_index()

    top_k = int(os.getenv("TOP_K"))
    temperature = float(os.getenv("LLM_TEMPERATURE"))
    similarity_top_k = int(os.getenv("similarity_top_k"))
    filters = generate_filters([])

    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    sql_database = SQLDatabase(engine)
    table_schema_objs = makeDescriptionByEngine(sql_database)
    table_node_mapping = SQLTableNodeMapping(sql_database)

    # Build the SQL query tool.
    sql_obj_index = ObjectIndex.from_objects(
        table_schema_objs,
        table_node_mapping,
        index_cls=VectorStoreIndex,
    )
    sql_query_engine = SQLTableRetrieverQueryEngine(
        sql_database,
        sql_obj_index.as_retriever(similarity_top_k=similarity_top_k),
    )
    # The vector engine does not depend on the question, so build it once.
    vector_query_engine = None
    if query_type == "vector":
        vector_query_engine = index.as_query_engine(
            similarity_top_k=top_k, filters=filters
        )

    questions_and_answers = read_questions_and_answers(questions_file_path)

    # Write the parameter header when the results file is missing or empty.
    # os.path.getsize raises on a missing file, so check existence first.
    if not os.path.exists(results_file_path) or os.path.getsize(results_file_path) == 0:
        with open(results_file_path, 'w', encoding='utf-8') as file:
            json.dump({
                "TOP_K": top_k,
                "LLM_TEMPERATURE": temperature,
                "similarity_top_k": similarity_top_k
            }, file, ensure_ascii=False)
            file.write('\n')

    # Run the queries one by one.
    for i, (question, correct_answer) in enumerate(questions_and_answers):
        print(f"执行查询 {i+1}: {question}")
        if query_type == "vector":
            response = vector_query_engine.query(question)
            print(f"向量查询结果: {response}\n")
        else:
            response = sql_query_engine.query(question)
            print(f"SQL查询结果: {response}\n")

        # This wrapped string is what gets written to the output files.
        query_result_str = f"The encoding for the query \"{question}\" is {str(response)}"

        # Grade on the engine's response only: the wrapped string embeds the
        # question, whose own digits would otherwise be extracted first.
        if is_answer_correct(str(response), correct_answer):
            # Record the result only when the answer is correct.
            save_results_to_file(question, query_result_str, correct_answer, results_file_path)
        else:
            # Log the incorrect answer.
            log_incorrect_answers(question, correct_answer, query_result_str, log_file_path)
# Script entry point: expects two command-line arguments, the questions JSON
# file name and the query type; the query type is lower-cased before use.
# Exits with status 1 when fewer than two arguments are given.
if __name__ == "__main__":
if len(sys.argv) < 3:
print("请提供questions.json文件名和查询类型(vector 或 sql)")
sys.exit(1)
questions_file = sys.argv[1]
query_type = sys.argv[2].lower()
main(questions_file, query_type)
-82
View File
@@ -1,82 +0,0 @@
import os
import json
from sqlalchemy import create_engine, MetaData, Table, func
from sqlalchemy.orm import sessionmaker
from dotenv import load_dotenv
load_dotenv()
def generate_questions_for_table(table_name, file_path, num_questions=50):
    """Generate question/answer pairs from random rows of one table and save them as JSON.

    Args:
        table_name: name of the table to sample; it must have an entry in the
            ``tables_info`` configuration below, otherwise a message is printed
            and nothing is written.
        file_path: destination JSON file (overwritten).
        num_questions: number of rows to sample. Each row is drawn
            independently, so the same row can appear more than once.

    If the table has no rows a message is printed and no file is written.
    """
    # Per-table configuration: index of the name column, name of the value
    # column, question template and answer template.
    tables_info = {
        "ProjectProperties": (0, "Value", "{name_value}的属性值是多少?", "属性值是{answer_value}"),
        "OtherFee": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
        "FeeCollectionTable": (1, "Rate", "{name_value}的费率是多少?", "费率是{answer_value}"),
        "ProjectDivision": (5, "Sum_Price", "{name_value}的合价是多少?", "合价是{answer_value}"),
        "ProjectDivisions_CostPreview": (4, "Direct_Cost", "{name_value}的直接费是多少?", "直接费是{answer_value}"),
        "TotalCalculateTable": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
        "ProjectQuantities": (6, "Encoding", "{name_value}的编码是多少?", "编码是{answer_value}")
    }

    # Check the configuration before touching the database.
    if table_name not in tables_info:
        print(f"未找到表 {table_name} 的配置信息")
        return

    name_index, value_column, question_template, answer_template = tables_info[table_name]

    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    session = None
    try:
        # Reflect just the table that is needed.
        metadata = MetaData()
        table = Table(table_name, metadata, autoload_with=engine)

        # Resolve the name column from its configured position.
        name_column = table.columns.keys()[name_index]

        session = sessionmaker(bind=engine)()
        questions_and_answers = []

        # Generate the requested number of questions.
        for _ in range(num_questions):
            # Pick one random row and read its name and value columns.
            row = session.query(table).order_by(func.random()).first()
            if row is None:
                # Empty table: there is nothing to build a question from.
                print(f"表 {table_name} 中没有数据")
                return
            name_value = getattr(row, name_column)
            answer_value = getattr(row, value_column)

            questions_and_answers.append({
                "question": question_template.format(name_value=name_value),
                "answer": answer_template.format(answer_value=answer_value)
            })
    finally:
        # Always release the session and the engine's connection pool.
        if session is not None:
            session.close()
        engine.dispose()

    # Write the questions and answers to the file as JSON.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(questions_and_answers, file, ensure_ascii=False, indent=4)

    print(f"已生成表 {table_name} 的问题到文件: {file_path}")
# Reflect the database named by SQL_DATABASE_URL and, for every reflected table,
# call generate_questions_for_table to write "<table name>.json" next to this script.
def main():
engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
metadata = MetaData()
metadata.reflect(bind=engine)
# Directory containing this script.
script_dir = os.path.dirname(os.path.abspath(__file__))
# Walk over every table and generate its JSON file.
for table_name in metadata.tables.keys():
# File path: script directory + table name + .json
file_path = os.path.join(script_dir, f"{table_name}.json")
generate_questions_for_table(table_name, file_path)
# Script entry point.
if __name__ == "__main__":
main()
-14
View File
@@ -1,14 +0,0 @@
question:线路参数_转角次数的属性值是多少? answer:线路参数_转角次数的属性值是64
question:接地土石方量的属性值是多少? answer:接地土石方量的属性值是16
question:工程监理费的金额是多少? answer:工程监理费的金额是131009.92
question:矿产压覆评估费用的金额是多少? answer:矿产压覆评估费用的金额是0
question:线路取费表(余物清理)的费率是多少? answer:线路取费表(余物清理)的费率是100
question:线路取费表(拆除)的费率是多少? answer:线路取费表(拆除)的费率是100
question:一般线路本体工程的合价是多少? answer:一般线路本体工程的合价是55105688268.5176
question:基础工程的合价是多少? answer:基础工程的合价是49051649642.9667
question:线路取费表(调试工程)aa的直接费是多少? answer:线路取费表(调试工程)aa的直接费是22411207942.4858
question:线路取费表的直接费是多少? answer:线路取费表的直接费是7314300665.34141
question:一般线路本体工程的金额是多少? answer:一般线路本体工程的金额是55105688268.5176
question:架空输电线路本体工程的金额是多少? answer:架空输电线路本体工程的金额是55105688268.5176
question:截止阀的编码是多少? answer:截止阀的编码是F01010101
question:自定义主材的编码是多少? answer:自定义主材的编码是asd
-110
View File
@@ -1,110 +0,0 @@
import os
import json
import sys
from llama_index.core import VectorStoreIndex, SQLDatabase
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
from sqlalchemy import create_engine
from app.api.routers.chat import generate_filters
from app.engine import get_index, makeDescriptionByEngine
from app.engine.vectordb import get_vector_store
from app.observability import init_observability
from app.settings import init_settings
from dotenv import load_dotenv
load_dotenv()
# Load the "question" value of every item in a JSON list file.
# Returns the questions as a list, in file order; an item without a
# "question" key raises KeyError.
def read_questions(file_path):
with open(file_path, 'r', encoding='utf-8') as file:
data = json.load(file)
questions = [item["question"] for item in data]
return questions
# Append one {"question", "result"} record to file_path as a single JSON line.
# Non-ASCII text is written unescaped (ensure_ascii=False).
def save_results_to_file(question, result, file_path):
result_data = {
"question": question,
"result": result
}
with open(file_path, 'a', encoding='utf-8') as file:
json.dump(result_data, file, ensure_ascii=False)
file.write('\n')
# Run every question from questions_file through the chosen engine ("vector" or
# "sql") once per temperature value from 0.1 to 1.0, appending each result to
# parameters_results.json next to this script.
# NOTE(review): LLM_TEMPERATURE is written to os.environ inside the loop, after
# init_settings() has run and after the SQL query engine has been built; whether
# the new value reaches the model is not visible here — confirm.
# NOTE(review): an invalid query_type is only detected inside the loop, after all
# engines have been initialised, and not at all when there are no questions.
def main(questions_file, query_type):
# Update the environment variables.
os.environ['TOP_K'] = str(5) # TOP_K for the vector query
os.environ['similarity_top_k'] = str(1) # TOP_K for the SQL retriever, fixed at 1
init_settings()
init_observability()
index = get_index()
top_k = int(os.getenv("TOP_K")) # TOP_K for the vector query
similarity_top_k = int(os.getenv("similarity_top_k")) # TOP_K for the SQL retriever
filters = generate_filters([])
engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
sql_database = SQLDatabase(engine)
table_schema_objs = makeDescriptionByEngine(sql_database)
table_node_mapping = SQLTableNodeMapping(sql_database)
# Build the SQL query tool.
sql_obj_index = ObjectIndex.from_objects(
table_schema_objs,
table_node_mapping,
index_cls=VectorStoreIndex,
)
sql_query_engine = SQLTableRetrieverQueryEngine(sql_database,
sql_obj_index.as_retriever(similarity_top_k=similarity_top_k))
script_dir = os.path.dirname(os.path.abspath(__file__))
questions_file_path = os.path.join(script_dir, questions_file)
results_file_path = os.path.join(script_dir, "parameters_results.json")
questions = read_questions(questions_file_path)
# # If the file is empty, write the parameter values (disabled).
# if not os.path.isfile(results_file_path):
# with open(results_file_path, 'w', encoding='utf-8') as file:
# json.dump({
# "TOP_K": top_k,
# "similarity_top_k": similarity_top_k
# }, file, ensure_ascii=False)
# file.write('\n')
# Run the queries one by one.
for i, question in enumerate(questions):
print(f"Executing query {i+1}: {question}")
# For each question, try the different temperature values.
for temperature in range(1, 11): # from 1 to 10
temperature_value = temperature / 10.0 # from 0.1 to 1.0
os.environ['LLM_TEMPERATURE'] = str(temperature_value)
if query_type == "vector":
query_engine = index.as_query_engine(
similarity_top_k=top_k, filters=filters
)
query_result = query_engine.query(question)
print(f"Vector Query Result: {query_result}\n")
save_results_to_file(question, f"Current parameters: TOP_K={top_k}, similarity_top_k={similarity_top_k}, Temperature: {temperature_value:.1f}, Vector Query Result: {query_result}", results_file_path)
elif query_type == "sql":
sql_query_result = sql_query_engine.query(question)
print(f"SQL Query Result: {sql_query_result}\n")
save_results_to_file(question, f"Current parameters: TOP_K={top_k}, similarity_top_k={similarity_top_k}, Temperature: {temperature_value:.1f}, SQL Query Result: {sql_query_result}", results_file_path)
else:
print("无效的查询类型,请选择 'vector''sql'")
sys.exit(1)
# Script entry point: expects the questions JSON file and the query type as
# command-line arguments (the query type is lower-cased), then runs main()
# inside a Phoenix using_project context named after the questions file.
# Exits with status 1 when fewer than two arguments are given.
if __name__ == "__main__":
if len(sys.argv) < 3:
print("请提供questions.json文件的路径和查询类型(vector 或 sql)")
sys.exit(1)
questions_file = sys.argv[1]
query_type = sys.argv[2].lower()
from phoenix.trace import using_project
with using_project(questions_file) as obj:
main(questions_file, query_type)
+3 -1
View File
@@ -19,7 +19,9 @@ def main():
init_settings()
init_observability()
index = get_index()
indexs = get_index()
if len(indexs) > 0:
index = list(indexs.values())[0]
top_k = 5
filters = generate_filters([])
-10
View File
@@ -1,10 +0,0 @@
# The backend API for chat endpoint.
#NEXT_PUBLIC_CHAT_API=http://localhost:8000/api/chat
NEXT_PUBLIC_CHAT_API=http://10.1.6.41:8000/api/chat
#PHOENIX_SERVER_URL=http://localhost:6006/
PHOENIX_SERVER_URL=http://10.1.6.41:6006/
# Lets the user change indexes in LlamaCloud projects
NEXT_PUBLIC_USE_LLAMACLOUD=false
+1 -1
View File
@@ -4,7 +4,7 @@ const phoenixUrl = process.env.PHOENIX_SERVER_URL;
export default function Header() {
return (
<div className="z-10 w-full items-center justify-between font-mono text-sm lg:flex">
<div className="z-10 max-w-5xl w-full items-center justify-between font-mono text-sm lg:flex">
<p className="fixed left-0 top-0 flex w-full justify-center border-b border-gray-300 bg-gradient-to-b from-zinc-200 pb-6 pt-8 backdrop-blur-2xl dark:border-neutral-800 dark:bg-zinc-800/30 dark:from-inherit lg:static lg:w-auto lg:rounded-xl lg:border lg:bg-gray-200 lg:p-4 lg:dark:bg-zinc-800/30">
<code className="font-mono font-bold"><a href="javascript:location.reload();"></a></code>
</p>
@@ -99,8 +99,9 @@ export default function ChatInput(
</div>
)}
<div className="flex w-full items-start justify-between gap-4 ">
<Input
<textarea
autoFocus
rows={2}
name="message"
placeholder="请输入消息"
className="flex-1"
@@ -127,26 +127,9 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
}
// node generated by unknown loader, implement renderer by analyzing logged out metadata
// return (
// <p>
// 对不起, 未知文件类型. 无法打开当前的来源文件。
// </p>
// );
return (
<div className="flex items-center my-2">
<span>{nodeInfo.text}</span>
<Button
onClick={() => copyToClipboard(nodeInfo.url!)}
size="icon"
variant="ghost"
className="h-12 w-12 shrink-0"
>
{isCopied ? (
<Check className="h-4 w-4" />
) : (
<Copy className="h-4 w-4" />
)}
</Button>
</div>
<p>
, .
</p>
);
}
@@ -10,7 +10,7 @@ export interface ChatHandler {
data?: any;
},
) => void;
handleInputChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
handleInputChange: (e: React.ChangeEvent<HTMLTextAreaElement>) => void;
reload?: () => void;
stop?: () => void;
onFileUpload?: (file: File) => Promise<void>;
-16885
View File
File diff suppressed because it is too large Load Diff
-3
View File
@@ -1,3 +0,0 @@
ENV_PHOENIX_HOST=0.0.0.0
ENV_PHOENIX_PORT=6006
PHOENIX_HOST_ROOT_PATH=./.phoenix/
+1 -1
View File
@@ -2,4 +2,4 @@ SET ENV_PHOENIX_HOST=0.0.0.0
SET ENV_PHOENIX_PORT=6006
SET PHOENIX_HOST_ROOT_PATH=./.phoenix/
python phoenixserver.py
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-pCyqx0Uo-py3.11\Scripts\python phoenixserver.py