diff --git a/backend/app/api/routers/events.py b/backend/app/api/routers/events.py index 94cc585..a1d2ea8 100644 --- a/backend/app/api/routers/events.py +++ b/backend/app/api/routers/events.py @@ -20,9 +20,9 @@ class CallbackEvent(BaseModel): if self.payload: nodes = self.payload.get("nodes") if nodes: - msg = f"Retrieved {len(nodes)} sources to use as context for the query" + msg = f"根据查询检索到 {len(nodes)} 源文件" else: - msg = f"Retrieving context for query: '{self.payload.get('query_str')}'" + msg = f"查询检索中: '{self.payload.get('query_str')}'" return { "type": "events", "data": {"title": msg}, @@ -37,7 +37,7 @@ class CallbackEvent(BaseModel): return { "type": "events", "data": { - "title": f"Calling tool: {tool.name} with inputs: {func_call_args}", + "title": f"调用工具 {tool.name} ,参数: {func_call_args}", }, } @@ -87,7 +87,7 @@ class CallbackEvent(BaseModel): case _: return None except Exception as e: - logger.error(f"Error in converting event to response: {e}") + logger.error(f"转换回应时间时发生错误,原因: {e}") return None diff --git a/backend/app/api/routers/models.py b/backend/app/api/routers/models.py index c9ea1ad..17a86b5 100644 --- a/backend/app/api/routers/models.py +++ b/backend/app/api/routers/models.py @@ -173,12 +173,12 @@ class SourceNodes(BaseModel): def from_source_node(cls, source_node: NodeWithScore): metadata = source_node.node.metadata url = cls.get_url_from_metadata(metadata) - + text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id return cls( id=source_node.node.node_id, metadata=metadata, score=source_node.score, - text=source_node.node.text, # type: ignore + text=text, # type: ignore url=url, ) diff --git a/backend/app/engine/__init__.py b/backend/app/engine/__init__.py index fb8d410..def5e51 100644 --- a/backend/app/engine/__init__.py +++ b/backend/app/engine/__init__.py @@ -1,24 +1,67 @@ import os + +from llama_index.core import SQLDatabase, SummaryIndex, VectorStoreIndex +from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine +from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex from llama_index.core.settings import Settings -from llama_index.core.agent import AgentRunner +from llama_index.core.agent import AgentRunner, StructuredPlannerAgent, FunctionCallingAgentWorker from llama_index.core.tools.query_engine import QueryEngineTool +from sqlalchemy import create_engine, Engine + +from app.engine.loaders.db import makeDescriptionByEngine from app.engine.tools import ToolFactory from app.engine.index import get_index +sql_database = None +sql_obj_index = None def get_chat_engine(filters=None, params=None): system_prompt = os.getenv("SYSTEM_PROMPT") - top_k = os.getenv("TOP_K", "3") + top_k = int(os.getenv("TOP_K", "3")) tools = [] + global sql_obj_index + global sql_database + if sql_obj_index is None: + sqlengine = create_engine(os.getenv("SQL_DATABASE_URL", "")) + sql_database = SQLDatabase(sqlengine) + table_schema_objs = makeDescriptionByEngine(sql_database) + table_node_mapping = SQLTableNodeMapping(sql_database) + + sql_obj_index = ObjectIndex.from_objects( + table_schema_objs, + table_node_mapping, + index_cls=VectorStoreIndex, + ) + + # 创建SQL查询工具 + sql_query_engine = SQLTableRetrieverQueryEngine(sql_database, + sql_obj_index.as_retriever(similarity_top_k=top_k), + verbose=True,) + sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine, + name="zjdata_query_tool", + description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具") + # Add query tool if index exists index = get_index() if index is not None: + summary_index = SummaryIndex(index.vector_store.get_nodes(node_ids=None)) + summary_query_engine = summary_index.as_query_engine() + summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool", + description="适用于任何需要进行全面总结、概括的要求。", + #description="适用于任何需要对所有内容进行全面总结的请求。有关电力造价领域更具体部分的问题,请使用zj_query_engine_tool", + ) + + # 创建向量检索查询工具 query_engine = index.as_query_engine( - similarity_top_k=int(top_k), filters=filters + similarity_top_k=top_k, filters=filters ) - query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine) + query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool", + description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。", + ) + tools.append(summary_query_tool) tools.append(query_engine_tool) + #tools.append(sql_query_tool) # Add additional tools tools += ToolFactory.from_env() @@ -29,3 +72,10 @@ def get_chat_engine(filters=None, params=None): system_prompt=system_prompt, verbose=True, ) + # create the function calling worker for reasoning + # worker = FunctionCallingAgentWorker.from_tools( + # tools, verbose=True + # ) + # + # # wrap the worker in the top-level planner + # return StructuredPlannerAgent(worker, tools) diff --git a/backend/app/engine/constants.py b/backend/app/engine/constants.py new file mode 100644 index 0000000..bd93bb8 --- /dev/null +++ b/backend/app/engine/constants.py @@ -0,0 +1 @@ +STORAGE_DIR = "storage" # directory to cache the generated index \ No newline at end of file diff --git a/backend/app/engine/generate.py b/backend/app/engine/generate.py index 8bcf606..115c175 100644 --- a/backend/app/engine/generate.py +++ b/backend/app/engine/generate.py @@ -2,50 +2,84 @@ from dotenv import load_dotenv load_dotenv() -import os import logging -from app.settings import init_settings -from app.engine.loaders import get_documents -from llama_index.indices.managed.llama_cloud import LlamaCloudIndex +import os +from app.engine.loaders import get_documents +from app.engine.vectordb import get_vector_store +from app.settings import init_settings +from llama_index.core.ingestion import IngestionPipeline +from llama_index.core.node_parser import SentenceSplitter +from llama_index.core.settings import Settings +from llama_index.core.storage import StorageContext +from llama_index.core.storage.docstore import SimpleDocumentStore logging.basicConfig(level=logging.INFO) logger = logging.getLogger() +STORAGE_DIR = os.getenv("STORAGE_DIR", "storage") + + +def get_doc_store(): + + # If the storage directory is there, load the document store from it. + # If not, set up an in-memory document store since we can't load from a directory that doesn't exist. + if os.path.exists(STORAGE_DIR): + return SimpleDocumentStore.from_persist_dir(STORAGE_DIR) + else: + return SimpleDocumentStore() + + +def run_pipeline(docstore, vector_store, documents): + pipeline = IngestionPipeline( + transformations=[ + SentenceSplitter( + chunk_size=Settings.chunk_size, + chunk_overlap=Settings.chunk_overlap, + ), + Settings.embed_model, + ], + docstore=docstore, + docstore_strategy="upserts_and_delete", + vector_store=vector_store, + ) + + # Run the ingestion pipeline and store the results + nodes = pipeline.run(show_progress=True, documents=documents) + + return nodes + + +def persist_storage(docstore, vector_store): + storage_context = StorageContext.from_defaults( + docstore=docstore, + vector_store=vector_store, + ) + storage_context.persist(STORAGE_DIR) + def generate_datasource(): init_settings() logger.info("Generate index for the provided data") - name = os.getenv("LLAMA_CLOUD_INDEX_NAME") - project_name = os.getenv("LLAMA_CLOUD_PROJECT_NAME") - api_key = os.getenv("LLAMA_CLOUD_API_KEY") - base_url = os.getenv("LLAMA_CLOUD_BASE_URL") - organization_id = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID") - - if name is None or project_name is None or api_key is None: - raise ValueError( - "Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY" - " to your environment variables or config them in .env file" - ) - + # Get the stores and documents or create new ones documents = get_documents() - # Set private=false to mark the document as public (required for filtering) for doc in documents: doc.metadata["private"] = "false" + docstore = get_doc_store() + vector_store = get_vector_store() - LlamaCloudIndex.from_documents( - documents=documents, - name=name, - project_name=project_name, - api_key=api_key, - base_url=base_url, - organization_id=organization_id - ) + # Run the ingestion pipeline + _ = run_pipeline(docstore, vector_store, documents) + + # Build the index and persist storage + persist_storage(docstore, vector_store) logger.info("Finished generating the index") if __name__ == "__main__": - generate_datasource() + from phoenix.trace import using_project + with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj: + generate_datasource() diff --git a/backend/app/engine/index.py b/backend/app/engine/index.py index e54e8ca..b21e695 100644 --- a/backend/app/engine/index.py +++ b/backend/app/engine/index.py @@ -1,31 +1,22 @@ import logging -import os -from llama_index.indices.managed.llama_cloud import LlamaCloudIndex +from llama_index.core.indices import VectorStoreIndex +from app.engine.vectordb import get_vector_store logger = logging.getLogger("uvicorn") +index = None + def get_index(params=None): - configParams = params or {} - pipelineConfig = configParams.get("llamaCloudPipeline", {}) - name = pipelineConfig.get("pipeline", os.getenv("LLAMA_CLOUD_INDEX_NAME")) - project_name = pipelineConfig.get("project", os.getenv("LLAMA_CLOUD_PROJECT_NAME")) - api_key = os.getenv("LLAMA_CLOUD_API_KEY") - base_url = os.getenv("LLAMA_CLOUD_BASE_URL") - organization_id = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID") + global index + if index is None: + logger.info("Connecting vector store...") - if name is None or project_name is None or api_key is None: - raise ValueError( - "Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY" - " to your environment variables or config them in .env file" - ) - - index = LlamaCloudIndex( - name=name, - project_name=project_name, - api_key=api_key, - base_url=base_url, - organization_id=organization_id - ) + store = get_vector_store() + # Load the index from the vector store + # If you are using a vector store that doesn't store text, + # you must load the index from both the vector store and the document store + index = VectorStoreIndex.from_vector_store(store) + logger.info("Finished load index from vector store.") return index diff --git a/backend/app/engine/loaders/__init__.py b/backend/app/engine/loaders/__init__.py index 4a278a4..a220170 100644 --- a/backend/app/engine/loaders/__init__.py +++ b/backend/app/engine/loaders/__init__.py @@ -17,19 +17,22 @@ def load_configs(): def get_documents(): documents = [] config = load_configs() + if config is None or len(config.items()) == 0: + return documents + for loader_type, loader_config in config.items(): logger.info( f"Loading documents from loader: {loader_type}, config: {loader_config}" ) + + loader_config = loader_config or [] match loader_type: case "file": document = get_file_documents(FileLoaderConfig(**loader_config)) case "web": document = get_web_documents(WebLoaderConfig(**loader_config)) case "db": - document = get_db_documents( - configs=[DBLoaderConfig(**cfg) for cfg in loader_config] - ) + document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config]) case _: raise ValueError(f"Invalid loader type: {loader_type}") documents.extend(document) diff --git a/backend/app/engine/loaders/db.py b/backend/app/engine/loaders/db.py index d5c9ffd..69d3279 100644 --- a/backend/app/engine/loaders/db.py +++ b/backend/app/engine/loaders/db.py @@ -1,26 +1,187 @@ import os import logging from typing import List +from typing import Any, List, Optional + +from llama_index.core.readers.base import BaseReader +from llama_index.core.schema import Document +from llama_index.core.utilities.sql_wrapper import SQLDatabase +from sqlalchemy import text +from sqlalchemy.engine import Engine +from llama_index.core import SQLDatabase, Document +from llama_index.core.objects import SQLTableSchema, SQLTableNodeMapping +from llama_index.core.readers.base import BaseReader +from llama_index.readers.database import DatabaseReader from pydantic import BaseModel, validator from llama_index.core.indices.vector_store import VectorStoreIndex +from sqlalchemy import create_engine logger = logging.getLogger(__name__) +class CustomDatabaseReader(BaseReader): + """Simple Database reader. + + Concatenates each row into Document used by LlamaIndex. + + Args: + sql_database (Optional[SQLDatabase]): SQL database to use, + including table names to specify. + See :ref:`Ref-Struct-Store` for more details. + + OR + + engine (Optional[Engine]): SQLAlchemy Engine object of the database connection. + + OR + + uri (Optional[str]): uri of the database connection. + + OR + + scheme (Optional[str]): scheme of the database connection. + host (Optional[str]): host of the database connection. + port (Optional[int]): port of the database connection. + user (Optional[str]): user of the database connection. + password (Optional[str]): password of the database connection. + dbname (Optional[str]): dbname of the database connection. + + Returns: + DatabaseReader: A DatabaseReader object. + """ + + def __init__( + self, + sql_database: Optional[SQLDatabase] = None, + engine: Optional[Engine] = None, + uri: Optional[str] = None, + scheme: Optional[str] = None, + host: Optional[str] = None, + port: Optional[str] = None, + user: Optional[str] = None, + password: Optional[str] = None, + dbname: Optional[str] = None, + *args: Any, + **kwargs: Any, + ) -> None: + """Initialize with parameters.""" + if sql_database: + self.sql_database = sql_database + elif engine: + self.sql_database = SQLDatabase(engine, *args, **kwargs) + elif uri: + self.uri = uri + self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) + elif scheme and host and port and user and password and dbname: + uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}" + self.uri = uri + self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) + else: + raise ValueError( + "You must provide either a SQLDatabase, " + "a SQL Alchemy Engine, a valid connection URI, or a valid " + "set of credentials." + ) + + def load_data(self, query: str) -> List[Document]: + """Query and load data from the Database, returning a list of Documents. + + Args: + query (str): Query parameter to filter tables and rows. + + Returns: + List[Document]: A list of Document objects. + """ + dco_str = "" + with self.sql_database.engine.connect() as connection: + if query is None: + raise ValueError("A query parameter is necessary to filter the data") + else: + result = connection.execute(text(query)) + + dco_str = ", ".join( + [f"{entry}" for entry in result.keys()] + ) + + for item in result.fetchall(): + # fetch each item + record_str = ", ".join( + [f"{entry}" for col, entry in zip(result.keys(), item)] + ) + dco_str += record_str + "\n" + + doc = Document(text=dco_str) + doc.metadata["name"] = query + doc.metadata["context"] = query + doc.metadata["file_type"] = "application/vnd.ms-excel" + return [doc] class DBLoaderConfig(BaseModel): uri: str queries: List[str] +def makeDescriptionByEngine(sql_database:SQLDatabase): + reader = DatabaseReader(sql_database) + + table_names = sql_database.get_usable_table_names() + table_schema_objs = [] + for table_name in table_names: + columns = sql_database.get_table_columns(table_name) + if len(columns) > 150: + continue + stats_txt = "" + + if table_name == 'gongchengshuxing': + stats_txt = '该表中有以下属性:' + documents = reader.load_data(query='select name from gongchengshuxing') + for index in range(len(documents) if len(documents) < 30 else 30): + if index == 0: + continue + elif index > 1: + stats_txt += ',' + stats_txt += documents[index].text.split(':')[1] + + tbSchema = (SQLTableSchema(table_name=table_name, context_str=stats_txt)) + table_schema_objs.append(tbSchema) + + return table_schema_objs def get_db_documents(configs: list[DBLoaderConfig]): - from llama_index.readers.database import DatabaseReader - docs = [] + + if len(configs) == 0 or configs[0].uri == "": + logger.warning( + f"Failed to load database, error message: uri is empty. Return as empty document list." + ) + return docs + + metadata = { + #'file_name':'', + 'file_type':'application/booway.document.zj', + #'file_path':'', + #'file_size':'', + #'creation_date':'', + #'last_modified_date':'', + } + + #from llama_index.readers.database import DatabaseReader for entry in configs: - loader = DatabaseReader(uri=entry.uri) - for query in entry.queries: + engine = create_engine(entry.uri) + sql_database = SQLDatabase(engine) + + table_schema_objs = makeDescriptionByEngine(sql_database) + table_node_mapping = SQLTableNodeMapping(sql_database) + + nodes = table_node_mapping.to_nodes(table_schema_objs) + for node in nodes: + node.metadata.update(metadata) + + docs.extend(nodes) + + queries = entry.queries or [] + loader = CustomDatabaseReader(sql_database) + for query in queries: logger.info(f"Loading data from database with query: {query}") documents = loader.load_data(query=query) - docs.extend(documents) - return documents + docs.extend(documents) + return docs diff --git a/backend/app/engine/loaders/file.py b/backend/app/engine/loaders/file.py index 4dea4f8..1db99ce 100644 --- a/backend/app/engine/loaders/file.py +++ b/backend/app/engine/loaders/file.py @@ -1,6 +1,9 @@ import os import logging from typing import Dict + +from llama_index.core.readers.base import BaseReader +from llama_index.core.readers.json import JSONReader from llama_parse import LlamaParse from pydantic import BaseModel, validator @@ -39,6 +42,9 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]: parser = llama_parse_parser() return {file_type: parser for file_type in SUPPORTED_FILE_TYPES} +def llama_local_extractor() -> Dict[str, BaseReader]: + return {"json" : JSONReader} + def get_file_documents(config: FileLoaderConfig): from llama_index.core.readers import SimpleDirectoryReader @@ -53,6 +59,9 @@ def get_file_documents(config: FileLoaderConfig): nest_asyncio.apply() file_extractor = llama_parse_extractor() + else: + file_extractor = llama_local_extractor() + reader = SimpleDirectoryReader( config.data_dir, recursive=True, diff --git a/backend/app/engine/loaders/web.py b/backend/app/engine/loaders/web.py index 563e51b..e667a69 100644 --- a/backend/app/engine/loaders/web.py +++ b/backend/app/engine/loaders/web.py @@ -11,7 +11,7 @@ class CrawlUrl(BaseModel): class WebLoaderConfig(BaseModel): driver_arguments: list[str] = Field(default=None) - urls: list[CrawlUrl] + urls: list[CrawlUrl] = [] def get_web_documents(config: WebLoaderConfig): @@ -25,6 +25,7 @@ def get_web_documents(config: WebLoaderConfig): options.add_argument(arg) docs = [] + urls = config.urls or [] for url in config.urls: scraper = WholeSiteReader( prefix=url.prefix, diff --git a/backend/app/engine/tools/__init__.py b/backend/app/engine/tools/__init__.py index 111bee5..1aced70 100644 --- a/backend/app/engine/tools/__init__.py +++ b/backend/app/engine/tools/__init__.py @@ -48,9 +48,13 @@ class ToolFactory: if os.path.exists("config/tools.yaml"): with open("config/tools.yaml", "r") as f: tool_configs = yaml.safe_load(f) - for tool_type, config_entries in tool_configs.items(): - for tool_name, config in config_entries.items(): - tools.extend( - ToolFactory.load_tools(tool_type, tool_name, config) - ) + if tool_configs != None and len(tool_configs.items()) != 0: + for tool_type, config_entries in tool_configs.items(): + if config_entries == None or len(config_entries.items()) == 0: + continue + + for tool_name, config in config_entries.items(): + tools.extend( + ToolFactory.load_tools(tool_type, tool_name, config) + ) return tools diff --git a/backend/app/engine/vectordb.py b/backend/app/engine/vectordb.py new file mode 100644 index 0000000..f3f2a7d --- /dev/null +++ b/backend/app/engine/vectordb.py @@ -0,0 +1,71 @@ +import os +from llama_index.vector_stores.chroma import ChromaVectorStore +from llama_index.vector_stores.qdrant import QdrantVectorStore +from qdrant_client import qdrant_client + +qclient = None + +def get_qdrant_vector_store(): + collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default") + vector_store_path = os.getenv("VECTOR_STORE_PATH") + host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"), + port=int(os.getenv("VECTOR_STORE_PORT", "6333")), + + if not vector_store_path or not host: + raise ValueError( + "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT" + ) + # if VECTOR_STORE_PATH is set, use a local QdrantVectorStore from the path + # otherwise, use a remote QdrantVectorStore + global qclient + if qclient == None: + if vector_store_path: + qclient = qdrant_client.QdrantClient( + path=vector_store_path, + ) + else: + qclient = qdrant_client.QdrantClient( + host=host, + port=port, + ) + + vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name) + return vector_store + +def get_chroma_vector_store(): + collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default") + vector_store_path = os.getenv("VECTOR_STORE_PATH") + # if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path + # otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet) + if vector_store_path: + store = ChromaVectorStore.from_params( + persist_dir=vector_store_path, collection_name=collection_name, + collection_kwargs={"metadata":{"hnsw:space":"cosine"}}, + ) + else: + if not os.getenv("VECTOR_STORE_HOST") or not os.getenv("VECTOR_STORE_PORT"): + raise ValueError( + "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT" + ) + store = ChromaVectorStore.from_params( + host=os.getenv("VECTOR_STORE_HOST"), + port=int(os.getenv("VECTOR_STORE_PORT")), + collection_name=collection_name, + collection_kwargs={"metadata":{"hnsw:space":"cosine"}}, + ) + return store + +def get_vector_store(): + store_type=os.getenv("VECTOR_STORE_TYPE") + + store = None + + match store_type: + case "chroma": + store = get_chroma_vector_store() + case "qdrant": + store = get_qdrant_vector_store() + case _: + raise ValueError(f"Invalid vector store type: {store_type}") + + return store \ No newline at end of file diff --git a/backend/app/observability.py b/backend/app/observability.py index 28019c3..780ae04 100644 --- a/backend/app/observability.py +++ b/backend/app/observability.py @@ -1,2 +1,20 @@ +import os + +import llama_index.core + def init_observability(): - pass + + PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY") + if not PHOENIX_API_KEY: + raise ValueError("PHOENIX_API_KEY environment variable is not set") + os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}" + PHOENIX_URL = os.getenv("PHOENIX_URL") + llama_index.core.set_global_handler( + "arize_phoenix", endpoint=PHOENIX_URL, eval_params={} + ) + + #debugHandle=[] + # llama_debug = LlamaDebugHandler(print_trace_on_end=True) + # debugHandle.append(llama_debug) + # callback_manager = CallbackManager(debugHandle) + # settings.Settings.callback_manager = callback_manager diff --git a/backend/app/settings.py b/backend/app/settings.py index b723bf3..0158074 100644 --- a/backend/app/settings.py +++ b/backend/app/settings.py @@ -1,6 +1,7 @@ import os from typing import Dict +from llama_index.core.constants import DEFAULT_TEMPERATURE from llama_index.core.settings import Settings @@ -9,6 +10,8 @@ def init_settings(): match model_provider: case "openai": init_openai() + case "dashscope": + init_dashscope() case "groq": init_groq() case "ollama": @@ -33,20 +36,21 @@ def init_settings(): def init_ollama(): - from llama_index.embeddings.ollama import OllamaEmbedding - from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama - - base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" - request_timeout = float( - os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) - ) - Settings.embed_model = OllamaEmbedding( - base_url=base_url, - model_name=os.getenv("EMBEDDING_MODEL"), - ) - Settings.llm = Ollama( - base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout - ) + # from llama_index.embeddings.ollama import OllamaEmbedding + # from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama + # + # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" + # request_timeout = float( + # os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) + # ) + # Settings.embed_model = OllamaEmbedding( + # base_url=base_url, + # model_name=os.getenv("EMBEDDING_MODEL"), + # ) + # Settings.llm = Ollama( + # base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout + # ) + pass def init_openai(): @@ -69,104 +73,129 @@ def init_openai(): } Settings.embed_model = OpenAIEmbedding(**config) +def init_dashscope(): + from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels + from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeBatchTextEmbeddingModels,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels + + max_tokens = os.getenv("LLM_MAX_TOKENS") + config = { + "model": os.getenv("MODEL"), + "temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)), + "max_tokens": int(max_tokens) if max_tokens is not None else None, + } + Settings.llm = llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX) + + dimensions = os.getenv("EMBEDDING_DIM") + config = { + "model": os.getenv("EMBEDDING_MODEL"), + "dimensions": int(dimensions) if dimensions is not None else None, + } + Settings.embed_model = DashScopeEmbedding(model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2, + text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY) + def init_azure_openai(): - from llama_index.core.constants import DEFAULT_TEMPERATURE - from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding - from llama_index.llms.azure_openai import AzureOpenAI - - llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] - embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] - max_tokens = os.getenv("LLM_MAX_TOKENS") - temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) - dimensions = os.getenv("EMBEDDING_DIM") - - azure_config = { - "api_key": os.environ["AZURE_OPENAI_KEY"], - "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], - "api_version": os.getenv("AZURE_OPENAI_API_VERSION") - or os.getenv("OPENAI_API_VERSION"), - } - - Settings.llm = AzureOpenAI( - model=os.getenv("MODEL"), - max_tokens=int(max_tokens) if max_tokens is not None else None, - temperature=float(temperature), - deployment_name=llm_deployment, - **azure_config, - ) - - Settings.embed_model = AzureOpenAIEmbedding( - model=os.getenv("EMBEDDING_MODEL"), - dimensions=int(dimensions) if dimensions is not None else None, - deployment_name=embedding_deployment, - **azure_config, - ) + # from llama_index.core.constants import DEFAULT_TEMPERATURE + # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding + # from llama_index.llms.azure_openai import AzureOpenAI + # + # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] + # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] + # max_tokens = os.getenv("LLM_MAX_TOKENS") + # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) + # dimensions = os.getenv("EMBEDDING_DIM") + # + # azure_config = { + # "api_key": os.environ["AZURE_OPENAI_KEY"], + # "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], + # "api_version": os.getenv("AZURE_OPENAI_API_VERSION") + # or os.getenv("OPENAI_API_VERSION"), + # } + # + # Settings.llm = AzureOpenAI( + # model=os.getenv("MODEL"), + # max_tokens=int(max_tokens) if max_tokens is not None else None, + # temperature=float(temperature), + # deployment_name=llm_deployment, + # **azure_config, + # ) + # + # Settings.embed_model = AzureOpenAIEmbedding( + # model=os.getenv("EMBEDDING_MODEL"), + # dimensions=int(dimensions) if dimensions is not None else None, + # deployment_name=embedding_deployment, + # **azure_config, + # ) + pass def init_fastembed(): """ Use Qdrant Fastembed as the local embedding provider. """ - from llama_index.embeddings.fastembed import FastEmbedEmbedding - - embed_model_map: Dict[str, str] = { - # Small and multilingual - "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", - # Large and multilingual - "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501 - } - - # This will download the model automatically if it is not already downloaded - Settings.embed_model = FastEmbedEmbedding( - model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")] - ) + # from llama_index.embeddings.fastembed import FastEmbedEmbedding + # + # embed_model_map: Dict[str, str] = { + # # Small and multilingual + # "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", + # # Large and multilingual + # "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501 + # } + # + # # This will download the model automatically if it is not already downloaded + # Settings.embed_model = FastEmbedEmbedding( + # model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")] + # ) + pass def init_groq(): - from llama_index.llms.groq import Groq - - model_map: Dict[str, str] = { - "llama3-8b": "llama3-8b-8192", - "llama3-70b": "llama3-70b-8192", - "mixtral-8x7b": "mixtral-8x7b-32768", - } - - Settings.llm = Groq(model=model_map[os.getenv("MODEL")]) - # Groq does not provide embeddings, so we use FastEmbed instead - init_fastembed() + # from llama_index.llms.groq import Groq + # + # model_map: Dict[str, str] = { + # "llama3-8b": "llama3-8b-8192", + # "llama3-70b": "llama3-70b-8192", + # "mixtral-8x7b": "mixtral-8x7b-32768", + # } + # + # Settings.llm = Groq(model=model_map[os.getenv("MODEL")]) + # # Groq does not provide embeddings, so we use FastEmbed instead + # init_fastembed() + pass def init_anthropic(): - from llama_index.llms.anthropic import Anthropic - - model_map: Dict[str, str] = { - "claude-3-opus": "claude-3-opus-20240229", - "claude-3-sonnet": "claude-3-sonnet-20240229", - "claude-3-haiku": "claude-3-haiku-20240307", - "claude-2.1": "claude-2.1", - "claude-instant-1.2": "claude-instant-1.2", - } - - Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")]) - # Anthropic does not provide embeddings, so we use FastEmbed instead - init_fastembed() + # from llama_index.llms.anthropic import Anthropic + # + # model_map: Dict[str, str] = { + # "claude-3-opus": "claude-3-opus-20240229", + # "claude-3-sonnet": "claude-3-sonnet-20240229", + # "claude-3-haiku": "claude-3-haiku-20240307", + # "claude-2.1": "claude-2.1", + # "claude-instant-1.2": "claude-instant-1.2", + # } + # + # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")]) + # # Anthropic does not provide embeddings, so we use FastEmbed instead + # init_fastembed() + pass def init_gemini(): - from llama_index.embeddings.gemini import GeminiEmbedding - from llama_index.llms.gemini import Gemini - - model_name = f"models/{os.getenv('MODEL')}" - embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" - - Settings.llm = Gemini(model=model_name) - Settings.embed_model = GeminiEmbedding(model_name=embed_model_name) - + # from llama_index.embeddings.gemini import GeminiEmbedding + # from llama_index.llms.gemini import Gemini + # + # model_name = f"models/{os.getenv('MODEL')}" + # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" + # + # Settings.llm = Gemini(model=model_name) + # Settings.embed_model = GeminiEmbedding(model_name=embed_model_name) + pass def init_mistral(): - from llama_index.embeddings.mistralai import MistralAIEmbedding - from llama_index.llms.mistralai import MistralAI - - Settings.llm = MistralAI(model=os.getenv("MODEL")) - Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) + # from llama_index.embeddings.mistralai import MistralAIEmbedding + # from llama_index.llms.mistralai import MistralAI + # + # Settings.llm = MistralAI(model=os.getenv("MODEL")) + # Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) + pass \ No newline at end of file diff --git a/backend/config/loaders.yaml b/backend/config/loaders.yaml index d746c61..1d4fdba 100644 --- a/backend/config/loaders.yaml +++ b/backend/config/loaders.yaml @@ -1,10 +1,30 @@ file: # use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable - use_llama_parse: true + use_llama_parse: false + db: # The configuration for the database loader, only supports MySQL and PostgreSQL databases for now. # uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db # query: The query to fetch data from the database. E.g.: SELECT * FROM table - uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1 + #- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo +# - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2 queries: - - SELECT * FROM mytable + - select * from ProjectProperties limit 30; + - select Name, Code, Amount, Amount_Total from TotalCalculateTable + - select SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 1 limit 30; + - select Name, Code, Rate, Amount from OtherFee + +#web: +# driver_arguments: +# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode +# - --no-sandbox +# - --disable-dev-shm-usage +# urls: +# # base_url: The URL to start crawling with +# # prefix: Only crawl URLs matching the specified prefix +# # depth: The maximum depth for BFS traversal +# # You can add more websites by adding more entries (don't forget the - prefix from YAML) +# - base_url: https://www.llamaindex.ai +# prefix: https://www.llamaindex.ai +# depth: 1 \ No newline at end of file diff --git a/backend/config/tools.yaml b/backend/config/tools.yaml index df5690c..1cde4f4 100644 --- a/backend/config/tools.yaml +++ b/backend/config/tools.yaml @@ -1,4 +1,5 @@ local: - weather: {} - interpreter: {} + #weather: {} + #interpreter: {} + #duckduckgo: {} llamahub: {} diff --git a/backend/data/101.pdf b/backend/data/101.pdf deleted file mode 100644 index ae5acff..0000000 Binary files a/backend/data/101.pdf and /dev/null differ diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_FeeCollectionTable.json b/backend/data/projectstruct/博微电力造价工程文件格式_FeeCollectionTable.json new file mode 100644 index 0000000..45ce0c1 --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_FeeCollectionTable.json @@ -0,0 +1,61 @@ +{ + "Table": [ + { + "name": "FeeCollectionTable", + "alias": "", + "comment": "取费表是取费设置中各取费表明细。查询示例: SELECT Rate FROM FeeCollectionTable WHERE Name = 'findname'。", + "fileds": [ + { + "name": "FeeCollectionTableName", + "alias": "取费名,取费名称,取费表名称", + "comment": "取费表名称" + }, + { + "name": "Name", + "alias": "费用名,名称,项目名", + "comment": "费用名称,项目名称" + }, + { + "name": "SerialNumber", + "alias": "费用序号,序号,序列号", + "comment": "费用表序号" + }, + { + "name": "Code", + "alias": "代码,代号,编号", + "comment": "费用代码" + }, + { + "name": "CalculationFormula", + "alias": "表达式,公式,计算式", + "comment": "取费基数" + }, + { + "name": "Rate", + "alias": "费用利率,费率", + "comment": "取费费率" + }, + { + "name": "Remarks", + "alias": "备注,说明", + "comment": "费用项备注说明" + }, + { + "name": "Major", + "alias": "专业", + "comment": "取费表专业" + }, + { + "name": "Type", + "alias": "类型,取费类型", + "comment": "取费表类型" + }, + { + "name": "Path", + "alias": "费用全路径,路径", + "comment": "费用项层级全路径" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_OtherFee.json b/backend/data/projectstruct/博微电力造价工程文件格式_OtherFee.json new file mode 100644 index 0000000..a813798 --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_OtherFee.json @@ -0,0 +1,76 @@ +{ + "Table": [ + { + "name": "OtherFee", + "alias": "", + "comment": "其他费用表被称为“工程费用中其他费用明细”。其他费用是指为完成工程项目建设所必需的,但不属于建筑工程费、安装工程费、设备购置费、基本预备费的其他相关费用。包括建设场地征用及清理费、项目建设管理费、项目建设技术服务费、生产准备费、大件运输措施费、专业爆破服务费等。查询示例: SELECT Rate FROM OtherFee WHERE Name = 'findname'。", + "fileds": [ + { + "name": "Id", + "alias": "id,项目id,费用id", + "comment": "费用项目id" + }, + { + "name": "ParentId", + "alias": "父id,父级id", + "comment": "费用项目父级id" + }, + { + "name": "Level", + "alias": "层级,层编号,层号", + "comment": "层级编号,从1开始" + }, + { + "name": "Name", + "alias": "费用名,名称,项目名", + "comment": "费用名称,项目名称" + }, + { + "name": "SerialNumber", + "alias": "序号,序列号", + "comment": "费用表序号" + }, + { + "name": "Code", + "alias": "代码,代号,编号", + "comment": "费用代码" + }, + { + "name": "CalculationFormula", + "alias": "表达式,公式,计算式", + "comment": "取费基数" + }, + { + "name": "Rate", + "alias": "费用利率,费率", + "comment": "取费费率" + }, + { + "name": "Amount", + "alias": "金额,价格", + "comment": "金额、合计、费用,\n单位为元" + }, + { + "name": "Remarks", + "alias": "备注,说明", + "comment": "费用项备注说明" + }, + { + "name": "Compilation_Basis", + "alias": "编制依据,编制来源", + "comment": "费用项编制依据" + }, + { + "name": "WBS_Code", + "alias": "WBS编号,WBS编码", + "comment": "费用项WBS编码" + }, + { + "name": "Path", + "alias": "费用全路径,路径", + "comment": "费用项层级全路径" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_ProjectDivision.json b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectDivision.json new file mode 100644 index 0000000..de24f3b --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectDivision.json @@ -0,0 +1,126 @@ +{ + "Table": [ + { + "name": "ProjectDivision", + "alias": "", + "comment": "项目划分表是用于存储工程项目划分树状数据。内部包含安装工程项目划分,建筑工程项目划分,线路项目划分,工程分部分项。查询示例: SELECT Sum_Price FROM ProjectDivision WHERE Name = 'findname'。", + "fileds": [ + { + "name": "Id", + "alias": "id,项目id,费用id", + "comment": "项目划分id" + }, + { + "name": "ParentId", + "alias": "父id,父级id", + "comment": "项目划分父级id" + }, + { + "name": "Level", + "alias": "层级,层编号,层号", + "comment": "层级编号,从1开始" + }, + { + "name": "Quantity", + "alias": "个数,数量,数目", + "comment": "项目划分数量" + }, + { + "name": "SerialNumber", + "alias": "项目序号,序号,序列号", + "comment": "项目划分序号" + }, + { + "name": "Name", + "alias": "项目名,名称", + "comment": "项目名称" + }, + { + "name": "Encoding", + "alias": "译码,编码", + "comment": "项目划分编码" + }, + { + "name": "Sum_Price", + "alias": "合计,合价", + "comment": "项目划分合价,分部分项费用" + }, + { + "name": "FeeCollectionTableName", + "alias": "取费表", + "comment": "项目划分的取费表,此项目划分选用的取费表" + }, + { + "name": "Remarks", + "alias": "备注,说明", + "comment": "备注" + }, + { + "name": "WBS_Code", + "alias": "WBS编号,WBS编码", + "comment": "WBS编码" + }, + { + "name": "Manual_Adjustment_Coefficient", + "alias": "人工调差系数", + "comment": "此项目划分下人工调差系数" + }, + { + "name": "Material_Adjustment_Coefficient", + "alias": "材料调差系数", + "comment": "此项目划分下材料调差系数" + }, + { + "name": "Mechanical_Adjustment_Coefficient", + "alias": "机械调差系数", + "comment": "此项目划分下机械调差系数" + }, + { + "name": "Demolition_Manual_Adjustment_Coefficient", + "alias": "拆除人工调差系数", + "comment": "此项目划分下拆除人工调差系数" + }, + { + "name": "Demolition_Material_Adjustment_Coefficient", + "alias": "拆除材料调差系数", + "comment": "此项目划分下拆除材料调差系数" + }, + { + "name": "Demolition_Mechanical_Adjustment_Coefficient", + "alias": "拆除机械调差系数", + "comment": "此项目划分下拆除机械调差系数" + }, + { + "name": "ProfessionalType", + "alias": "专业类型", + "comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。" + }, + { + "name": "Unit", + "alias": "单位", + "comment": "项目划分单位" + }, + { + "name": "CalculationFormula", + "alias": "表达式,公式,计算式", + "comment": "项目划分计算式" + }, + { + "name": "Rate", + "alias": "费用利率,费率", + "comment": "项目划分费率" + }, + { + "name": "Code", + "alias": "代码,代号,编号", + "comment": "项目划分代码" + }, + { + "name": "Path", + "alias": "路径,项目全路径", + "comment": "项目划分层级全路径" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_ProjectDivisions_CostPreview.json b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectDivisions_CostPreview.json new file mode 100644 index 0000000..b1c88be --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectDivisions_CostPreview.json @@ -0,0 +1,201 @@ +{ + "Table": [ + { + "name": "ProjectDivisions_CostPreview", + "alias": "", + "comment": "项目划分_费用预览表也被称为“项目划分费用预览”、“项目划分取费费用”。其中包含项目划分合价、直接费、间接费、利润、税金、主材费等。查询示例: SELECT Total FROM ProjectDivisions_CostPreview WHERE Id = '15'。", + "fileds": [ + { + "name": "Id", + "alias": "id,项目id", + "comment": "项目划分id" + }, + { + "name": "ParentId", + "alias": "父id,父级id", + "comment": "项目划分父级id" + }, + { + "name": "Level", + "alias": "层级,层编号,层号", + "comment": "层级编号,从1开始" + }, + { + "name": "ProfessionalType", + "alias": "专业类型", + "comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。" + }, + { + "name": "FeeCollectionTableName", + "alias": "取费表", + "comment": "项目划分的取费表,此项目划分选用的取费表" + }, + { + "name": "Direct_Cost", + "alias": "直接费", + "comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。" + }, + { + "name": "Direct_Project_Cost", + "alias": "直接工程费", + "comment": "直接工程费是指按照正常的施工条件,在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。" + }, + { + "name": "Quota_Direct_Cost", + "alias": "定额直接费", + "comment": "定额直接费,包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。" + }, + { + "name": "Labor_Cost", + "alias": "人工费", + "comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。" + }, + { + "name": "Material_Cost", + "alias": "材料费", + "comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。" + }, + { + "name": "Construction_Machinery_Cost", + "alias": "施工机械使用费", + "comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。" + }, + { + "name": "Installation_Material_Cost", + "alias": "装置性材料费", + "comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料,也称主要材料费。装置性材料通常在概算或预算定额中未计价,也称未计价材料,也称主材。" + }, + { + "name": "A_Supply_Installation_Material_Cost", + "alias": "甲供装置性材料费", + "comment": "供货方为甲供的装置性材料费。" + }, + { + "name": "B_Supply_Installation_Material_Cost", + "alias": "乙供装置性材料费", + "comment": "供货方为乙供的装置性材料费。" + }, + { + "name": "Measure_Cost", + "alias": "措施费", + "comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。" + }, + { + "name": "WinterRainySeasons_Additional_Construction_Cost", + "alias": "冬雨季施工增加费", + "comment": "冬雨季施工增加费是指按照合理的工期要求,建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。" + }, + { + "name": "Night_Additional_Construction_Cost", + "alias": "夜间施工增加费", + "comment": "夜间施工增加费是指按照规程要求,工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。" + }, + { + "name": "Construction_Tool_Usage_Cost", + "alias": "施工工具用具使用费", + "comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。" + }, + { + "name": "Special_Areas_Additional_Construction_Cost", + "alias": "特殊地区施工增加费", + "comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工:因特殊自然条件影响而需额外增加的施工费用。" + }, + { + "name": "Temporary_Facility_Cost", + "alias": "临时设施费", + "comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用,以及维修、拆除、折旧及摊销费,或临时设施的租赁费等。" + }, + { + "name": "Construction_Organization_Relocation_Cost", + "alias": "施工机构迁移费", + "comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资,以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。" + }, + { + "name": "Safe_Civilized_Construction_Cost", + "alias": "安全文明施工费", + "comment": "安全文明施工费,包括安全生产费、文明施工费、环境保护费。" + }, + { + "name": "Indirect_Cost", + "alias": "间接费", + "comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。" + }, + { + "name": "Regulatory_Cost", + "alias": "规费", + "comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。" + }, + { + "name": "Social_Insurance_Premiums", + "alias": "社会保险费", + "comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。" + }, + { + "name": "Housing_Provident_Fund", + "alias": "住房公积金", + "comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。" + }, + { + "name": "Enterprise_Management_Cost", + "alias": "企业管理费", + "comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。" + }, + { + "name": "Construction_Enterprise_Cooperation_Debugging_Cost", + "alias": "施工企业配合调试费", + "comment": "施工企业配合调试费是指在工程整套启动试运阶段,施工企业安装专业配合调试所发生的费用。" + }, + { + "name": "Profit", + "alias": "利润", + "comment": "利润是指施工企业完成所承包工程获得的盈利。" + }, + { + "name": "Taxes", + "alias": "税金", + "comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。" + }, + { + "name": "Equipment_Cost", + "alias": "设备费", + "comment": "设备购置费是指为项目建设而购置或自制各种设备,并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。" + }, + { + "name": "B_Supply_Equipment_Excluding_Tax_Price", + "alias": "乙供设备不含税价", + "comment": "设备费中,供货方为乙供设备,不含税价" + }, + { + "name": "A_Supply_Equipment_Tax_Price", + "alias": "甲供设备含税价", + "comment": "设备费中,供货方为甲供设备,含税价" + }, + { + "name": "Installation_Cost", + "alias": "安装费", + "comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。" + }, + { + "name": "Main_Material_Cost", + "alias": "主材费", + "comment": "主材费指装置性材料费" + }, + { + "name": "Total", + "alias": "总体费用,总计,总价,总的费用", + "comment": "总计包含安装费、主材费、设备费。" + }, + { + "name": "Sum", + "alias": "合计,合价", + "comment": "项目划分合价,分部分项费用,项目划分费用。合计包含安装费和主材费。" + }, + { + "name": "Path", + "alias": "路径,项目划分全路径", + "comment": "项目划分层级全路径" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_ProjectProperties.json b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectProperties.json new file mode 100644 index 0000000..e0e7c16 --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectProperties.json @@ -0,0 +1,31 @@ +{ + "Table": [ + { + "name": "ProjectProperties", + "alias": "", + "comment": "工程属性表是用于存储整个工程的重要属性,访问该表都是为了通过属性名查找属性值。通常属性值有工程信息、工程属性、技经参数,表中包含工程总投资、工程总费用,工程主要费用,工程技经参数等。查询示例: SELECT Value FROM ProjectProperties WHERE Name = 'findname'。", + "fileds": [ + { + "name": "Name\n", + "alias": "属性名,属性名称,属性", + "comment": "属性的唯一标识" + }, + { + "name": "Value", + "alias": "属性值", + "comment": "属性对应的实际值" + }, + { + "name": "Type", + "alias": "类型,属性类型", + "comment": "属性变量的类型" + }, + { + "name": "Unit", + "alias": "单位", + "comment": "单位" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_ProjectQuantities.json b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectQuantities.json new file mode 100644 index 0000000..0a721d7 --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_ProjectQuantities.json @@ -0,0 +1,281 @@ +{ + "Table": [ + { + "name": "ProjectQuantities", + "alias": "", + "comment": "工程量表是项目划分下工程量,包含定额、主材、设备、一笔性费用。查询示例: SELECT BudgetPrice FROM ProjectQuantities WHERE Name = 'findname'。", + "fileds": [ + { + "name": "Id", + "alias": "id", + "comment": "消耗量id,工程量id" + }, + { + "name": "ParentId", + "alias": "父id,父级id", + "comment": "父级id" + }, + { + "name": "ProjectDivisionId", + "alias": "项目划分id,项目id", + "comment": "父级项目划分id" + }, + { + "name": "Quantity", + "alias": "个数,数量,数目", + "comment": "数量,消耗量数量,工程量数量,主材数量,定额数量,设备数量,项目划分单位" + }, + { + "name": "FeatureSegment", + "alias": "特征段", + "comment": "线路特征段" + }, + { + "name": "ParentQuantity", + "alias": "父级个数,父级数量", + "comment": "父级id的数量" + }, + { + "name": "Name", + "alias": "名称", + "comment": "项目名称,工程量名称,消耗量名称,主材名称,定额名称,设备名称,材料名称" + }, + { + "name": "Encoding", + "alias": "译码,编码", + "comment": "编码,定额编码,主材编码,设备编码" + }, + { + "name": "SpecificationModel", + "alias": "规格型号", + "comment": "规格型号,主材规格型号,设备规格型号" + }, + { + "name": "Unit", + "alias": "单位", + "comment": "单位,主材单位,定额单位,设备单位,项目划分单位" + }, + { + "name": "BasePrice", + "alias": "基价", + "comment": "定额基价" + }, + { + "name": "LaborCost", + "alias": "人工费", + "comment": "定额人工费" + }, + { + "name": "MaterialCost", + "alias": "材料费", + "comment": "定额材料费" + }, + { + "name": "MachineryCost", + "alias": "机械费", + "comment": "定额机械费" + }, + { + "name": "QuotaCoefficient", + "alias": "定额系数", + "comment": "定额系数" + }, + { + "name": "LaborCoefficient", + "alias": "人工系数", + "comment": "定额人工系数" + }, + { + "name": "MaterialCoefficient", + "alias": "材料系数", + "comment": "定额材料系数" + }, + { + "name": "MechanicalCoefficient", + "alias": "机械系数", + "comment": "定额机械系数" + }, + { + "name": "ExpenseType", + "alias": "费用类型", + "comment": "费用类型,取值为取费、不取费" + }, + { + "name": "BudgetPrice", + "alias": "预算价", + "comment": "预算价" + }, + { + "name": "MarketPrice", + "alias": "市场价", + "comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。" + }, + { + "name": "Supplier", + "alias": "供货方", + "comment": "供货方,设备供货方,主材供货方,取值为甲供、乙供" + }, + { + "name": "Type", + "alias": "类型", + "comment": "工程量类型,取值定额、主材、设备、一笔性费用" + }, + { + "name": "QuotaRange", + "alias": "定额范围", + "comment": "定额范围,取值概算、预算" + }, + { + "name": "A_Supply_Material_Cost_Excluding_Tax", + "alias": "甲供材料费不含税", + "comment": "甲供材料费不含税" + }, + { + "name": "A_Supply_Material_Cost_Including_Tax", + "alias": "甲供材料费含税", + "comment": "甲供材料费含税" + }, + { + "name": "B_Supply_Material_Cost_Excluding_Tax", + "alias": "乙供材料费不含税", + "comment": "乙供材料费不含税" + }, + { + "name": "B_Supply_Material_Cost_Including_Tax", + "alias": "乙供材料费含税", + "comment": "乙供材料费含税" + }, + { + "name": "ScaffoldCalculation", + "alias": "脚手架计取", + "comment": "脚手架计取,取值计取、不计取" + }, + { + "name": "Remarks", + "alias": "备注,说明", + "comment": "备注,说明" + }, + { + "name": "FeeCollectionTableName", + "alias": "取费表", + "comment": "项目划分的取费表,工程量的取费表" + }, + { + "name": "Quota_Section_Name", + "alias": "定额章节名称", + "comment": "定额章节名称" + }, + { + "name": "ProfessionalType", + "alias": "专业类型", + "comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。" + }, + { + "name": "split", + "alias": "拆分", + "comment": "是否为拆分材料,取值1为拆分,取值0为不拆分" + }, + { + "name": "Loss", + "alias": "损耗", + "comment": "损耗率,主材损耗率" + }, + { + "name": "SingleWeight", + "alias": "单重", + "comment": "单重,主材单重" + }, + { + "name": "LineWeight", + "alias": "线重", + "comment": "线重,主材线重" + }, + { + "name": "SupervisedMaterials", + "alias": "监造物料", + "comment": "监造物料,取值1为监造物料,取值0为非监造物料" + }, + { + "name": "EquipmentMaterials", + "alias": "设备性材料", + "comment": "设备性材料,取值1为设备性材料,取值0为主材" + }, + { + "name": "GrossWeight", + "alias": "毛重", + "comment": "毛重,主材毛重" + }, + { + "name": "TransportationType", + "alias": "运输类型", + "comment": "运输类型,主材运输类型" + }, + { + "name": "TransportationMiscellaneous", + "alias": "运杂费率", + "comment": "运杂费率,设备运杂费率" + }, + { + "name": "EquipmentType", + "alias": "设备类型", + "comment": "设备类型,取值为主要设备、普通设备" + }, + { + "name": "UnitPrice", + "alias": "单价", + "comment": "单价" + }, + { + "name": "Market_Price_Excluding_Tax", + "alias": "市场价不含税", + "comment": "市场价不含税" + }, + { + "name": "Market_Price_Including_Tax", + "alias": "市场价含税", + "comment": "市场价含税,设备含税价" + }, + { + "name": "Budget_Price_Excluding_Tax", + "alias": "预算价不含税", + "comment": "预算价不含税" + }, + { + "name": "Budget_Price_Including_Tax", + "alias": "预算价含税", + "comment": "预算价含税" + }, + { + "name": "Unit_Price_Excluding_Tax", + "alias": "单价不含税", + "comment": "单价不含税,设备不含税价" + }, + { + "name": "GroupPrice", + "alias": "分组合价", + "comment": "分组合价" + }, + { + "name": "Pump_Truck_Pouring", + "alias": "泵车浇制", + "comment": "泵车浇制,取值1为泵车浇制,取值0为非泵车浇制" + }, + { + "name": "On_Site_Preparation", + "alias": "现场制备", + "comment": "现场制备,取值1为现场制备,取值0为非现场制备" + }, + { + "name": "Clear_Water_Concrete", + "alias": "清水混凝土", + "comment": "清水混凝土,取值1为清水混凝土,取值0为非清水混凝土" + }, + { + "name": "Debugging_Fee_Calculation", + "alias": "调试费计取", + "comment": "调试费计取,取值计取、不计取" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/projectstruct/博微电力造价工程文件格式_TotalCalculateTable.json b/backend/data/projectstruct/博微电力造价工程文件格式_TotalCalculateTable.json new file mode 100644 index 0000000..3f5d25b --- /dev/null +++ b/backend/data/projectstruct/博微电力造价工程文件格式_TotalCalculateTable.json @@ -0,0 +1,86 @@ +{ + "Table": [ + { + "name": "TotalCalculateTable", + "alias": "总算表", + "comment": "总算表也被称为“工程总费用”、“工程费用”。其中包含本地工程、辅助设施工程、编制基准期价差、设备购置费、其他费用、基本预备费、特殊费用、工程静态投资、动态费用、价差预备费、建设期贷款利息、工程动态投资、可抵扣增值税额。查询示例: SELECT Amount FROM TotalCalculateTable WHERE Name = 'findname'。", + "fileds": [ + { + "name": "Id", + "alias": "id,项目id,费用id", + "comment": "费用项目id" + }, + { + "name": "ParentId", + "alias": "父id,父级id", + "comment": "费用项目父级id" + }, + { + "name": "Level", + "alias": "层级,层编号,层号", + "comment": "层级编号,从1开始" + }, + { + "name": "Name", + "alias": "费用名,名称,项目名", + "comment": "费用名称,项目名称" + }, + { + "name": "SerialNumber", + "alias": "序号", + "comment": "工程费用序号" + }, + { + "name": "Code", + "alias": "代码,代号,编号", + "comment": "费用代码" + }, + { + "name": "Rate", + "alias": "费用利率,费率", + "comment": "费率" + }, + { + "name": "Amount", + "alias": "金额,价格", + "comment": "合计费" + }, + { + "name": "WBS_Code", + "alias": "WBS编号,WBS编码", + "comment": "费用编码" + }, + { + "name": "Path", + "alias": "费用全路径,路径", + "comment": "费用名称全路径" + }, + { + "name": "Amount_InstallationCost", + "alias": "安装价格,安装金额,金额_安装费", + "comment": "安装费金额" + }, + { + "name": "Amount_EquipmentCost", + "alias": "设备价格,金额_设备费,设备金额", + "comment": "设备费金额" + }, + { + "name": "Amount_OtherCost", + "alias": "其他费用价格,其他费用金额,金额_其他费", + "comment": "其他费金额" + }, + { + "name": "Amount_Total", + "alias": "总的金额,金额_占总计,总体金额", + "comment": "合计费占总计" + }, + { + "name": "Amount_UnitInvestment", + "alias": "合计投资金额,金额_单位投资", + "comment": "合计费单位投资" + } + ] + } + ] +} \ No newline at end of file diff --git a/backend/data/博微电力造价工程业务数据说明.docx b/backend/data/博微电力造价工程业务数据说明.docx new file mode 100644 index 0000000..425772f Binary files /dev/null and b/backend/data/博微电力造价工程业务数据说明.docx differ diff --git a/backend/data/工程造价基础知识.doc b/backend/data/工程造价基础知识.doc new file mode 100644 index 0000000..27d4d9f Binary files /dev/null and b/backend/data/工程造价基础知识.doc differ diff --git a/backend/data/电力造价知识.docx b/backend/data/电力造价知识.docx new file mode 100644 index 0000000..45ff225 Binary files /dev/null and b/backend/data/电力造价知识.docx differ diff --git a/backend/main.py b/backend/main.py index a72745e..cbf7969 100644 --- a/backend/main.py +++ b/backend/main.py @@ -15,50 +15,53 @@ from app.observability import init_observability from fastapi.staticfiles import StaticFiles -app = FastAPI() - -init_settings() -init_observability() - -environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set logger = logging.getLogger("uvicorn") +app = None -if environment == "dev": - logger.warning("Running in development mode - allowing CORS for all origins") - app.add_middleware( - CORSMiddleware, - allow_origins=["*"], - allow_credentials=True, - allow_methods=["*"], - allow_headers=["*"], - ) +def init_webserver(): + global app + app = FastAPI() + environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set + if environment == "dev": + logger.warning("Running in development mode - allowing CORS for all origins") + app.add_middleware( + CORSMiddleware, + allow_origins=["*"], + allow_credentials=True, + allow_methods=["*"], + allow_headers=["*"], + ) + + def mount_static_files(directory, path): + if os.path.exists(directory): + for dir, _, _ in os.walk(directory): + relative_path = os.path.relpath(dir, directory) + mount_path = path if relative_path == "." else f"{path}/{relative_path}" + logger.info(f"Mounting static files '{dir}' at {mount_path}") + app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static") + + # Mount the data files to serve the file viewer + mount_static_files("data", "/api/files/data") + # Mount the output files from tools + mount_static_files("data_output", "/api/files/output") + app.include_router(chat_router, prefix="/api/chat") + app.include_router(file_upload_router, prefix="/api/chat/upload") # Redirect to documentation page when accessing base URL @app.get("/") async def redirect_to_docs(): return RedirectResponse(url="/docs") - -def mount_static_files(directory, path): - if os.path.exists(directory): - for dir, _, _ in os.walk(directory): - relative_path = os.path.relpath(dir, directory) - mount_path = path if relative_path == "." else f"{path}/{relative_path}" - logger.info(f"Mounting static files '{dir}' at {mount_path}") - app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static") - - -# Mount the data files to serve the file viewer -mount_static_files("data", "/api/files/data") -# Mount the output files from tools -mount_static_files("output", "/api/files/output") - -app.include_router(chat_router, prefix="/api/chat") -app.include_router(file_upload_router, prefix="/api/chat/upload") - if __name__ == "__main__": - app_host = os.getenv("APP_HOST", "0.0.0.0") - app_port = int(os.getenv("APP_PORT", "8000")) - reload = True if environment == "dev" else False + from phoenix.trace import using_project + with using_project(os.getenv("PHOENIX_PROJECT_NAME")) as obj: - uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload) + init_settings() + init_observability() + init_webserver() + + app_host = os.getenv("APP_HOST", "0.0.0.0") + app_port = int(os.getenv("APP_PORT", "8000")) + #reload = True if environment == "dev" else False + reload = False + uvicorn.run(app=app, host=app_host, port=app_port, reload=reload) diff --git a/backend/pyproject.toml b/backend/pyproject.toml index 21715b3..527c0ef 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -11,11 +11,25 @@ generate = "app.engine.generate:generate_datasource" [tool.poetry.dependencies] python = "^3.11,<3.12" -fastapi = "^0.109.1" +fastapi = "^0.112.0" python-dotenv = "^1.0.0" -aiostream = "^0.5.2" -llama-index = "0.10.58" +aiostream = "^0.6.2" +llama-index = "0.10.63" cachetools = "^5.3.3" +protobuf = "4.25.4" +#arize-phoenix = "^4.12.0" +openinference-instrumentation-llama-index="2.2.3" +llama-index-callbacks-arize-phoenix = "^0.1.4" +llama-index-llms-dashscope = "^0.1.2" +llama-index-embeddings-dashscope = "^0.1.4" +llama-index-postprocessor-dashscope-rerank-custom = "0.1.0" +qdrant-client="^1.10.1" +llama-index-vector-stores-qdrant = "^0.2.14" +chroma="^0.5.5" +llama-index-vector-stores-chroma = "^0.1.10" +llama-index-readers-json = "^0.1.5" + +duckduckgo_search = "^6.2.6" [tool.poetry.dependencies.uvicorn] extras = [ "standard" ] @@ -28,11 +42,11 @@ version = "^0.1.3" version = "^1.1.0" extras = [ "rsa" ] -[tool.poetry.dependencies.psycopg2] -version = "^2.9.9" +#[tool.poetry.dependencies.psycopg2] +#version = "^2.9.9" -[tool.poetry.dependencies.llama-index-indices-managed-llama-cloud] -version = "^0.2.7" +#[tool.poetry.dependencies.llama-index-indices-managed-llama-cloud] +#version = "^0.2.7" [tool.poetry.dependencies.docx2txt] version = "^0.8" @@ -40,8 +54,6 @@ version = "^0.8" [tool.poetry.dependencies.e2b_code_interpreter] version = "0.0.7" -[tool.poetry.dependencies.llama-index-agent-openai] -version = "0.2.6" [build-system] requires = [ "poetry-core" ] diff --git a/backend/run-data.bat b/backend/run-data.bat new file mode 100644 index 0000000..8314019 --- /dev/null +++ b/backend/run-data.bat @@ -0,0 +1,4 @@ +rmdir /S /Q storage_vector +rmdir /S /Q storage + +C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python app/engine/generate.py \ No newline at end of file diff --git a/backend/run-test.bat b/backend/run-test.bat new file mode 100644 index 0000000..12114d2 --- /dev/null +++ b/backend/run-test.bat @@ -0,0 +1,4 @@ +rmdir /S /Q storage_vector +rmdir /S /Q storage + +C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python tests/query.py \ No newline at end of file diff --git a/backend/run.bat b/backend/run.bat new file mode 100644 index 0000000..0e0df02 --- /dev/null +++ b/backend/run.bat @@ -0,0 +1 @@ +C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python main.py \ No newline at end of file diff --git a/backend/tests/query.py b/backend/tests/query.py new file mode 100644 index 0000000..48ca304 --- /dev/null +++ b/backend/tests/query.py @@ -0,0 +1,67 @@ +import os +from ctypes import cast + +from llama_index.core import VectorStoreIndex, SQLDatabase +from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine +from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex +from llama_index.readers.database import DatabaseReader +from sqlalchemy import create_engine + +from app.api.routers.chat import generate_filters +from app.engine import get_index, makeDescriptionByEngine +from app.engine.loaders.db import CustomDatabaseReader +from app.engine.vectordb import get_vector_store +from app.observability import init_observability +from app.settings import init_settings + + +def main(): + init_settings() + init_observability() + + index = get_index() + + top_k = 5 + filters = generate_filters([]) + #question = "从工程属性表中查找工程名称" + question = "总算表中名称等于架空输电线路本体工程的金额?" + # 创建向量检索查询工具 + query_engine = index.as_query_engine( + similarity_top_k=top_k, filters=filters + ) + query_result = query_engine.query(question) + print(query_result) + + engine = create_engine(os.getenv("SQL_DATABASE_URL", "")) + sql_database = SQLDatabase(engine) + + loader = CustomDatabaseReader(sql_database) + documents = loader.load_data(query="select * from ProjectProperties") + + table_schema_objs = makeDescriptionByEngine(sql_database) + table_node_mapping = SQLTableNodeMapping(sql_database) + + vectorIndex = VectorStoreIndex() + # 创建SQL查询工具 + sql_obj_index = ObjectIndex.from_objects( + table_schema_objs, + table_node_mapping, + index_cls=VectorStoreIndex, + ) + + query_result =vectorIndex.as_query_engine( + similarity_top_k=top_k, filters=filters + ).query(question) + print(query_result) + + sql_query_engine = SQLTableRetrieverQueryEngine(sql_database, + sql_obj_index.as_retriever(similarity_top_k=1)) + sql_query_result = sql_query_engine.query(question) + print(sql_query_result) + + +if __name__ == "__main__": + from phoenix.trace import using_project + + with using_project("ly_zjapp_test") as obj: + main() \ No newline at end of file diff --git a/frontend/.env b/frontend/.env index faf27f1..d52b5f0 100644 --- a/frontend/.env +++ b/frontend/.env @@ -1,6 +1,8 @@ # The backend API for chat endpoint. NEXT_PUBLIC_CHAT_API=http://localhost:8000/api/chat -# Let's the user change indexes in LlamaCloud projects -NEXT_PUBLIC_USE_LLAMACLOUD=true +PHOENIX_SERVER_URL=http://localhost:6006/ + +# Let's the user change indexes in LlamaCloud projects +NEXT_PUBLIC_USE_LLAMACLOUD=false diff --git a/frontend/app/components/header.tsx b/frontend/app/components/header.tsx index f02ce73..c08a67c 100644 --- a/frontend/app/components/header.tsx +++ b/frontend/app/components/header.tsx @@ -1,18 +1,20 @@ import Image from "next/image"; +const phoenixUrl = process.env.PHOENIX_SERVER_URL; + export default function Header() { return (

- Get started by editing  - app/page.tsx + 清空当前会话

- Built by LlamaIndex + 打开监控平台 - Stop generating + 停止 )} {props.showReload && ( )}
diff --git a/frontend/app/components/ui/chat/chat-input.tsx b/frontend/app/components/ui/chat/chat-input.tsx index 4c58296..5bbb435 100644 --- a/frontend/app/components/ui/chat/chat-input.tsx +++ b/frontend/app/components/ui/chat/chat-input.tsx @@ -68,7 +68,7 @@ export default function ChatInput( const handleUploadFile = async (file: File) => { if (imageUrl || files.length > 0) { - alert("You can only upload one file at a time."); + alert("同一时刻只能上传一个文件。"); return; } try { @@ -99,10 +99,11 @@ export default function ChatInput(
)}
- )}
diff --git a/frontend/app/components/ui/chat/chat-message/chat-events.tsx b/frontend/app/components/ui/chat/chat-message/chat-events.tsx index 3dfad75..4000520 100644 --- a/frontend/app/components/ui/chat/chat-message/chat-events.tsx +++ b/frontend/app/components/ui/chat/chat-message/chat-events.tsx @@ -17,7 +17,7 @@ export function ChatEvents({ }) { const [isOpen, setIsOpen] = useState(false); - const buttonLabel = isOpen ? "Hide events" : "Show events"; + const buttonLabel = isOpen ? "隐藏" : "详情"; const EventIcon = isOpen ? ( diff --git a/frontend/app/components/ui/chat/chat-message/chat-sources.tsx b/frontend/app/components/ui/chat/chat-message/chat-sources.tsx index 1d4ccb6..8736375 100644 --- a/frontend/app/components/ui/chat/chat-message/chat-sources.tsx +++ b/frontend/app/components/ui/chat/chat-message/chat-sources.tsx @@ -9,23 +9,33 @@ import { import { useCopyToClipboard } from "../hooks/use-copy-to-clipboard"; import { SourceData } from "../index"; import PdfDialog from "../widgets/PdfDialog"; +import { useClientConfig } from "../hooks/use-config"; const SCORE_THRESHOLD = 0.3; -function SourceNumberButton({ index }: { index: number }) { +function truncateNumber(num: number | undefined, precision: number): number { + if (num == undefined || num == 0) return 0; + const factor = Math.pow(10, precision); + return Math.trunc(num * factor) / factor; +} + +function SourceNumberButton({ index, score }: { index: number, score: number | undefined }) { return ( -
- {index + 1} +
+ {truncateNumber(score, 2)}
); } type NodeInfo = { id: string; + score?: number; + text: string; url?: string; }; export function ChatSources({ data }: { data: SourceData }) { + const { backend } = useClientConfig(); const sources: NodeInfo[] = useMemo(() => { // aggregate nodes by url or file_path (get the highest one by score) const nodesByPath: { [path: string]: NodeInfo } = {}; @@ -36,6 +46,8 @@ export function ChatSources({ data }: { data: SourceData }) { .forEach((node) => { const nodeInfo = { id: node.id, + score: node.score, + text: node.text, url: node.url, }; const key = nodeInfo.url ?? nodeInfo.id; // use id as key for UNKNOWN type @@ -51,7 +63,7 @@ export function ChatSources({ data }: { data: SourceData }) { return (
- Sources: + 来源:
{sources.map((nodeInfo: NodeInfo, index: number) => { if (nodeInfo.url?.endsWith(".pdf")) { @@ -59,8 +71,8 @@ export function ChatSources({ data }: { data: SourceData }) { } + url={backend+nodeInfo.url} + trigger={} /> ); } @@ -68,9 +80,9 @@ export function ChatSources({ data }: { data: SourceData }) {
- + - + @@ -83,6 +95,7 @@ export function ChatSources({ data }: { data: SourceData }) { } function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) { + const { backend } = useClientConfig(); const { isCopied, copyToClipboard } = useCopyToClipboard({ timeout: 1000 }); if (nodeInfo.url) { @@ -92,10 +105,10 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
- {nodeInfo.url} + {nodeInfo.text}
diff --git a/frontend/app/components/ui/chat/chat.interface.ts b/frontend/app/components/ui/chat/chat.interface.ts index 6b74d4f..5483abd 100644 --- a/frontend/app/components/ui/chat/chat.interface.ts +++ b/frontend/app/components/ui/chat/chat.interface.ts @@ -10,7 +10,7 @@ export interface ChatHandler { data?: any; }, ) => void; - handleInputChange: (e: React.ChangeEvent) => void; + handleInputChange: (e: React.ChangeEvent) => void; reload?: () => void; stop?: () => void; onFileUpload?: (file: File) => Promise; diff --git a/frontend/app/components/ui/chat/hooks/use-file.ts b/frontend/app/components/ui/chat/hooks/use-file.ts index 2c2c34b..bf1c508 100644 --- a/frontend/app/components/ui/chat/hooks/use-file.ts +++ b/frontend/app/components/ui/chat/hooks/use-file.ts @@ -63,7 +63,7 @@ export function useFile() { ...requestParams, }), }); - if (!response.ok) throw new Error("Failed to upload document."); + if (!response.ok) throw new Error("上传文件时发生错误。"); return await response.json(); }; @@ -109,7 +109,7 @@ export function useFile() { } const filetype = docMineTypeMap[file.type]; - if (!filetype) throw new Error("Unsupported document type."); + if (!filetype) throw new Error("不支持的文件类型。"); const newDoc: Omit = { id: uuidv4(), filetype, diff --git a/frontend/app/components/ui/file-uploader.tsx b/frontend/app/components/ui/file-uploader.tsx index e42a267..fe4f062 100644 --- a/frontend/app/components/ui/file-uploader.tsx +++ b/frontend/app/components/ui/file-uploader.tsx @@ -32,7 +32,7 @@ export default function FileUploader({ const allowedExtensions = config?.allowedExtensions; const defaultCheckExtension = (extension: string) => { if (allowedExtensions && !allowedExtensions.includes(extension)) { - return `Invalid file type. Please select a file with one of these formats: ${allowedExtensions!.join( + return `无效的文件类型。请选择一个以下格式的文件: ${allowedExtensions!.join( ",", )}`; } @@ -69,7 +69,7 @@ export default function FileUploader({ if (isFileSizeExceeded(file)) { return onFileUploadError( - `File size exceeded. Limit is ${fileSizeLimit / 1024 / 1024} MB`, + `文件尺寸超标。请选择不大于 ${fileSizeLimit / 1024 / 1024} MB 的文件。`, ); } diff --git a/frontend/app/layout.tsx b/frontend/app/layout.tsx index 8f7cab9..9fdbff4 100644 --- a/frontend/app/layout.tsx +++ b/frontend/app/layout.tsx @@ -3,11 +3,11 @@ import { Inter } from "next/font/google"; import "./globals.css"; import "./markdown.css"; -const inter = Inter({ subsets: ["latin"] }); +const inter = Inter({ subsets: ["latin", "latin-ext"] }); export const metadata: Metadata = { - title: "Create Llama App", - description: "Generated by create-llama", + title: "博微造价工程文件知识问答", + description: "博微技术中心搭建的造价工程文件知识问答", }; export default function RootLayout({ @@ -16,7 +16,7 @@ export default function RootLayout({ children: React.ReactNode; }) { return ( - + {children} ); diff --git a/frontend/app/page.tsx b/frontend/app/page.tsx index 04d4302..44625f9 100644 --- a/frontend/app/page.tsx +++ b/frontend/app/page.tsx @@ -4,7 +4,7 @@ import ChatSection from "./components/chat-section"; export default function Home() { return (
-
+
diff --git a/frontend/run.bat b/frontend/run.bat new file mode 100644 index 0000000..b896a08 --- /dev/null +++ b/frontend/run.bat @@ -0,0 +1 @@ +npm run dev \ No newline at end of file diff --git a/phoenixserver/.env b/phoenixserver/.env new file mode 100644 index 0000000..1754812 --- /dev/null +++ b/phoenixserver/.env @@ -0,0 +1,3 @@ +ENV_PHOENIX_HOST=0.0.0.0 +ENV_PHOENIX_PORT=6006 +PHOENIX_HOST_ROOT_PATH=./.phoenix/ \ No newline at end of file diff --git a/phoenixserver/phoenixserver.py b/phoenixserver/phoenixserver.py new file mode 100644 index 0000000..7dd0e91 --- /dev/null +++ b/phoenixserver/phoenixserver.py @@ -0,0 +1,18 @@ +import os +import phoenix as px + + +os.environ['PHOENIX_HOST'] = "0.0.0.0" + +session = px.launch_app(use_temp_dir=False) + +import msvcrt + +def wait_for_keypress(): + print("Press any key to continue...") + msvcrt.getch() # 等待按键 + print("\nKey pressed!") + +wait_for_keypress() + +px.close_app() \ No newline at end of file diff --git a/phoenixserver/runphoenixserver.bat b/phoenixserver/runphoenixserver.bat new file mode 100644 index 0000000..a593c5d --- /dev/null +++ b/phoenixserver/runphoenixserver.bat @@ -0,0 +1,5 @@ +SET ENV_PHOENIX_HOST=0.0.0.0 +SET ENV_PHOENIX_PORT=6006 +SET PHOENIX_HOST_ROOT_PATH=./.phoenix/ + +C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-pCyqx0Uo-py3.11\Scripts\python phoenixserver.py \ No newline at end of file