diff --git a/backend/app/engine/__init__.py b/backend/app/engine/__init__.py index 6e2a97a..4ee1c9c 100644 --- a/backend/app/engine/__init__.py +++ b/backend/app/engine/__init__.py @@ -31,13 +31,19 @@ def get_chat_engine(filters=None, params=None): summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool", description="适用于任何需要进行全面总结、概括的要求。", ) - query_engine = create_query_engine(index,top_k,use_reranker,filters) + query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "COMPACT") query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool", description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。", ) + + query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "TREE_SUMMARIZE") + query_engine_tool_1 = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool_1", + description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后,且在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用“zj_query_tool_1”工具。", + ) tools.append(summary_query_tool) tools.append(query_engine_tool) + tools.append(query_engine_tool_1) # Add additional tools tools += ToolFactory.from_env() diff --git a/backend/app/engine/engine.py b/backend/app/engine/engine.py index 379275e..4bbd993 100644 --- a/backend/app/engine/engine.py +++ b/backend/app/engine/engine.py @@ -86,7 +86,7 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None return summary_query_engine # Create a query engine -def create_query_engine(index, top_k=3, use_reranker=False, filters=None): +def create_query_engine(index, top_k=3, use_reranker=False, filters=None, response_mode=None): # 创建向量检索查询工具 postprocess = None if use_reranker: @@ -103,6 +103,7 @@ def create_query_engine(index, top_k=3, use_reranker=False, 
filters=None): node_postprocessors=postprocess, use_async=True, streaming=True, + response_mode=(response_mode or "compact").lower(), ) return query_engine \ No newline at end of file diff --git a/backend/app/engine/loaders/__init__.py b/backend/app/engine/loaders/__init__.py index a220170..4f585b4 100644 --- a/backend/app/engine/loaders/__init__.py +++ b/backend/app/engine/loaders/__init__.py @@ -1,5 +1,4 @@ import logging - import yaml from app.engine.loaders.db import DBLoaderConfig, get_db_documents from app.engine.loaders.file import FileLoaderConfig, get_file_documents @@ -17,24 +16,26 @@ def load_configs(): def get_documents(): documents = [] config = load_configs() + if config is None or len(config.items()) == 0: - return documents + return documents for loader_type, loader_config in config.items(): - logger.info( - f"Loading documents from loader: {loader_type}, config: {loader_config}" - ) + if not isinstance(loader_config, dict) or loader_config.get('enable', True): # skip only mapping configs that set enable: false; list/None configs (e.g. db) have no .get() + logger.info( + f"Loading documents from loader: {loader_type}, config: {loader_config}" + ) - loader_config = loader_config or [] - match loader_type: - case "file": - document = get_file_documents(FileLoaderConfig(**loader_config)) - case "web": - document = get_web_documents(WebLoaderConfig(**loader_config)) - case "db": - document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config]) - case _: - raise ValueError(f"Invalid loader type: {loader_type}") - documents.extend(document) + loader_config = loader_config or [] + match loader_type: + case "file": + document = get_file_documents(FileLoaderConfig(**loader_config)) + case "web": + document = get_web_documents(WebLoaderConfig(**loader_config)) + case "db": + document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config]) + case _: + raise ValueError(f"Invalid loader type: {loader_type}") + documents.extend(document) - return documents + return documents \ No newline at end of file diff --git a/backend/app/engine/loaders/db.py 
b/backend/app/engine/loaders/db.py index d6310e2..00c0381 100644 --- a/backend/app/engine/loaders/db.py +++ b/backend/app/engine/loaders/db.py @@ -2,17 +2,14 @@ import logging from typing import Any, List, Optional from llama_index.core import SQLDatabase, Document -from llama_index.core.objects import SQLTableSchema -from llama_index.core.readers.base import BaseReader from llama_index.readers.database import DatabaseReader from pydantic import BaseModel -from sqlalchemy import create_engine -from sqlalchemy import text +from sqlalchemy import create_engine, text from sqlalchemy.engine import Engine logger = logging.getLogger(__name__) -class CustomDatabaseReader(BaseReader): +class CustomDatabaseReader(DatabaseReader): """Simple Database reader. Concatenates each row into Document used by LlamaIndex. @@ -85,19 +82,20 @@ class CustomDatabaseReader(BaseReader): Returns: List[Document]: A list of Document objects. """ - dco_str = "" + dco_str = "" + with self.sql_database.engine.connect() as connection: if query is None: raise ValueError("A query parameter is necessary to filter the data") else: result = connection.execute(text(query)) - dco_str = ", ".join( + dco_str += ", ".join( [f"{entry}" for entry in result.keys()] - ) + ) + "\n" for item in result.fetchall(): - # fetch each item + # Fetch each item record_str = ", ".join( [f"{entry}" for col, entry in zip(result.keys(), item)] ) @@ -111,45 +109,36 @@ class CustomDatabaseReader(BaseReader): class DBLoaderConfig(BaseModel): uri: str - queries: List[str] + queries: List[dict] -def get_db_documents(configs: list[DBLoaderConfig]): +def get_db_documents(configs: List[DBLoaderConfig]) -> List[Document]: docs = [] - if len(configs) == 0 or configs[0].uri == "": + if not configs or not configs[0].uri: logger.warning( f"Failed to load database, error message: uri is empty. Return as empty document list." 
) return docs metadata = { - #'file_name':'', - 'file_type':'application/booway.document.zj', - #'file_path':'', - #'file_size':'', - #'creation_date':'', - #'last_modified_date':'', + 'file_type': 'application/booway.document.zj', } - #from llama_index.readers.database import DatabaseReader for entry in configs: engine = create_engine(entry.uri) sql_database = SQLDatabase(engine) - # table_schema_objs = makeDescriptionByEngine(sql_database) - # table_node_mapping = SQLTableNodeMapping(sql_database) - # - # nodes = table_node_mapping.to_nodes(table_schema_objs) - # for node in nodes: - # node.metadata.update(metadata) - # - # docs.extend(nodes) - - queries = entry.queries or [] loader = CustomDatabaseReader(sql_database) - for query in queries: + for query_dict in entry.queries: + query = query_dict.get("sql", "") + explanation = query_dict.get("explanation", "") logger.info(f"Loading data from database with query: {query}") documents = loader.load_data(query=query) - docs.extend(documents) - return docs + # 添加解释到元数据中 + for doc in documents: + doc.metadata["explanation"] = explanation + doc.metadata.update(metadata) # 更新或添加额外的元数据 + docs.append(doc) + + return docs \ No newline at end of file diff --git a/backend/app/engine/prompt.py b/backend/app/engine/prompt.py index 101b6bf..5871562 100644 --- a/backend/app/engine/prompt.py +++ b/backend/app/engine/prompt.py @@ -5,6 +5,8 @@ text_qa_template_str = ( "你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。" "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答," "如同直接从文件中提取的内容。\n" + "知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n" + "例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n" "## 技能\n" "### 技能 1: 数据查询与提供\n" @@ -39,15 +41,19 @@ refine_template_str = ( "这是原本的问题: {query_str}\n" "我们已经提供了回答: {existing_answer}\n" "现在我们有机会改进这个回答 " - "使用以下更多上下文(仅当需要用时)\n" + "使用以下更多上下文(仅当有助于改进回答时使用)\n" + "你需要仔细的判断新的上下文的信息与原本问题必须一个字都不差,如果有一点差别,那就不能改变我现有的回答。\n" + "在判断回答是否正确的时候,你应该仔细对比新的上下文中包含的信息是否与原本的问题一字不差,如果一字不差,才能当作新的正确回答。\n" + 
"如果新的上下文对回答没有影响,或者原来的回答已经正确,不要在上次回答的后边再加上多余的补充信息,直接返回原本的回答。\n" + "判断一下如果原回答正确,且在新的上下文仍然包含正确的回答,请将新的回答与原回答一起返回。\n" "------------\n" "{context_msg}\n" "------------\n" - "根据新的上下文, 请改进原来的回答。" - "如果新的上下文没有用, 直接返回原本的回答。\n" - "如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n" + "如果回答中已经包含有正确答案,不要返回多余的解释等信息,只返回正确答案\n" + "如果是表结构或者是数据库的相关内容,仅用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n" "改进的回答: " ) + refine_template = PromptTemplate(refine_template_str) summary_template_str = ( diff --git a/backend/config/loaders.yaml b/backend/config/loaders.yaml index c69c13e..af5d2fe 100644 --- a/backend/config/loaders.yaml +++ b/backend/config/loaders.yaml @@ -1,4 +1,5 @@ file: + enable: true # 添加 enable 字段 # use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable use_llama_parse: false @@ -7,27 +8,41 @@ db: # uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db # query: The query to fetch data from the database. 
E.g.: SELECT * FROM table - uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1 - #- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo -# - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2 + enable: true # enable flag queries: - - sql: select * from ProjectProperties limit 30; + - sql: select * from ProjectProperties; explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。" - sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable; explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。" - - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '线路' limit 50; + - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路'; explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。" - - - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '余物清理' limit 50; + - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理'; explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。" - - - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '拆除线路' limit 50; + - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路'; explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。" - sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee; explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = 
'线路取费表' + explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa' + explanation: "取费表名称为线路取费表(调试工程)aa的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表' + explanation: "取费表名称为大型土石方取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)' + explanation: "取费表名称为线路取费表(余物清理)的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)' + explanation: "取费表名称为线路取费表(余物清理)(1)的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)' + explanation: "取费表名称为线路取费表(拆除)的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中" + + - sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '线路' + explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中" + - sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '余物清理' + explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中" #web: # driver_arguments: # # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode diff --git a/backend/data/博微电力造价工程业务数据说明.docx b/backend/data/博微电力造价工程业务数据说明.docx index 425772f..670ce04 100644 Binary files a/backend/data/博微电力造价工程业务数据说明.docx and b/backend/data/博微电力造价工程业务数据说明.docx differ