From b008ad9766f8df021b272a54ca38f5079f8995cf Mon Sep 17 00:00:00 2001 From: chentianrui Date: Wed, 28 Aug 2024 09:39:57 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=94=B9=E4=BA=86=E6=8F=90=E7=A4=BA?= =?UTF-8?q?=E8=AF=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/app/engine/loaders/__init__.py | 26 +++---- backend/app/engine/loaders/db.py | 94 +++++++------------------- backend/app/engine/prompt.py | 12 ++-- 3 files changed, 44 insertions(+), 88 deletions(-) diff --git a/backend/app/engine/loaders/__init__.py b/backend/app/engine/loaders/__init__.py index a220170..241f68a 100644 --- a/backend/app/engine/loaders/__init__.py +++ b/backend/app/engine/loaders/__init__.py @@ -1,24 +1,26 @@ import logging - -import yaml -from app.engine.loaders.db import DBLoaderConfig, get_db_documents +# import yaml from app.engine.loaders.file import FileLoaderConfig, get_file_documents from app.engine.loaders.web import WebLoaderConfig, get_web_documents logger = logging.getLogger(__name__) - -def load_configs(): - with open("config/loaders.yaml") as f: - configs = yaml.safe_load(f) - return configs +# 注释掉 load_configs 函数 +# def load_configs(): +# with open("config/loaders.yaml") as f: +# configs = yaml.safe_load(f) +# return configs def get_documents(): documents = [] - config = load_configs() - if config is None or len(config.items()) == 0: - return documents + # 注释掉对 load_configs 的调用 + # config = load_configs() + # if config is None or len(config.items()) == 0: + # return documents + + # 使用一个空的 config 替代原有的配置加载逻辑 + config = {} for loader_type, loader_config in config.items(): logger.info( @@ -31,8 +33,6 @@ def get_documents(): document = get_file_documents(FileLoaderConfig(**loader_config)) case "web": document = get_web_documents(WebLoaderConfig(**loader_config)) - case "db": - document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config]) case _: raise ValueError(f"Invalid loader type: {loader_type}") documents.extend(document) diff --git a/backend/app/engine/loaders/db.py b/backend/app/engine/loaders/db.py index d6310e2..0289fb5 100644 --- a/backend/app/engine/loaders/db.py +++ b/backend/app/engine/loaders/db.py @@ -1,18 +1,12 @@ import logging from typing import Any, List, Optional -from llama_index.core import SQLDatabase, Document -from llama_index.core.objects import SQLTableSchema -from llama_index.core.readers.base import BaseReader -from llama_index.readers.database import DatabaseReader +from llama_index.core import Document from pydantic import BaseModel -from sqlalchemy import create_engine -from sqlalchemy import text -from sqlalchemy.engine import Engine logger = logging.getLogger(__name__) -class CustomDatabaseReader(BaseReader): +class CustomDatabaseReader: """Simple Database reader. Concatenates each row into Document used by LlamaIndex. @@ -45,8 +39,8 @@ class CustomDatabaseReader(BaseReader): def __init__( self, - sql_database: Optional[SQLDatabase] = None, - engine: Optional[Engine] = None, + sql_database: Optional[Any] = None, + engine: Optional[Any] = None, uri: Optional[str] = None, scheme: Optional[str] = None, host: Optional[str] = None, @@ -58,50 +52,24 @@ class CustomDatabaseReader(BaseReader): **kwargs: Any, ) -> None: """Initialize with parameters.""" - if sql_database: - self.sql_database = sql_database - elif engine: - self.sql_database = SQLDatabase(engine, *args, **kwargs) - elif uri: - self.uri = uri - self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) - elif scheme and host and port and user and password and dbname: - uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}" - self.uri = uri - self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs) - else: - raise ValueError( - "You must provide either a SQLDatabase, " - "a SQL Alchemy Engine, a valid connection URI, or a valid " - "set of credentials." - ) + # Setting the database-related properties to None + self.sql_database = None + self.uri = None - def load_data(self, query: str) -> List[Document]: - """Query and load data from the Database, returning a list of Documents. + def load_data(self, query: str, explanation: str) -> List[Document]: + """Simulate loading data without a database connection. Args: - query (str): Query parameter to filter tables and rows. + query (str): Query parameter (not used). + explanation (str): Explanation to be included in the document. Returns: List[Document]: A list of Document objects. """ - dco_str = "" - with self.sql_database.engine.connect() as connection: - if query is None: - raise ValueError("A query parameter is necessary to filter the data") - else: - result = connection.execute(text(query)) - - dco_str = ", ".join( - [f"{entry}" for entry in result.keys()] - ) - - for item in result.fetchall(): - # fetch each item - record_str = ", ".join( - [f"{entry}" for col, entry in zip(result.keys(), item)] - ) - dco_str += record_str + "\n" + dco_str = explanation + "\n" + # Simulate data without querying a real database + dco_str += "Simulated column1, Simulated column2\n" + dco_str += "Simulated data1, Simulated data2\n" doc = Document(text=dco_str) doc.metadata["name"] = query @@ -111,7 +79,7 @@ class CustomDatabaseReader(BaseReader): class DBLoaderConfig(BaseModel): uri: str - queries: List[str] + queries: List[dict] def get_db_documents(configs: list[DBLoaderConfig]): docs = [] @@ -123,33 +91,17 @@ def get_db_documents(configs: list[DBLoaderConfig]): return docs metadata = { - #'file_name':'', - 'file_type':'application/booway.document.zj', - #'file_path':'', - #'file_size':'', - #'creation_date':'', - #'last_modified_date':'', + 'file_type': 'application/booway.document.zj', } - #from llama_index.readers.database import DatabaseReader for entry in configs: - engine = create_engine(entry.uri) - sql_database = SQLDatabase(engine) - - # table_schema_objs = makeDescriptionByEngine(sql_database) - # table_node_mapping = SQLTableNodeMapping(sql_database) - # - # nodes = table_node_mapping.to_nodes(table_schema_objs) - # for node in nodes: - # node.metadata.update(metadata) - # - # docs.extend(nodes) - - queries = entry.queries or [] - loader = CustomDatabaseReader(sql_database) - for query in queries: + # Skipping the database connection part + loader = CustomDatabaseReader() + for query_dict in entry.queries: + query = query_dict.get("sql", "") + explanation = query_dict.get("explanation", "") logger.info(f"Loading data from database with query: {query}") - documents = loader.load_data(query=query) + documents = loader.load_data(query=query, explanation=explanation) docs.extend(documents) return docs diff --git a/backend/app/engine/prompt.py b/backend/app/engine/prompt.py index 101b6bf..29a2283 100644 --- a/backend/app/engine/prompt.py +++ b/backend/app/engine/prompt.py @@ -5,6 +5,8 @@ text_qa_template_str = ( "你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。" "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答," "如同直接从文件中提取的内容。\n" + "知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n" + "例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n" "## 技能\n" "### 技能 1: 数据查询与提供\n" @@ -39,15 +41,17 @@ refine_template_str = ( "这是原本的问题: {query_str}\n" "我们已经提供了回答: {existing_answer}\n" "现在我们有机会改进这个回答 " - "使用以下更多上下文(仅当需要用时)\n" + "使用以下更多上下文(仅当有助于改进回答时使用)\n" + "如果新的上下文对回答没有影响,或者原来的回答已经正确,不要在上次回答的后边再加上多余的补充信息,直接返回原本的回答。\n" + "如果新的上下文对回答没有影响,或者原来的回答已经正确,不要在上次回答的后边再加上多余的补充信息,直接返回原本的回答。\n" "------------\n" "{context_msg}\n" "------------\n" - "根据新的上下文, 请改进原来的回答。" - "如果新的上下文没有用, 直接返回原本的回答。\n" - "如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n" + "如果回答中已经包含有正确答案,不要返回多余的解释等信息,只返回正确答案\n" + "如果是表结构或者是数据库的相关内容,仅用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n" "改进的回答: " ) + refine_template = PromptTemplate(refine_template_str) summary_template_str = (