2024-08-29 10:13:10 +08:00
4 changed files with 55 additions and 65 deletions
@@ -1,5 +1,4 @@
 import logging
 import yaml
 from app.engine.loaders.db import DBLoaderConfig, get_db_documents
 from app.engine.loaders.file import FileLoaderConfig, get_file_documents
@@ -17,10 +16,12 @@ def load_configs():
 def get_documents():
    documents = []
    config = load_configs()
    if config is None or len(config.items()) == 0:
        return documents
    for loader_type, loader_config in config.items():
        if loader_config.get('enable', True):  # Loaders are enabled unless their config sets enable: false
            logger.info(
                f"Loading documents from loader: {loader_type}, config: {loader_config}"
            )
@@ -2,17 +2,14 @@ import logging
 from typing import Any, List, Optional
 from llama_index.core import SQLDatabase, Document
 from llama_index.core.objects import SQLTableSchema
 from llama_index.core.readers.base import BaseReader
 from llama_index.readers.database import DatabaseReader
 from pydantic import BaseModel
-from sqlalchemy import create_engine
+from sqlalchemy import create_engine, text
 from sqlalchemy import text
 from sqlalchemy.engine import Engine
 logger = logging.getLogger(__name__)
-class CustomDatabaseReader(BaseReader):
+class CustomDatabaseReader(DatabaseReader):
    """Simple Database reader.
    Concatenates each row into Document used by LlamaIndex.
@@ -86,18 +83,19 @@ class CustomDatabaseReader(BaseReader):
            List[Document]: A list of Document objects.
        """
        dco_str = ""  
        with self.sql_database.engine.connect() as connection:
            if query is None:
                raise ValueError("A query parameter is necessary to filter the data")
            else:
                result = connection.execute(text(query))
-            dco_str = ", ".join(
+            dco_str += ", ".join(
                [f"{entry}" for entry in result.keys()]
-            )
+            ) + "\n"
            for item in result.fetchall():
-                # fetch each item
+                # Fetch each item
                record_str = ", ".join(
                    [f"{entry}" for col, entry in zip(result.keys(), item)]
                )
@@ -111,45 +109,36 @@ class CustomDatabaseReader(BaseReader):
 class DBLoaderConfig(BaseModel):
    uri: str
-    queries: List[str]
+    queries: List[dict]  
-def get_db_documents(configs: list[DBLoaderConfig]):
+def get_db_documents(configs: List[DBLoaderConfig]) -> List[Document]:
    docs = []
-    if len(configs) == 0 or configs[0].uri == "":
+    if not configs or not configs[0].uri:
        logger.warning(
            f"Failed to load database, error message: uri is empty. Return as empty document list."
        )
        return docs
    metadata = {
        #'file_name':'',
        'file_type': 'application/booway.document.zj',
        #'file_path':'',
        #'file_size':'',
        #'creation_date':'',
        #'last_modified_date':'',
    }
    #from llama_index.readers.database import DatabaseReader
    for entry in configs:
        engine = create_engine(entry.uri)
        sql_database = SQLDatabase(engine)
        # table_schema_objs = makeDescriptionByEngine(sql_database)
        # table_node_mapping = SQLTableNodeMapping(sql_database)
        #
        # nodes = table_node_mapping.to_nodes(table_schema_objs)
        # for node in nodes:
        #     node.metadata.update(metadata)
        #
        # docs.extend(nodes)
        queries = entry.queries or []
        loader = CustomDatabaseReader(sql_database)
-        for query in queries:
+        for query_dict in entry.queries:
            query = query_dict.get("sql", "")
            explanation = query_dict.get("explanation", "")
            logger.info(f"Loading data from database with query: {query}")
            documents = loader.load_data(query=query)
-            docs.extend(documents)
+            # Attach the query's explanation to each document's metadata
            for doc in documents:
                doc.metadata["explanation"] = explanation
                doc.metadata.update(metadata)  # Merge in the shared static metadata (file_type)
                docs.append(doc)
    return docs
@@ -5,6 +5,8 @@ text_qa_template_str = (
    "你是一名博微造价工程数据查询助手，专精于电力工程文件中的信息。"
    "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答，"
    "如同直接从文件中提取的内容。\n"
    "知识库中已经导入一个工程的全部数据，请你站在当前工程的角度回答用户关于工程文件的问题。\n"
    "例如：询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n"
    "## 技能\n"
    "### 技能 1: 数据查询与提供\n"
@@ -39,15 +41,19 @@ refine_template_str = (
    "这是原本的问题： {query_str}\n"
    "我们已经提供了回答: {existing_answer}\n"
    "现在我们有机会改进这个回答 "
-    "使用以下更多上下文（仅当需要用时）\n"
+    "使用以下更多上下文（仅当有助于改进回答时使用）\n"
    "你需要仔细的判断新的上下文的信息与原本问题必须一个字都不差，如果有一点差别，那就不能改变我现有的回答。\n"
    "在判断回答是否正确的时候，你应该仔细对比新的上下文中包含的信息是否与原本的问题一字不差，如果一字不差，才能当作新的正确回答。\n"
    "如果新的上下文对回答没有影响，或者原来的回答已经正确，不要在上次回答的后边再加上多余的补充信息，直接返回原本的回答。\n"
    "判断一下如果原回答正确，且在新的上下文仍然包含正确的回答，请将新的回答与原回答一起返回。\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
-    "根据新的上下文, 请改进原来的回答。"
+    "如果回答中已经包含有正确答案，不要返回多余的解释等信息，只返回正确答案\n"
-    "如果新的上下文没有用, 直接返回原本的回答。\n"
+    "如果是表结构或者是数据库的相关内容，仅用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "如果是表结构或者是数据库的相关内容，只用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "改进的回答: "
 )
 refine_template = PromptTemplate(refine_template_str)
 summary_template_str = (
@@ -1,4 +1,5 @@
 file:
  enable: true  # Set to false to skip this loader (defaults to true when omitted)
  # use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
  use_llama_parse: false
@@ -7,27 +8,20 @@ db:
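  # uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
  # NOTE(review): avoid committing real credentials in this file; consider supplying the URI from an environment variable or a secret store.
  # queries: A list of entries, each with `sql` (the query to run, e.g.: SELECT * FROM table) and an optional `explanation` that is stored in the metadata of the loaded documents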
  - uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
-  #- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo
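+    enable: true  # Per-entry enable flag. NOTE(review): DBLoaderConfig declares only `uri` and `queries`, so this flag may be ignored - confirm it is actually read.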
 #  - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
    queries:
-      - sql: select * from ProjectProperties limit 30;
+      - sql: select * from ProjectProperties;
        explanation: "工程属性表数据，层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
      - sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
        explanation: "总算表数据，层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
-
+      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '线路' limit 50;
        explanation: "专业类型为线路的项目划分表数据，层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
-
+      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '余物清理' limit 50;
        explanation: "专业类型为余物清理的项目划分表数据，层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
-
+      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '拆除线路' limit 50;
        explanation: "专业类型为拆除线路的项目划分表数据，层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
      - sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
        explanation: "其他费用表数据，层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
 #web:
 #  driver_arguments:
 #    # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode