dev #5

Closed
ly wants to merge 93 commits from dev into dev-db
3 changed files with 91 additions and 59 deletions
Showing only changes of commit 8a5facb5b6 - Show all commits
+13 -12
View File
@@ -1,28 +1,27 @@
import logging import logging
# import yaml import yaml
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
from app.engine.loaders.file import FileLoaderConfig, get_file_documents from app.engine.loaders.file import FileLoaderConfig, get_file_documents
from app.engine.loaders.web import WebLoaderConfig, get_web_documents from app.engine.loaders.web import WebLoaderConfig, get_web_documents
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
# Comment out the load_configs function
# def load_configs(): def load_configs():
# with open("config/loaders.yaml") as f: with open("config/loaders.yaml") as f:
# configs = yaml.safe_load(f) configs = yaml.safe_load(f)
# return configs return configs
def get_documents(): def get_documents():
documents = [] documents = []
# 注释掉对 load_configs 的调用 config = load_configs()
# config = load_configs()
# if config is None or len(config.items()) == 0:
# return documents
# 使用一个空的 config 替代原有的配置加载逻辑 if config is None or len(config.items()) == 0:
config = {} return documents
for loader_type, loader_config in config.items(): for loader_type, loader_config in config.items():
if loader_config.get('enable', True): # 检查 enable 字段
logger.info( logger.info(
f"Loading documents from loader: {loader_type}, config: {loader_config}" f"Loading documents from loader: {loader_type}, config: {loader_config}"
) )
@@ -33,6 +32,8 @@ def get_documents():
document = get_file_documents(FileLoaderConfig(**loader_config)) document = get_file_documents(FileLoaderConfig(**loader_config))
case "web": case "web":
document = get_web_documents(WebLoaderConfig(**loader_config)) document = get_web_documents(WebLoaderConfig(**loader_config))
case "db":
document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config])
case _: case _:
raise ValueError(f"Invalid loader type: {loader_type}") raise ValueError(f"Invalid loader type: {loader_type}")
documents.extend(document) documents.extend(document)
+58 -21
View File
@@ -1,12 +1,15 @@
import logging import logging
from typing import Any, List, Optional from typing import Any, List, Optional
from llama_index.core import Document from llama_index.core import SQLDatabase, Document
from llama_index.readers.database import DatabaseReader
from pydantic import BaseModel from pydantic import BaseModel
from sqlalchemy import create_engine, text
from sqlalchemy.engine import Engine
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class CustomDatabaseReader: class CustomDatabaseReader(DatabaseReader):
"""Simple Database reader. """Simple Database reader.
Concatenates each row into Document used by LlamaIndex. Concatenates each row into Document used by LlamaIndex.
@@ -39,8 +42,8 @@ class CustomDatabaseReader:
def __init__( def __init__(
self, self,
sql_database: Optional[Any] = None, sql_database: Optional[SQLDatabase] = None,
engine: Optional[Any] = None, engine: Optional[Engine] = None,
uri: Optional[str] = None, uri: Optional[str] = None,
scheme: Optional[str] = None, scheme: Optional[str] = None,
host: Optional[str] = None, host: Optional[str] = None,
@@ -52,24 +55,51 @@ class CustomDatabaseReader:
**kwargs: Any, **kwargs: Any,
) -> None: ) -> None:
"""Initialize with parameters.""" """Initialize with parameters."""
# Setting the database-related properties to None if sql_database:
self.sql_database = None self.sql_database = sql_database
self.uri = None elif engine:
self.sql_database = SQLDatabase(engine, *args, **kwargs)
elif uri:
self.uri = uri
self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
elif scheme and host and port and user and password and dbname:
uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}"
self.uri = uri
self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
else:
raise ValueError(
"You must provide either a SQLDatabase, "
"a SQL Alchemy Engine, a valid connection URI, or a valid "
"set of credentials."
)
def load_data(self, query: str, explanation: str) -> List[Document]: def load_data(self, query: str) -> List[Document]:
"""Simulate loading data without a database connection. """Query and load data from the Database, returning a list of Documents.
Args: Args:
query (str): Query parameter (not used). query (str): Query parameter to filter tables and rows.
explanation (str): Explanation to be included in the document.
Returns: Returns:
List[Document]: A list of Document objects. List[Document]: A list of Document objects.
""" """
dco_str = explanation + "\n" dco_str = ""
# Simulate data without querying a real database
dco_str += "Simulated column1, Simulated column2\n" with self.sql_database.engine.connect() as connection:
dco_str += "Simulated data1, Simulated data2\n" if query is None:
raise ValueError("A query parameter is necessary to filter the data")
else:
result = connection.execute(text(query))
dco_str += ", ".join(
[f"{entry}" for entry in result.keys()]
) + "\n"
for item in result.fetchall():
# Fetch each item
record_str = ", ".join(
[f"{entry}" for col, entry in zip(result.keys(), item)]
)
dco_str += record_str + "\n"
doc = Document(text=dco_str) doc = Document(text=dco_str)
doc.metadata["name"] = query doc.metadata["name"] = query
@@ -81,10 +111,10 @@ class DBLoaderConfig(BaseModel):
uri: str uri: str
queries: List[dict] queries: List[dict]
def get_db_documents(configs: list[DBLoaderConfig]): def get_db_documents(configs: List[DBLoaderConfig]) -> List[Document]:
docs = [] docs = []
if len(configs) == 0 or configs[0].uri == "": if not configs or not configs[0].uri:
logger.warning( logger.warning(
f"Failed to load database, error message: uri is empty. Return as empty document list." f"Failed to load database, error message: uri is empty. Return as empty document list."
) )
@@ -95,13 +125,20 @@ def get_db_documents(configs: list[DBLoaderConfig]):
} }
for entry in configs: for entry in configs:
# Skipping the database connection part engine = create_engine(entry.uri)
loader = CustomDatabaseReader() sql_database = SQLDatabase(engine)
loader = CustomDatabaseReader(sql_database)
for query_dict in entry.queries: for query_dict in entry.queries:
query = query_dict.get("sql", "") query = query_dict.get("sql", "")
explanation = query_dict.get("explanation", "") explanation = query_dict.get("explanation", "")
logger.info(f"Loading data from database with query: {query}") logger.info(f"Loading data from database with query: {query}")
documents = loader.load_data(query=query, explanation=explanation) documents = loader.load_data(query=query)
# Add the explanation to the document metadata
for doc in documents:
doc.metadata["explanation"] = explanation
doc.metadata.update(metadata) # 更新或添加额外的元数据
docs.append(doc)
docs.extend(documents)
return docs return docs
+6 -12
View File
@@ -1,4 +1,5 @@
file: file:
enable: true # 添加 enable 字段
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable # use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
use_llama_parse: false use_llama_parse: false
@@ -7,27 +8,20 @@ db:
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db # uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
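# queries: The queries to run against the database. Each entry has an `sql` statement (e.g.: SELECT * FROM table) and an `explanation` describing the data it returns # queries: The queries to run against the database. Each entry has an `sql` statement (e.g.: SELECT * FROM table) and an `explanation` describing the data it returns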
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1 - uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
#- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo enable: true # 添加 enable 字段
# - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
queries: queries:
- sql: select * from ProjectProperties limit 30; - sql: select * from ProjectProperties;
explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。" explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable; - sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。" explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '线路' limit 50;
explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。" explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '余物清理' limit 50;
explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。" explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '拆除线路' limit 50;
explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。" explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee; - sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中" explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
#web: #web:
# driver_arguments: # driver_arguments:
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode # # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode