Files
zjdataai-app/backend/app/engine/loaders/db.py
T
2024-08-28 09:39:57 +08:00

108 lines
3.3 KiB
Python

import logging
from typing import Any, List, Optional
from llama_index.core import Document
from pydantic import BaseModel
# Module-level logger, named after this module via __name__.
logger = logging.getLogger(__name__)
class CustomDatabaseReader:
    """Stand-in database reader that fabricates its output.

    The constructor accepts the usual database-connection arguments so that
    call sites written for a real reader keep working, but none of them are
    used: no connection is opened, and ``load_data`` returns fixed
    placeholder rows wrapped in a LlamaIndex ``Document``.

    Args:
        sql_database (Optional[Any]): Accepted and ignored.
        engine (Optional[Any]): Accepted and ignored.
        uri (Optional[str]): Accepted and ignored.
        scheme (Optional[str]): Accepted and ignored.
        host (Optional[str]): Accepted and ignored.
        port (Optional[str]): Accepted and ignored.
        user (Optional[str]): Accepted and ignored.
        password (Optional[str]): Accepted and ignored.
        dbname (Optional[str]): Accepted and ignored.
        *args (Any): Accepted and ignored.
        **kwargs (Any): Accepted and ignored.
    """

    def __init__(
        self,
        sql_database: Optional[Any] = None,
        engine: Optional[Any] = None,
        uri: Optional[str] = None,
        scheme: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[str] = None,
        user: Optional[str] = None,
        password: Optional[str] = None,
        dbname: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Create the reader without connecting to anything."""
        # Whatever was passed in is discarded; both attributes always end
        # up as None.
        self.sql_database = self.uri = None

    def load_data(self, query: str, explanation: str) -> List[Document]:
        """Build a single placeholder Document; no database is queried.

        Args:
            query (str): Never executed. Stored in the document metadata
                under both ``name`` and ``context``.
            explanation (str): Becomes the first line of the document text.

        Returns:
            List[Document]: A one-element list containing the document.
        """
        # Explanation first, then a fake header row and a fake data row,
        # each terminated by a newline.
        text_lines = (
            explanation,
            "Simulated column1, Simulated column2",
            "Simulated data1, Simulated data2",
        )
        simulated_text = "\n".join(text_lines) + "\n"

        placeholder = Document(text=simulated_text)
        metadata_items = (
            ("name", query),
            ("context", query),
            ("file_type", "application/vnd.ms-excel"),
        )
        for key, value in metadata_items:
            placeholder.metadata[key] = value
        return [placeholder]
class DBLoaderConfig(BaseModel):
    """Configuration for one database source and the queries to run on it."""

    # URI string identifying the database.
    uri: str
    # One dict per query; get_db_documents reads the "sql" and
    # "explanation" keys from each.
    queries: List[dict]
def get_db_documents(configs: list[DBLoaderConfig]) -> List[Document]:
    """Collect the documents produced for every query in ``configs``.

    Each query is handed to ``CustomDatabaseReader.load_data``; the database
    connection step is skipped, so ``uri`` is only used for the guard below.

    Args:
        configs (list[DBLoaderConfig]): Database sources with their queries.
            Each query dict may carry ``"sql"`` and ``"explanation"``; a
            missing key defaults to an empty string.

    Returns:
        List[Document]: Documents for all queries, in config order and then
        query order. Empty when ``configs`` is empty or the first config's
        ``uri`` is an empty string.
    """
    # NOTE(review): only the first config's uri is validated; later entries
    # with an empty uri are still processed. Kept as-is for compatibility.
    if not configs or configs[0].uri == "":
        logger.warning(
            "Failed to load database, error message: uri is empty. "
            "Return as empty document list."
        )
        return []

    docs: List[Document] = []
    for entry in configs:
        # Skipping the database connection part: the reader is built without
        # any connection arguments.
        loader = CustomDatabaseReader()
        for query_dict in entry.queries:
            query = query_dict.get("sql", "")
            explanation = query_dict.get("explanation", "")
            logger.info("Loading data from database with query: %s", query)
            docs.extend(loader.load_data(query=query, explanation=explanation))
    return docs