dev #5
@@ -1,40 +1,41 @@
|
|||||||
import logging
|
import logging
|
||||||
# import yaml
|
import yaml
|
||||||
|
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
|
||||||
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
||||||
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
# 注释掉 load_configs 函数
|
|
||||||
# def load_configs():
|
def load_configs():
|
||||||
# with open("config/loaders.yaml") as f:
|
with open("config/loaders.yaml") as f:
|
||||||
# configs = yaml.safe_load(f)
|
configs = yaml.safe_load(f)
|
||||||
# return configs
|
return configs
|
||||||
|
|
||||||
|
|
||||||
def get_documents():
|
def get_documents():
|
||||||
documents = []
|
documents = []
|
||||||
# 注释掉对 load_configs 的调用
|
config = load_configs()
|
||||||
# config = load_configs()
|
|
||||||
# if config is None or len(config.items()) == 0:
|
|
||||||
# return documents
|
|
||||||
|
|
||||||
# 使用一个空的 config 替代原有的配置加载逻辑
|
if config is None or len(config.items()) == 0:
|
||||||
config = {}
|
return documents
|
||||||
|
|
||||||
for loader_type, loader_config in config.items():
|
for loader_type, loader_config in config.items():
|
||||||
logger.info(
|
if loader_config.get('enable', True): # 检查 enable 字段
|
||||||
f"Loading documents from loader: {loader_type}, config: {loader_config}"
|
logger.info(
|
||||||
)
|
f"Loading documents from loader: {loader_type}, config: {loader_config}"
|
||||||
|
)
|
||||||
|
|
||||||
loader_config = loader_config or []
|
loader_config = loader_config or []
|
||||||
match loader_type:
|
match loader_type:
|
||||||
case "file":
|
case "file":
|
||||||
document = get_file_documents(FileLoaderConfig(**loader_config))
|
document = get_file_documents(FileLoaderConfig(**loader_config))
|
||||||
case "web":
|
case "web":
|
||||||
document = get_web_documents(WebLoaderConfig(**loader_config))
|
document = get_web_documents(WebLoaderConfig(**loader_config))
|
||||||
case _:
|
case "db":
|
||||||
raise ValueError(f"Invalid loader type: {loader_type}")
|
document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config])
|
||||||
documents.extend(document)
|
case _:
|
||||||
|
raise ValueError(f"Invalid loader type: {loader_type}")
|
||||||
|
documents.extend(document)
|
||||||
|
|
||||||
return documents
|
return documents
|
||||||
@@ -1,12 +1,15 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from llama_index.core import Document
|
from llama_index.core import SQLDatabase, Document
|
||||||
|
from llama_index.readers.database import DatabaseReader
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
from sqlalchemy import create_engine, text
|
||||||
|
from sqlalchemy.engine import Engine
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class CustomDatabaseReader:
|
class CustomDatabaseReader(DatabaseReader):
|
||||||
"""Simple Database reader.
|
"""Simple Database reader.
|
||||||
|
|
||||||
Concatenates each row into Document used by LlamaIndex.
|
Concatenates each row into Document used by LlamaIndex.
|
||||||
@@ -39,8 +42,8 @@ class CustomDatabaseReader:
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
sql_database: Optional[Any] = None,
|
sql_database: Optional[SQLDatabase] = None,
|
||||||
engine: Optional[Any] = None,
|
engine: Optional[Engine] = None,
|
||||||
uri: Optional[str] = None,
|
uri: Optional[str] = None,
|
||||||
scheme: Optional[str] = None,
|
scheme: Optional[str] = None,
|
||||||
host: Optional[str] = None,
|
host: Optional[str] = None,
|
||||||
@@ -52,24 +55,51 @@ class CustomDatabaseReader:
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize with parameters."""
|
"""Initialize with parameters."""
|
||||||
# Setting the database-related properties to None
|
if sql_database:
|
||||||
self.sql_database = None
|
self.sql_database = sql_database
|
||||||
self.uri = None
|
elif engine:
|
||||||
|
self.sql_database = SQLDatabase(engine, *args, **kwargs)
|
||||||
|
elif uri:
|
||||||
|
self.uri = uri
|
||||||
|
self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
|
||||||
|
elif scheme and host and port and user and password and dbname:
|
||||||
|
uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}"
|
||||||
|
self.uri = uri
|
||||||
|
self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
|
||||||
|
else:
|
||||||
|
raise ValueError(
|
||||||
|
"You must provide either a SQLDatabase, "
|
||||||
|
"a SQL Alchemy Engine, a valid connection URI, or a valid "
|
||||||
|
"set of credentials."
|
||||||
|
)
|
||||||
|
|
||||||
def load_data(self, query: str, explanation: str) -> List[Document]:
|
def load_data(self, query: str) -> List[Document]:
|
||||||
"""Simulate loading data without a database connection.
|
"""Query and load data from the Database, returning a list of Documents.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query (str): Query parameter (not used).
|
query (str): Query parameter to filter tables and rows.
|
||||||
explanation (str): Explanation to be included in the document.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Document]: A list of Document objects.
|
List[Document]: A list of Document objects.
|
||||||
"""
|
"""
|
||||||
dco_str = explanation + "\n"
|
dco_str = ""
|
||||||
# Simulate data without querying a real database
|
|
||||||
dco_str += "Simulated column1, Simulated column2\n"
|
with self.sql_database.engine.connect() as connection:
|
||||||
dco_str += "Simulated data1, Simulated data2\n"
|
if query is None:
|
||||||
|
raise ValueError("A query parameter is necessary to filter the data")
|
||||||
|
else:
|
||||||
|
result = connection.execute(text(query))
|
||||||
|
|
||||||
|
dco_str += ", ".join(
|
||||||
|
[f"{entry}" for entry in result.keys()]
|
||||||
|
) + "\n"
|
||||||
|
|
||||||
|
for item in result.fetchall():
|
||||||
|
# Fetch each item
|
||||||
|
record_str = ", ".join(
|
||||||
|
[f"{entry}" for col, entry in zip(result.keys(), item)]
|
||||||
|
)
|
||||||
|
dco_str += record_str + "\n"
|
||||||
|
|
||||||
doc = Document(text=dco_str)
|
doc = Document(text=dco_str)
|
||||||
doc.metadata["name"] = query
|
doc.metadata["name"] = query
|
||||||
@@ -81,10 +111,10 @@ class DBLoaderConfig(BaseModel):
|
|||||||
uri: str
|
uri: str
|
||||||
queries: List[dict]
|
queries: List[dict]
|
||||||
|
|
||||||
def get_db_documents(configs: list[DBLoaderConfig]):
|
def get_db_documents(configs: List[DBLoaderConfig]) -> List[Document]:
|
||||||
docs = []
|
docs = []
|
||||||
|
|
||||||
if len(configs) == 0 or configs[0].uri == "":
|
if not configs or not configs[0].uri:
|
||||||
logger.warning(
|
logger.warning(
|
||||||
f"Failed to load database, error message: uri is empty. Return as empty document list."
|
f"Failed to load database, error message: uri is empty. Return as empty document list."
|
||||||
)
|
)
|
||||||
@@ -95,13 +125,20 @@ def get_db_documents(configs: list[DBLoaderConfig]):
|
|||||||
}
|
}
|
||||||
|
|
||||||
for entry in configs:
|
for entry in configs:
|
||||||
# Skipping the database connection part
|
engine = create_engine(entry.uri)
|
||||||
loader = CustomDatabaseReader()
|
sql_database = SQLDatabase(engine)
|
||||||
|
|
||||||
|
loader = CustomDatabaseReader(sql_database)
|
||||||
for query_dict in entry.queries:
|
for query_dict in entry.queries:
|
||||||
query = query_dict.get("sql", "")
|
query = query_dict.get("sql", "")
|
||||||
explanation = query_dict.get("explanation", "")
|
explanation = query_dict.get("explanation", "")
|
||||||
logger.info(f"Loading data from database with query: {query}")
|
logger.info(f"Loading data from database with query: {query}")
|
||||||
documents = loader.load_data(query=query, explanation=explanation)
|
documents = loader.load_data(query=query)
|
||||||
|
|
||||||
|
# 添加解释到元数据中
|
||||||
|
for doc in documents:
|
||||||
|
doc.metadata["explanation"] = explanation
|
||||||
|
doc.metadata.update(metadata) # 更新或添加额外的元数据
|
||||||
|
docs.append(doc)
|
||||||
|
|
||||||
docs.extend(documents)
|
|
||||||
return docs
|
return docs
|
||||||
@@ -1,4 +1,5 @@
|
|||||||
file:
|
file:
|
||||||
|
enable: true # 添加 enable 字段
|
||||||
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
|
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
|
||||||
use_llama_parse: false
|
use_llama_parse: false
|
||||||
|
|
||||||
@@ -7,27 +8,20 @@ db:
|
|||||||
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
|
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
|
||||||
# query: The query to fetch data from the database. E.g.: SELECT * FROM table
|
# query: The query to fetch data from the database. E.g.: SELECT * FROM table
|
||||||
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||||
#- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo
|
enable: true # 添加 enable 字段
|
||||||
# - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
|
|
||||||
queries:
|
queries:
|
||||||
- sql: select * from ProjectProperties limit 30;
|
- sql: select * from ProjectProperties;
|
||||||
explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
|
explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
|
||||||
|
|
||||||
- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
|
- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
|
||||||
explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
|
explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
|
||||||
|
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
|
||||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '线路' limit 50;
|
|
||||||
explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||||
|
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
|
||||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '余物清理' limit 50;
|
|
||||||
explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||||
|
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
|
||||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '拆除线路' limit 50;
|
|
||||||
explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||||
|
|
||||||
- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
|
- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
|
||||||
explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
|
explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
|
||||||
|
|
||||||
#web:
|
#web:
|
||||||
# driver_arguments:
|
# driver_arguments:
|
||||||
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
|
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
|
||||||
|
|||||||
Reference in New Issue
Block a user