dev #5
@@ -1,24 +1,26 @@
|
|||||||
import logging
|
import logging
|
||||||
|
# import yaml
|
||||||
import yaml
|
|
||||||
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
|
|
||||||
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
||||||
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
# 注释掉 load_configs 函数
|
||||||
def load_configs():
|
# def load_configs():
|
||||||
with open("config/loaders.yaml") as f:
|
# with open("config/loaders.yaml") as f:
|
||||||
configs = yaml.safe_load(f)
|
# configs = yaml.safe_load(f)
|
||||||
return configs
|
# return configs
|
||||||
|
|
||||||
|
|
||||||
def get_documents():
|
def get_documents():
|
||||||
documents = []
|
documents = []
|
||||||
config = load_configs()
|
# 注释掉对 load_configs 的调用
|
||||||
if config is None or len(config.items()) == 0:
|
# config = load_configs()
|
||||||
return documents
|
# if config is None or len(config.items()) == 0:
|
||||||
|
# return documents
|
||||||
|
|
||||||
|
# 使用一个空的 config 替代原有的配置加载逻辑
|
||||||
|
config = {}
|
||||||
|
|
||||||
for loader_type, loader_config in config.items():
|
for loader_type, loader_config in config.items():
|
||||||
logger.info(
|
logger.info(
|
||||||
@@ -31,8 +33,6 @@ def get_documents():
|
|||||||
document = get_file_documents(FileLoaderConfig(**loader_config))
|
document = get_file_documents(FileLoaderConfig(**loader_config))
|
||||||
case "web":
|
case "web":
|
||||||
document = get_web_documents(WebLoaderConfig(**loader_config))
|
document = get_web_documents(WebLoaderConfig(**loader_config))
|
||||||
case "db":
|
|
||||||
document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config])
|
|
||||||
case _:
|
case _:
|
||||||
raise ValueError(f"Invalid loader type: {loader_type}")
|
raise ValueError(f"Invalid loader type: {loader_type}")
|
||||||
documents.extend(document)
|
documents.extend(document)
|
||||||
|
|||||||
@@ -1,18 +1,12 @@
|
|||||||
import logging
|
import logging
|
||||||
from typing import Any, List, Optional
|
from typing import Any, List, Optional
|
||||||
|
|
||||||
from llama_index.core import SQLDatabase, Document
|
from llama_index.core import Document
|
||||||
from llama_index.core.objects import SQLTableSchema
|
|
||||||
from llama_index.core.readers.base import BaseReader
|
|
||||||
from llama_index.readers.database import DatabaseReader
|
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from sqlalchemy import create_engine
|
|
||||||
from sqlalchemy import text
|
|
||||||
from sqlalchemy.engine import Engine
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
class CustomDatabaseReader(BaseReader):
|
class CustomDatabaseReader:
|
||||||
"""Simple Database reader.
|
"""Simple Database reader.
|
||||||
|
|
||||||
Concatenates each row into Document used by LlamaIndex.
|
Concatenates each row into Document used by LlamaIndex.
|
||||||
@@ -45,8 +39,8 @@ class CustomDatabaseReader(BaseReader):
|
|||||||
|
|
||||||
def __init__(
|
def __init__(
|
||||||
self,
|
self,
|
||||||
sql_database: Optional[SQLDatabase] = None,
|
sql_database: Optional[Any] = None,
|
||||||
engine: Optional[Engine] = None,
|
engine: Optional[Any] = None,
|
||||||
uri: Optional[str] = None,
|
uri: Optional[str] = None,
|
||||||
scheme: Optional[str] = None,
|
scheme: Optional[str] = None,
|
||||||
host: Optional[str] = None,
|
host: Optional[str] = None,
|
||||||
@@ -58,50 +52,24 @@ class CustomDatabaseReader(BaseReader):
|
|||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> None:
|
) -> None:
|
||||||
"""Initialize with parameters."""
|
"""Initialize with parameters."""
|
||||||
if sql_database:
|
# Setting the database-related properties to None
|
||||||
self.sql_database = sql_database
|
self.sql_database = None
|
||||||
elif engine:
|
self.uri = None
|
||||||
self.sql_database = SQLDatabase(engine, *args, **kwargs)
|
|
||||||
elif uri:
|
|
||||||
self.uri = uri
|
|
||||||
self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
|
|
||||||
elif scheme and host and port and user and password and dbname:
|
|
||||||
uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}"
|
|
||||||
self.uri = uri
|
|
||||||
self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
|
|
||||||
else:
|
|
||||||
raise ValueError(
|
|
||||||
"You must provide either a SQLDatabase, "
|
|
||||||
"a SQL Alchemy Engine, a valid connection URI, or a valid "
|
|
||||||
"set of credentials."
|
|
||||||
)
|
|
||||||
|
|
||||||
def load_data(self, query: str) -> List[Document]:
|
def load_data(self, query: str, explanation: str) -> List[Document]:
|
||||||
"""Query and load data from the Database, returning a list of Documents.
|
"""Simulate loading data without a database connection.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
query (str): Query parameter to filter tables and rows.
|
query (str): Query parameter (not used).
|
||||||
|
explanation (str): Explanation to be included in the document.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
List[Document]: A list of Document objects.
|
List[Document]: A list of Document objects.
|
||||||
"""
|
"""
|
||||||
dco_str = ""
|
dco_str = explanation + "\n"
|
||||||
with self.sql_database.engine.connect() as connection:
|
# Simulate data without querying a real database
|
||||||
if query is None:
|
dco_str += "Simulated column1, Simulated column2\n"
|
||||||
raise ValueError("A query parameter is necessary to filter the data")
|
dco_str += "Simulated data1, Simulated data2\n"
|
||||||
else:
|
|
||||||
result = connection.execute(text(query))
|
|
||||||
|
|
||||||
dco_str = ", ".join(
|
|
||||||
[f"{entry}" for entry in result.keys()]
|
|
||||||
)
|
|
||||||
|
|
||||||
for item in result.fetchall():
|
|
||||||
# fetch each item
|
|
||||||
record_str = ", ".join(
|
|
||||||
[f"{entry}" for col, entry in zip(result.keys(), item)]
|
|
||||||
)
|
|
||||||
dco_str += record_str + "\n"
|
|
||||||
|
|
||||||
doc = Document(text=dco_str)
|
doc = Document(text=dco_str)
|
||||||
doc.metadata["name"] = query
|
doc.metadata["name"] = query
|
||||||
@@ -111,7 +79,7 @@ class CustomDatabaseReader(BaseReader):
|
|||||||
|
|
||||||
class DBLoaderConfig(BaseModel):
|
class DBLoaderConfig(BaseModel):
|
||||||
uri: str
|
uri: str
|
||||||
queries: List[str]
|
queries: List[dict]
|
||||||
|
|
||||||
def get_db_documents(configs: list[DBLoaderConfig]):
|
def get_db_documents(configs: list[DBLoaderConfig]):
|
||||||
docs = []
|
docs = []
|
||||||
@@ -123,33 +91,17 @@ def get_db_documents(configs: list[DBLoaderConfig]):
|
|||||||
return docs
|
return docs
|
||||||
|
|
||||||
metadata = {
|
metadata = {
|
||||||
#'file_name':'',
|
'file_type': 'application/booway.document.zj',
|
||||||
'file_type':'application/booway.document.zj',
|
|
||||||
#'file_path':'',
|
|
||||||
#'file_size':'',
|
|
||||||
#'creation_date':'',
|
|
||||||
#'last_modified_date':'',
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#from llama_index.readers.database import DatabaseReader
|
|
||||||
for entry in configs:
|
for entry in configs:
|
||||||
engine = create_engine(entry.uri)
|
# Skipping the database connection part
|
||||||
sql_database = SQLDatabase(engine)
|
loader = CustomDatabaseReader()
|
||||||
|
for query_dict in entry.queries:
|
||||||
# table_schema_objs = makeDescriptionByEngine(sql_database)
|
query = query_dict.get("sql", "")
|
||||||
# table_node_mapping = SQLTableNodeMapping(sql_database)
|
explanation = query_dict.get("explanation", "")
|
||||||
#
|
|
||||||
# nodes = table_node_mapping.to_nodes(table_schema_objs)
|
|
||||||
# for node in nodes:
|
|
||||||
# node.metadata.update(metadata)
|
|
||||||
#
|
|
||||||
# docs.extend(nodes)
|
|
||||||
|
|
||||||
queries = entry.queries or []
|
|
||||||
loader = CustomDatabaseReader(sql_database)
|
|
||||||
for query in queries:
|
|
||||||
logger.info(f"Loading data from database with query: {query}")
|
logger.info(f"Loading data from database with query: {query}")
|
||||||
documents = loader.load_data(query=query)
|
documents = loader.load_data(query=query, explanation=explanation)
|
||||||
|
|
||||||
docs.extend(documents)
|
docs.extend(documents)
|
||||||
return docs
|
return docs
|
||||||
|
|||||||
@@ -5,6 +5,8 @@ text_qa_template_str = (
|
|||||||
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
|
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
|
||||||
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
|
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
|
||||||
"如同直接从文件中提取的内容。\n"
|
"如同直接从文件中提取的内容。\n"
|
||||||
|
"知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n"
|
||||||
|
"例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n"
|
||||||
|
|
||||||
"## 技能\n"
|
"## 技能\n"
|
||||||
"### 技能 1: 数据查询与提供\n"
|
"### 技能 1: 数据查询与提供\n"
|
||||||
@@ -39,15 +41,17 @@ refine_template_str = (
|
|||||||
"这是原本的问题: {query_str}\n"
|
"这是原本的问题: {query_str}\n"
|
||||||
"我们已经提供了回答: {existing_answer}\n"
|
"我们已经提供了回答: {existing_answer}\n"
|
||||||
"现在我们有机会改进这个回答 "
|
"现在我们有机会改进这个回答 "
|
||||||
"使用以下更多上下文(仅当需要用时)\n"
|
"使用以下更多上下文(仅当有助于改进回答时使用)\n"
|
||||||
|
"如果新的上下文对回答没有影响,或者原来的回答已经正确,不要在上次回答的后边再加上多余的补充信息,直接返回原本的回答。\n"
|
||||||
|
"如果新的上下文对回答没有影响,或者原来的回答已经正确,不要在上次回答的后边再加上多余的补充信息,直接返回原本的回答。\n"
|
||||||
"------------\n"
|
"------------\n"
|
||||||
"{context_msg}\n"
|
"{context_msg}\n"
|
||||||
"------------\n"
|
"------------\n"
|
||||||
"根据新的上下文, 请改进原来的回答。"
|
"如果回答中已经包含有正确答案,不要返回多余的解释等信息,只返回正确答案\n"
|
||||||
"如果新的上下文没有用, 直接返回原本的回答。\n"
|
"如果是表结构或者是数据库的相关内容,仅用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
||||||
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
|
||||||
"改进的回答: "
|
"改进的回答: "
|
||||||
)
|
)
|
||||||
|
|
||||||
refine_template = PromptTemplate(refine_template_str)
|
refine_template = PromptTemplate(refine_template_str)
|
||||||
|
|
||||||
summary_template_str = (
|
summary_template_str = (
|
||||||
|
|||||||
Reference in New Issue
Block a user