修改知识库范围

This commit is contained in:
2025-04-09 14:21:18 +08:00
parent 0ddf56a52a
commit b6b697efdb
4 changed files with 58 additions and 25 deletions
+8 -6
View File
@@ -9,13 +9,15 @@ MODEL_LIST=Qwen2.5-72B=openai:Qwen2.5-72B-Instruct-GPTQ-Int8
MODEL_BASE_URL=http://172.20.0.145:9995/v1 MODEL_BASE_URL=http://172.20.0.145:9995/v1
# 文件路径配置 # 文件路径配置
KNOWLEDGE_SOURCE_DIR=data MINGCI_KNOWLEDGE_SOURCE_DIR=data/业务名词库
MEMORY_DB_FILE=tmp/agent_memory.db MINGCI_VECTOR_DB_PATH=tmp/mingcidb
VECTOR_DB_PATH=tmp/lancedb
SESSION_STORAGE_PATH=tmp/agent_sessions_json
# 知识库加载控制 KNOWLEDGE_SOURCE_DIR=data/控件布局
LOAD_KNOWLEDGE=true VECTOR_DB_PATH=tmp/knowledgedb
MEMORY_DB_FILE=tmp/agent_memory.db
SESSION_STORAGE_PATH=tmp/agent_sessions_json
AGNO_MONITOR=true AGNO_MONITOR=true
AGNO_TELEMETRY=true AGNO_TELEMETRY=true
+23 -2
View File
@@ -130,8 +130,17 @@ def initialize_memory(model) -> AgentMemory:
def initialize_vector_db() -> LanceDb: def initialize_vector_db() -> LanceDb:
"""初始化并返回配置好的LanceDb实例""" """初始化并返回配置好的LanceDb实例"""
return LanceDb( return LanceDb(
table_name="recipes", table_name="knowledge",
uri=os.getenv("VECTOR_DB_PATH", "tmp/lancedb"), uri=os.getenv("VECTOR_DB_PATH", "tmp/knowledgedb"),
search_type=SearchType.hybrid,
embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key)
)
def initialize_mingci_vector_db() -> LanceDb:
"""初始化并返回配置好的LanceDb实例"""
return LanceDb(
table_name="mingci",
uri=os.getenv("MINGCI_VECTOR_DB_PATH", "tmp/mingcidb"),
search_type=SearchType.hybrid, search_type=SearchType.hybrid,
embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key) embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key)
) )
@@ -149,6 +158,18 @@ def initialize_knowledge_base() -> AgentKnowledge:
reader=TextReader(), # 默认文本读取器 reader=TextReader(), # 默认文本读取器
) )
def initialize_mingci_knowledge_base() -> AgentKnowledge:
"""初始化并返回配置好的AgentKnowledge实例"""
return AgentKnowledge(
vector_db=initialize_mingci_vector_db(),
num_documents=3, # 检索3个最相关的文档
chunking_strategy=DocumentChunking(
chunk_size=500,
overlap=50,
), # 固定大小分块
optimize_on=1000, # 每1000条数据进行向量优化
reader=TextReader(), # 默认文本读取器
)
def get_agentic_rag_agent( def get_agentic_rag_agent(
model_id: str = "openai:gpt-4o", model_id: str = "openai:gpt-4o",
+25 -16
View File
@@ -5,7 +5,7 @@ from agno.document import Document
from agno.utils.log import logger from agno.utils.log import logger
from dotenv import load_dotenv from dotenv import load_dotenv
from agentic_rag import initialize_knowledge_base, get_reader from agentic_rag import initialize_knowledge_base, get_reader, initialize_mingci_knowledge_base
# 加载.env文件 # 加载.env文件
load_dotenv() load_dotenv()
@@ -14,25 +14,34 @@ import os
def main(): def main():
print("Hello from agno-agentic-rag!") print("Hello from agno-agentic-rag!")
# 从.env加载知识库来源目录并初始化知识库 # 从.env加载知识库来源目录并初始化知识库
load_knowledge = os.getenv("LOAD_KNOWLEDGE", "false").lower() == "true" mingci_knowledge_source_dir = os.getenv("MINGCI_KNOWLEDGE_SOURCE_DIR")
if mingci_knowledge_source_dir and os.path.exists(mingci_knowledge_source_dir):
# 初始化知识库
knowledge_base = initialize_mingci_knowledge_base()
LoadKnowledgeToDatabase(knowledge_base, mingci_knowledge_source_dir)
knowledge_source_dir = os.getenv("KNOWLEDGE_SOURCE_DIR") knowledge_source_dir = os.getenv("KNOWLEDGE_SOURCE_DIR")
if load_knowledge and knowledge_source_dir and os.path.exists(knowledge_source_dir): if knowledge_source_dir and os.path.exists(knowledge_source_dir):
# 初始化知识库 # 初始化知识库
knowledge_base = initialize_knowledge_base() knowledge_base = initialize_knowledge_base()
logger.info(f"加载知识库: {knowledge_source_dir}") LoadKnowledgeToDatabase(knowledge_base, knowledge_source_dir)
for root, _, files in os.walk(knowledge_source_dir):
for file in files: def LoadKnowledgeToDatabase(knowledge_base, knowledge_source_dir):
file_path = os.path.join(root, file) logger.info(f"加载知识库: {knowledge_source_dir}")
file_ext = os.path.splitext(file)[1][1:] # 获取文件扩展名 for root, _, files in os.walk(knowledge_source_dir):
reader = get_reader(file_ext) for file in files:
if reader: file_path = os.path.join(root, file)
try: file_ext = os.path.splitext(file)[1][1:] # 获取文件扩展名
filePath = Path(file_path) reader = get_reader(file_ext)
docs: List[Document] = reader.read(filePath) if reader:
knowledge_base.load_documents(docs, upsert=True) try:
except Exception as e: filePath = Path(file_path)
logger.warning(f"无法加载文档 {file_path}: {str(e)}") docs: List[Document] = reader.read(filePath)
knowledge_base.load_documents(docs, upsert=True)
except Exception as e:
logger.warning(f"无法加载文档 {file_path}: {str(e)}")
if __name__ == "__main__": if __name__ == "__main__":
+1
View File
@@ -11,6 +11,7 @@ dependencies = [
"nest-asyncio>=1.6.0", "nest-asyncio>=1.6.0",
"streamlit>=1.44.1", "streamlit>=1.44.1",
"openai", "openai",
"pylance",
"extra-streamlit-components>=0.1.71", "extra-streamlit-components>=0.1.71",
"sqlalchemy>=2.0.38", "sqlalchemy>=2.0.38",
"websockets>=14.2", "websockets>=14.2",