修改知识库范围
This commit is contained in:
+8
-6
@@ -9,13 +9,15 @@ MODEL_LIST=Qwen2.5-72B=openai:Qwen2.5-72B-Instruct-GPTQ-Int8
|
||||
MODEL_BASE_URL=http://172.20.0.145:9995/v1
|
||||
|
||||
# 文件路径配置
|
||||
KNOWLEDGE_SOURCE_DIR=data
|
||||
MEMORY_DB_FILE=tmp/agent_memory.db
|
||||
VECTOR_DB_PATH=tmp/lancedb
|
||||
SESSION_STORAGE_PATH=tmp/agent_sessions_json
|
||||
MINGCI_KNOWLEDGE_SOURCE_DIR=data/业务名词库
|
||||
MINGCI_VECTOR_DB_PATH=tmp/mingcidb
|
||||
|
||||
# 知识库加载控制
|
||||
LOAD_KNOWLEDGE=true
|
||||
KNOWLEDGE_SOURCE_DIR=data/控件布局
|
||||
VECTOR_DB_PATH=tmp/knowledgedb
|
||||
|
||||
MEMORY_DB_FILE=tmp/agent_memory.db
|
||||
|
||||
SESSION_STORAGE_PATH=tmp/agent_sessions_json
|
||||
|
||||
AGNO_MONITOR=true
|
||||
AGNO_TELEMETRY=true
|
||||
|
||||
+23
-2
@@ -130,8 +130,17 @@ def initialize_memory(model) -> AgentMemory:
|
||||
def initialize_vector_db() -> LanceDb:
    """Initialize and return the configured LanceDb instance.

    The store uses the ``knowledge`` table. Its location is read from the
    ``VECTOR_DB_PATH`` environment variable, falling back to
    ``tmp/knowledgedb`` when the variable is unset.

    Returns:
        A ``LanceDb`` set up for hybrid search with an ``OpenAIEmbedder``.
    """
    # Each keyword may be passed only once: the superseded
    # table_name="recipes" / "tmp/lancedb" pair is dropped in favour of the
    # current values, since a repeated keyword argument is a SyntaxError.
    return LanceDb(
        table_name="knowledge",
        uri=os.getenv("VECTOR_DB_PATH", "tmp/knowledgedb"),
        search_type=SearchType.hybrid,
        embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key),
    )
|
||||
|
||||
def initialize_mingci_vector_db() -> LanceDb:
    """Build the LanceDb instance that stores the ``mingci`` table.

    The storage location comes from the ``MINGCI_VECTOR_DB_PATH``
    environment variable and defaults to ``tmp/mingcidb``.

    Returns:
        A ``LanceDb`` set up for hybrid search with an ``OpenAIEmbedder``.
    """
    storage_uri = os.getenv("MINGCI_VECTOR_DB_PATH", "tmp/mingcidb")
    mingci_embedder = OpenAIEmbedder(
        id=embedding_model,
        base_url=embedding_baseUrl,
        api_key=api_key,
    )
    return LanceDb(
        table_name="mingci",
        uri=storage_uri,
        search_type=SearchType.hybrid,
        embedder=mingci_embedder,
    )
|
||||
@@ -149,6 +158,18 @@ def initialize_knowledge_base() -> AgentKnowledge:
|
||||
reader=TextReader(), # 默认文本读取器
|
||||
)
|
||||
|
||||
def initialize_mingci_knowledge_base() -> AgentKnowledge:
    """Assemble the AgentKnowledge instance backed by the mingci vector store.

    Returns:
        An ``AgentKnowledge`` wired to ``initialize_mingci_vector_db()``.
    """
    mingci_store = initialize_mingci_vector_db()
    # Fixed-size chunking.
    chunker = DocumentChunking(chunk_size=500, overlap=50)
    # Default text reader.
    text_reader = TextReader()
    return AgentKnowledge(
        vector_db=mingci_store,
        num_documents=3,  # retrieve the 3 most relevant documents
        chunking_strategy=chunker,
        optimize_on=1000,  # run vector optimization every 1000 entries
        reader=text_reader,
    )
|
||||
|
||||
def get_agentic_rag_agent(
|
||||
model_id: str = "openai:gpt-4o",
|
||||
|
||||
@@ -5,7 +5,7 @@ from agno.document import Document
|
||||
from agno.utils.log import logger
|
||||
from dotenv import load_dotenv
|
||||
|
||||
from agentic_rag import initialize_knowledge_base, get_reader
|
||||
from agentic_rag import initialize_knowledge_base, get_reader, initialize_mingci_knowledge_base
|
||||
|
||||
# 加载.env文件
|
||||
load_dotenv()
|
||||
@@ -14,12 +14,21 @@ import os
|
||||
def main():
    """Load the configured knowledge sources into their vector stores.

    Two source directories are read from the environment:
    ``MINGCI_KNOWLEDGE_SOURCE_DIR`` and ``KNOWLEDGE_SOURCE_DIR``. Each one
    is loaded only when the variable is set and the path exists; otherwise
    it is skipped.
    """
    print("Hello from agno-agentic-rag!")

    # Read each knowledge source directory from .env and initialize its
    # knowledge base. The superseded LOAD_KNOWLEDGE flag and its guard are
    # not carried over: only the current guard on the directory is kept.
    mingci_knowledge_source_dir = os.getenv("MINGCI_KNOWLEDGE_SOURCE_DIR")
    if mingci_knowledge_source_dir and os.path.exists(mingci_knowledge_source_dir):
        # Initialize the knowledge base.
        knowledge_base = initialize_mingci_knowledge_base()

        LoadKnowledgeToDatabase(knowledge_base, mingci_knowledge_source_dir)

    knowledge_source_dir = os.getenv("KNOWLEDGE_SOURCE_DIR")
    if knowledge_source_dir and os.path.exists(knowledge_source_dir):
        # Initialize the knowledge base.
        knowledge_base = initialize_knowledge_base()

        LoadKnowledgeToDatabase(knowledge_base, knowledge_source_dir)
|
||||
|
||||
def LoadKnowledgeToDatabase(knowledge_base, knowledge_source_dir):
|
||||
logger.info(f"加载知识库: {knowledge_source_dir}")
|
||||
for root, _, files in os.walk(knowledge_source_dir):
|
||||
for file in files:
|
||||
|
||||
@@ -11,6 +11,7 @@ dependencies = [
|
||||
"nest-asyncio>=1.6.0",
|
||||
"streamlit>=1.44.1",
|
||||
"openai",
|
||||
"pylance",
|
||||
"extra-streamlit-components>=0.1.71",
|
||||
"sqlalchemy>=2.0.38",
|
||||
"websockets>=14.2",
|
||||
|
||||
Reference in New Issue
Block a user