Compare commits
2 Commits
01ed406dfa
...
b6b697efdb
| Author | SHA1 | Date | |
|---|---|---|---|
| b6b697efdb | |||
| 0ddf56a52a |
+13
-13
@@ -2,22 +2,22 @@
|
|||||||
# DB_URL=postgresql+psycopg://ai:ai@localhost:5532/ai
|
# DB_URL=postgresql+psycopg://ai:ai@localhost:5532/ai
|
||||||
|
|
||||||
# OpenAI API配置
|
# OpenAI API配置
|
||||||
API_KEY=sk-oftybjqntjpxlhkcxkieluljb
|
API_KEY=sk-oftybjqntjpxlhkc
|
||||||
EMBEDDING_MODEL=BAAI/bge-large-zh-v1.5
|
EMBEDDING_MODEL=bge-m3
|
||||||
EMBEDDING_BASE_URL=https://api.siliconflow.cn/v1
|
EMBEDDING_BASE_URL=http://10.1.16.39:9995/v1
|
||||||
MODEL_LIST=Qwen2.5-72B=openai:Qwen/Qwen2.5-72B-Instruct&gpt-4o=openai:gpt-4o
|
MODEL_LIST=Qwen2.5-72B=openai:Qwen2.5-72B-Instruct-GPTQ-Int8
|
||||||
|
MODEL_BASE_URL=http://172.20.0.145:9995/v1
|
||||||
# 模型配置
|
|
||||||
MODEL_BASE_URL=https://api.siliconflow.cn/v1
|
|
||||||
|
|
||||||
# 文件路径配置
|
# 文件路径配置
|
||||||
KNOWLEDGE_SOURCE_DIR=data
|
MINGCI_KNOWLEDGE_SOURCE_DIR=data/业务名词库
|
||||||
MEMORY_DB_FILE=tmp/agent_memory.db
|
MINGCI_VECTOR_DB_PATH=tmp/mingcidb
|
||||||
VECTOR_DB_PATH=tmp/lancedb
|
|
||||||
SESSION_STORAGE_PATH=tmp/agent_sessions_json
|
|
||||||
|
|
||||||
# 知识库加载控制
|
KNOWLEDGE_SOURCE_DIR=data/控件布局
|
||||||
LOAD_KNOWLEDGE=true
|
VECTOR_DB_PATH=tmp/knowledgedb
|
||||||
|
|
||||||
|
MEMORY_DB_FILE=tmp/agent_memory.db
|
||||||
|
|
||||||
|
SESSION_STORAGE_PATH=tmp/agent_sessions_json
|
||||||
|
|
||||||
AGNO_MONITOR=true
|
AGNO_MONITOR=true
|
||||||
AGNO_TELEMETRY=true
|
AGNO_TELEMETRY=true
|
||||||
|
|||||||
+26
-2
@@ -31,6 +31,7 @@
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from agno.document.chunking.document import DocumentChunking
|
from agno.document.chunking.document import DocumentChunking
|
||||||
|
from agno.models.deepseek import DeepSeek
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
# 加载.env文件
|
# 加载.env文件
|
||||||
@@ -107,6 +108,8 @@ def get_model_by_provider(provider: str, model_name: str):
|
|||||||
# return Claude(id=model_name)
|
# return Claude(id=model_name)
|
||||||
# elif provider == "groq":
|
# elif provider == "groq":
|
||||||
# return Groq(id=model_name)
|
# return Groq(id=model_name)
|
||||||
|
elif provider == "deepseek":
|
||||||
|
return DeepSeek(id=model_name, base_url=model_baseUrl, api_key=api_key)
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unsupported model provider: {provider}")
|
raise ValueError(f"Unsupported model provider: {provider}")
|
||||||
|
|
||||||
@@ -127,8 +130,17 @@ def initialize_memory(model) -> AgentMemory:
|
|||||||
def initialize_vector_db() -> LanceDb:
|
def initialize_vector_db() -> LanceDb:
|
||||||
"""初始化并返回配置好的LanceDb实例"""
|
"""初始化并返回配置好的LanceDb实例"""
|
||||||
return LanceDb(
|
return LanceDb(
|
||||||
table_name="recipes",
|
table_name="knowledge",
|
||||||
uri=os.getenv("VECTOR_DB_PATH", "tmp/lancedb"),
|
uri=os.getenv("VECTOR_DB_PATH", "tmp/knowledgedb"),
|
||||||
|
search_type=SearchType.hybrid,
|
||||||
|
embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key)
|
||||||
|
)
|
||||||
|
|
||||||
|
def initialize_mingci_vector_db() -> LanceDb:
|
||||||
|
"""初始化并返回配置好的LanceDb实例"""
|
||||||
|
return LanceDb(
|
||||||
|
table_name="mingci",
|
||||||
|
uri=os.getenv("MINGCI_VECTOR_DB_PATH", "tmp/mingcidb"),
|
||||||
search_type=SearchType.hybrid,
|
search_type=SearchType.hybrid,
|
||||||
embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key)
|
embedder=OpenAIEmbedder(id=embedding_model, base_url=embedding_baseUrl, api_key=api_key)
|
||||||
)
|
)
|
||||||
@@ -146,6 +158,18 @@ def initialize_knowledge_base() -> AgentKnowledge:
|
|||||||
reader=TextReader(), # 默认文本读取器
|
reader=TextReader(), # 默认文本读取器
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def initialize_mingci_knowledge_base() -> AgentKnowledge:
|
||||||
|
"""初始化并返回配置好的AgentKnowledge实例"""
|
||||||
|
return AgentKnowledge(
|
||||||
|
vector_db=initialize_mingci_vector_db(),
|
||||||
|
num_documents=3, # 检索3个最相关的文档
|
||||||
|
chunking_strategy=DocumentChunking(
|
||||||
|
chunk_size=500,
|
||||||
|
overlap=50,
|
||||||
|
), # 固定大小分块
|
||||||
|
optimize_on=1000, # 每1000条数据进行向量优化
|
||||||
|
reader=TextReader(), # 默认文本读取器
|
||||||
|
)
|
||||||
|
|
||||||
def get_agentic_rag_agent(
|
def get_agentic_rag_agent(
|
||||||
model_id: str = "openai:gpt-4o",
|
model_id: str = "openai:gpt-4o",
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from agno.document import Document
|
|||||||
from agno.utils.log import logger
|
from agno.utils.log import logger
|
||||||
from dotenv import load_dotenv
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
from agentic_rag import initialize_knowledge_base, get_reader
|
from agentic_rag import initialize_knowledge_base, get_reader, initialize_mingci_knowledge_base
|
||||||
|
|
||||||
# 加载.env文件
|
# 加载.env文件
|
||||||
load_dotenv()
|
load_dotenv()
|
||||||
@@ -14,25 +14,34 @@ import os
|
|||||||
def main():
|
def main():
|
||||||
print("Hello from agno-agentic-rag!")
|
print("Hello from agno-agentic-rag!")
|
||||||
# 从.env加载知识库来源目录并初始化知识库
|
# 从.env加载知识库来源目录并初始化知识库
|
||||||
load_knowledge = os.getenv("LOAD_KNOWLEDGE", "false").lower() == "true"
|
mingci_knowledge_source_dir = os.getenv("MINGCI_KNOWLEDGE_SOURCE_DIR")
|
||||||
|
if mingci_knowledge_source_dir and os.path.exists(mingci_knowledge_source_dir):
|
||||||
|
# 初始化知识库
|
||||||
|
knowledge_base = initialize_mingci_knowledge_base()
|
||||||
|
|
||||||
|
LoadKnowledgeToDatabase(knowledge_base, mingci_knowledge_source_dir)
|
||||||
|
|
||||||
knowledge_source_dir = os.getenv("KNOWLEDGE_SOURCE_DIR")
|
knowledge_source_dir = os.getenv("KNOWLEDGE_SOURCE_DIR")
|
||||||
if load_knowledge and knowledge_source_dir and os.path.exists(knowledge_source_dir):
|
if knowledge_source_dir and os.path.exists(knowledge_source_dir):
|
||||||
# 初始化知识库
|
# 初始化知识库
|
||||||
knowledge_base = initialize_knowledge_base()
|
knowledge_base = initialize_knowledge_base()
|
||||||
|
|
||||||
logger.info(f"加载知识库: {knowledge_source_dir}")
|
LoadKnowledgeToDatabase(knowledge_base, knowledge_source_dir)
|
||||||
for root, _, files in os.walk(knowledge_source_dir):
|
|
||||||
for file in files:
|
def LoadKnowledgeToDatabase(knowledge_base, knowledge_source_dir):
|
||||||
file_path = os.path.join(root, file)
|
logger.info(f"加载知识库: {knowledge_source_dir}")
|
||||||
file_ext = os.path.splitext(file)[1][1:] # 获取文件扩展名
|
for root, _, files in os.walk(knowledge_source_dir):
|
||||||
reader = get_reader(file_ext)
|
for file in files:
|
||||||
if reader:
|
file_path = os.path.join(root, file)
|
||||||
try:
|
file_ext = os.path.splitext(file)[1][1:] # 获取文件扩展名
|
||||||
filePath = Path(file_path)
|
reader = get_reader(file_ext)
|
||||||
docs: List[Document] = reader.read(filePath)
|
if reader:
|
||||||
knowledge_base.load_documents(docs, upsert=True)
|
try:
|
||||||
except Exception as e:
|
filePath = Path(file_path)
|
||||||
logger.warning(f"无法加载文档 {file_path}: {str(e)}")
|
docs: List[Document] = reader.read(filePath)
|
||||||
|
knowledge_base.load_documents(docs, upsert=True)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"无法加载文档 {file_path}: {str(e)}")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ dependencies = [
|
|||||||
"nest-asyncio>=1.6.0",
|
"nest-asyncio>=1.6.0",
|
||||||
"streamlit>=1.44.1",
|
"streamlit>=1.44.1",
|
||||||
"openai",
|
"openai",
|
||||||
|
"pylance",
|
||||||
"extra-streamlit-components>=0.1.71",
|
"extra-streamlit-components>=0.1.71",
|
||||||
"sqlalchemy>=2.0.38",
|
"sqlalchemy>=2.0.38",
|
||||||
"websockets>=14.2",
|
"websockets>=14.2",
|
||||||
|
|||||||
Reference in New Issue
Block a user