refactor: 移除硬编码配置,使用环境变量管理API密钥和路径

重构XinferenceEmbeddings和XinferenceReRankerModel类,移除硬编码的API密钥和路径
统一使用环境变量管理配置,提高安全性和灵活性
修改数据库路径为相对路径,增强可移植性
This commit is contained in:
2025-09-24 10:21:09 +08:00
parent 0541db1e34
commit d8d32ced9a
4 changed files with 30 additions and 23 deletions
+3 -3
View File
@@ -53,12 +53,12 @@ class BatchQueryResponse(BaseModel):
# 封装查询数据的相关代码
class QingDanDingEQueryService:
def __init__(self, db_path="/data/QueryRewrite/data/db/qingdan_ding_e_ku.db"):
self.db_path = db_path
def __init__(self):
self.db_path = f"{os.getcwd()}/data/db/qingdan_ding_e_ku.db"
self.top_k = TOP_K
# 初始化向量检索相关组件
self.embedding_function = XinferenceEmbeddings(api_key="")
self.embedding_function = XinferenceEmbeddings()
# 初始化向量数据库连接
self.ding_e_vector_db = SQLiteVSS(
+12 -10
View File
@@ -462,10 +462,10 @@ class ExcelToSQLiteProcessor:
print("数据库事务已提交,连接已关闭")
class CreateEmbedingData():
def __init__(self, db_path, api_key="aa"):
def __init__(self, db_path):
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
self.embedding_function = XinferenceEmbeddings(api_key=api_key)
self.embedding_function = XinferenceEmbeddings()
def create_ding_e_zimu_embedding(self):
"""创建定额子目名称的向量索引"""
@@ -535,22 +535,24 @@ def main():
print("开始处理定额库和清单库Excel文件...")
# 配置参数
ding_e_base_dir = "/data/QueryRewrite/data/excel/Excel版 清单定额库/定额库"
qing_dan_base_dir = "/data/QueryRewrite/data/excel/Excel版 清单定额库/清单库"
db_path = "/data/QueryRewrite/data/db/qingdan_ding_e_ku copy.db"
ding_e_base_dir = f"{os.getcwd()}/data/excel/Excel版 清单定额库/定额库"
qing_dan_base_dir = f"{os.getcwd()}/data/excel/Excel版 清单定额库/清单库"
db_path = f"{os.getcwd()}/data/db/qingdan_ding_e_ku.db"
if os.path.exists(db_path):
print("数据库文件已存在, 任务结束...")
return
# 创建处理器实例
# processor = ExcelToSQLiteProcessor(db_path)
processor = ExcelToSQLiteProcessor(db_path)
try:
# 处理定额库文件
# processor.process_ding_e_files(ding_e_base_dir)
processor.process_ding_e_files(ding_e_base_dir)
# # 处理清单库文件
# processor.process_qing_dan_files(qing_dan_base_dir)
processor.process_qing_dan_files(qing_dan_base_dir)
# # 提交并关闭
# processor.commit_and_close()
processor.commit_and_close()
print("=" * 50)
print("所有Excel文件处理完成!数据已成功导入SQLite数据库")
@@ -28,7 +28,7 @@ def get_embedding_model(api_key: str = None) -> Embeddings:
Returns:
嵌入模型实例
"""
return XinferenceEmbeddings(api_key=api_key)
return XinferenceEmbeddings()
class ProfessionalNounVectorizer:
+14 -9
View File
@@ -23,13 +23,13 @@ from urllib.parse import urljoin
class XinferenceEmbeddings(Embeddings):
"""SiliconFlow嵌入模型封装"""
def __init__(self, api_key: str, model: str = os.getenv("EMBEDDING_MODEL_NAME", "bge-m3")):
self.api_key = api_key
def __init__(self, model: str = os.getenv("EMBEDDING_MODEL_NAME", "bge-m3")):
self.model = model
base_url = os.getenv("XINFERENCE_URL", "http://10.1.16.39:9995")
base_url = os.getenv("EMBEDDING_BASE_URL", "http://10.1.16.39:9995")
self.url = urljoin(base_url.rstrip('/') + '/', 'v1/embeddings')
api_key = os.getenv("EMBEDDING_API_KEY", "")
self.headers = {
"Authorization": f"Bearer {self.api_key}",
"Authorization": f"Bearer {api_key}",
"Content-Type": "application/json"
}
@@ -89,12 +89,13 @@ class XinferenceReRankerModel:
List[dict]: 重排序后的文档列表,每个元素包含document内容、相关性分数和原始索引
"""
base_url = os.getenv("XINFERENCE_URL", "http://10.1.16.39:9995")
base_url = os.getenv("RERANKER_BASE_URL", "http://10.1.16.39:9995")
model_name = os.getenv("RERANKER_MODEL_NAME", "bge-reranker-v2-m3")
api_key = os.getenv("RERANKER_API_KEY", "")
rerank_url = urljoin(base_url.rstrip('/') + '/', 'v1/rerank')
params = {"documents": documents, "query": query, "top_n": top_k, "return_documents": True, "model": model_name}
headers = {
"Authorization": "Bearer <token>", # 这里需要替换为实际的token
"Authorization": f"Bearer {api_key}", # 这里需要替换为实际的token
"Content-Type": "application/json"
}
@@ -123,12 +124,14 @@ class XinferenceReRankerModel:
Returns:
List[dict]: 重排序后的文档列表,每个元素包含document内容、相关性分数和原始索引
"""
base_url = os.getenv("XINFERENCE_URL", "http://10.1.16.39:9995")
base_url = os.getenv("RERANKER_BASE_URL", "http://10.1.16.39:9995")
rerank_url = urljoin(base_url.rstrip('/') + '/', 'v1/rerank')
model_name = os.getenv("RERANKER_MODEL_NAME", "bge-reranker-v2-m3")
api_key = os.getenv("RERANKER_API_KEY", "")
params = {"documents": documents, "query": query, "top_n": top_k, "return_documents": True, "model": model_name}
headers = {
"Authorization": "Bearer <token>", # 这里需要替换为实际的token
"Authorization": f"Bearer {api_key}", # 这里需要替换为实际的token
"Content-Type": "application/json"
}
@@ -214,7 +217,9 @@ class OpenAiLLM:
messages=[{'role': 'user', 'content': user_prompt}],
**kwargs
)
return completion.choices[0].message
message = completion.choices[0].message
message.usage = completion.usage
return message
except Exception as e:
raise RuntimeError(f"OpenAiLLM:ainvoke:error:{str(e)}") from e