上传问题改写、意图识别模块代码
This commit is contained in:
Binary file not shown.
@@ -0,0 +1 @@
|
||||
from dify_client.client import ChatClient, CompletionClient, DifyClient
|
||||
Binary file not shown.
Binary file not shown.
@@ -0,0 +1,459 @@
|
||||
import json
|
||||
|
||||
import requests
|
||||
|
||||
|
||||
class DifyClient:
    """Thin HTTP wrapper around the Dify service API.

    Every request carries an ``Authorization: Bearer <api_key>`` header and is
    sent to ``base_url`` + endpoint. All public methods return the raw
    ``requests.Response``; checking the status code and decoding the body is
    left to the caller.
    """

    def __init__(self, api_key, base_url: str = "https://api.dify.ai/v1"):
        """Remember the API key and the base URL used to build request URLs."""
        self.api_key = api_key
        self.base_url = base_url

    def _send_request(self, method, endpoint, json=None, params=None, stream=False):
        """Send a JSON request and return the ``requests.Response``.

        :param method: HTTP verb, e.g. ``"GET"`` or ``"POST"``
        :param endpoint: path appended verbatim to ``base_url``
        :param json: optional JSON-serialisable request body
        :param params: optional query-string parameters
        :param stream: forwarded to ``requests`` so the body can be consumed
            incrementally
        """
        headers = {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }
        url = f"{self.base_url}{endpoint}"
        # NOTE(review): verify=False turns off TLS certificate verification for
        # every JSON request (the multipart helper below does not do this).
        # Kept as-is so existing deployments keep working; confirm it is
        # really required before using this client over HTTPS.
        return requests.request(
            method,
            url,
            json=json,
            params=params,
            headers=headers,
            stream=stream,
            verify=False,
        )

    def _send_request_with_files(self, method, endpoint, data, files):
        """Send a multipart/form-data request and return the response.

        No ``Content-Type`` header is set here so that ``requests`` can
        generate the multipart boundary itself.
        """
        headers = {"Authorization": f"Bearer {self.api_key}"}
        url = f"{self.base_url}{endpoint}"
        return requests.request(method, url, data=data, headers=headers, files=files)

    def message_feedback(self, message_id, rating, user):
        """POST a rating for the message identified by ``message_id``."""
        data = {"rating": rating, "user": user}
        return self._send_request("POST", f"/messages/{message_id}/feedbacks", json=data)

    def get_application_parameters(self, user):
        """GET ``/parameters`` for ``user``."""
        return self._send_request("GET", "/parameters", params={"user": user})

    def file_upload(self, user, files):
        """Upload ``files`` (a ``requests``-style files mapping) for ``user``."""
        return self._send_request_with_files(
            "POST", "/files/upload", data={"user": user}, files=files
        )

    def text_to_audio(self, text: str, user: str, streaming: bool = False):
        """POST ``text`` to ``/text-to-audio``.

        The payload is passed as ``json=``: ``_send_request`` has no ``data``
        parameter, so the previous ``data=`` keyword raised ``TypeError`` on
        every call.
        """
        data = {"text": text, "user": user, "streaming": streaming}
        return self._send_request("POST", "/text-to-audio", json=data)

    def get_meta(self, user):
        """GET ``/meta`` for ``user``."""
        return self._send_request("GET", "/meta", params={"user": user})
|
||||
|
||||
|
||||
class CompletionClient(DifyClient):
    """Client for the ``/completion-messages`` endpoint."""

    def create_completion_message(self, inputs, response_mode, user, files=None):
        """POST a completion request and return the raw response.

        The HTTP response is opened in streaming mode only when
        ``response_mode`` equals ``"streaming"``.
        """
        payload = dict(
            inputs=inputs,
            response_mode=response_mode,
            user=user,
            files=files,
        )
        wants_stream = response_mode == "streaming"
        return self._send_request(
            "POST", "/completion-messages", payload, stream=wants_stream
        )
|
||||
|
||||
|
||||
class ChatClient(DifyClient):
    """Client for the chat, conversation and audio-to-text endpoints."""

    def create_chat_message(
        self,
        inputs,
        query,
        user,
        response_mode="blocking",
        conversation_id=None,
        files=None,
    ):
        """POST a chat message; ``conversation_id`` is sent only when truthy."""
        payload = dict(
            inputs=inputs,
            query=query,
            user=user,
            response_mode=response_mode,
            files=files,
        )
        if conversation_id:
            payload.update(conversation_id=conversation_id)
        wants_stream = response_mode == "streaming"
        return self._send_request(
            "POST", "/chat-messages", payload, stream=wants_stream
        )

    def get_suggested(self, message_id, user: str):
        """GET the suggested follow-ups for ``message_id``."""
        endpoint = f"/messages/{message_id}/suggested"
        return self._send_request("GET", endpoint, params={"user": user})

    def stop_message(self, task_id, user):
        """POST a stop request for the chat task ``task_id``."""
        endpoint = f"/chat-messages/{task_id}/stop"
        return self._send_request("POST", endpoint, {"user": user})

    def get_conversations(self, user, last_id=None, limit=None, pinned=None):
        """GET ``/conversations`` with the given paging/filter arguments."""
        query_args = dict(user=user, last_id=last_id, limit=limit, pinned=pinned)
        return self._send_request("GET", "/conversations", params=query_args)

    def get_conversation_messages(
        self, user, conversation_id=None, first_id=None, limit=None
    ):
        """GET ``/messages``; optional filters are sent only when truthy."""
        optional = (
            ("conversation_id", conversation_id),
            ("first_id", first_id),
            ("limit", limit),
        )
        query_args = {"user": user}
        for key, value in optional:
            if value:
                query_args[key] = value
        return self._send_request("GET", "/messages", params=query_args)

    def rename_conversation(
        self, conversation_id, name, auto_generate: bool, user: str
    ):
        """POST a new name (or auto-generate flag) for a conversation."""
        payload = dict(name=name, auto_generate=auto_generate, user=user)
        endpoint = f"/conversations/{conversation_id}/name"
        return self._send_request("POST", endpoint, payload)

    def delete_conversation(self, conversation_id, user):
        """Send DELETE for a conversation, with ``user`` in the JSON body."""
        endpoint = f"/conversations/{conversation_id}"
        return self._send_request("DELETE", endpoint, {"user": user})

    def audio_to_text(self, audio_file, user):
        """Upload ``audio_file`` as multipart form data to ``/audio-to-text``."""
        form_fields = {"user": user}
        attachments = {"audio_file": audio_file}
        return self._send_request_with_files(
            "POST", "/audio-to-text", form_fields, attachments
        )
|
||||
|
||||
|
||||
class WorkflowClient(DifyClient):
    """Client for the workflow run/stop/result endpoints."""

    def run(
        self, inputs: dict, response_mode: str = "streaming", user: str = "abc-123"
    ):
        """Start a workflow run and return the raw response.

        :param inputs: workflow input variables
        :param response_mode: ``"streaming"`` (default) or ``"blocking"``
        :param user: end-user identifier sent with the request

        The HTTP response is opened in streaming mode when ``response_mode`` is
        ``"streaming"``, matching the chat and completion clients in this
        module. Without it the whole event stream is buffered before this
        method returns.
        """
        data = {"inputs": inputs, "response_mode": response_mode, "user": user}
        return self._send_request(
            "POST",
            "/workflows/run",
            json=data,
            stream=response_mode == "streaming",
        )

    def stop(self, task_id, user):
        """POST a stop request for the workflow task ``task_id``."""
        data = {"user": user}
        return self._send_request("POST", f"/workflows/tasks/{task_id}/stop", json=data)

    def get_result(self, workflow_run_id):
        """GET the run record for ``workflow_run_id``."""
        return self._send_request("GET", f"/workflows/run/{workflow_run_id}")
|
||||
|
||||
|
||||
class KnowledgeBaseClient(DifyClient):
    """Client for the dataset (knowledge base) endpoints.

    Several methods accept ``extra_params``: a dict merged over the default
    request body, typically to set ``indexing_technique`` or ``process_rule``.
    Example::

        {
            'indexing_technique': 'high_quality',
            'process_rule': {
                'rules': {
                    'pre_processing_rules': [
                        {'id': 'remove_extra_spaces', 'enabled': True},
                        {'id': 'remove_urls_emails', 'enabled': True}
                    ],
                    'segmentation': {
                        'separator': '\\n',
                        'max_tokens': 500
                    }
                },
                'mode': 'custom'
            }
        }

    ``**kwargs`` on a method is forwarded to ``_send_request``.
    """

    def __init__(
        self,
        api_key,
        base_url: str = "https://api.dify.ai/v1",
        dataset_id: str | None = None,
    ):
        """
        Construct a KnowledgeBaseClient object.

        Args:
            api_key (str): API key of Dify.
            base_url (str, optional): Base URL of Dify API. Defaults to 'https://api.dify.ai/v1'.
            dataset_id (str, optional): ID of the dataset. Defaults to None. It is not needed to create a new
                dataset or to list datasets; every other method requires it.
        """
        super().__init__(api_key=api_key, base_url=base_url)
        self.dataset_id = dataset_id

    def _get_dataset_id(self):
        """Return the configured dataset ID, raising ``ValueError`` if unset."""
        if self.dataset_id is None:
            raise ValueError("dataset_id is not set")
        return self.dataset_id

    def create_dataset(self, name: str, **kwargs):
        """Create a dataset called ``name``."""
        return self._send_request("POST", "/datasets", {"name": name}, **kwargs)

    def list_datasets(self, page: int = 1, page_size: int = 20, **kwargs):
        """List datasets, ``page_size`` per page, starting at ``page``."""
        return self._send_request(
            "GET", f"/datasets?page={page}&limit={page_size}", **kwargs
        )

    def create_document_by_text(
        self, name, text, extra_params: dict | None = None, **kwargs
    ):
        """
        Create a document by text.

        :param name: Name of the document
        :param text: Text content of the document
        :param extra_params: extra parameters merged over the defaults (see the class docstring). (optional)
        :return: Response from the API
        :raises ValueError: if ``dataset_id`` is not set
        """
        data = {
            "indexing_technique": "high_quality",
            "process_rule": {"mode": "automatic"},
            "name": name,
            "text": text,
        }
        if isinstance(extra_params, dict):
            data.update(extra_params)
        url = f"/datasets/{self._get_dataset_id()}/document/create_by_text"
        return self._send_request("POST", url, json=data, **kwargs)

    def update_document_by_text(
        self, document_id, name, text, extra_params: dict | None = None, **kwargs
    ):
        """
        Update a document by text.

        :param document_id: ID of the document
        :param name: Name of the document
        :param text: Text content of the document
        :param extra_params: extra parameters merged into the body (see the class docstring). (optional)
        :return: Response from the API
        :raises ValueError: if ``dataset_id`` is not set
        """
        data = {"name": name, "text": text}
        if isinstance(extra_params, dict):
            data.update(extra_params)
        url = (
            f"/datasets/{self._get_dataset_id()}/documents/{document_id}/update_by_text"
        )
        return self._send_request("POST", url, json=data, **kwargs)

    def create_document_by_file(
        self, file_path, original_document_id=None, extra_params: dict | None = None
    ):
        """
        Create a document by file.

        :param file_path: Path to the file
        :param original_document_id: pass this ID if you want to replace the original document (optional)
        :param extra_params: extra parameters merged over the defaults (see the class docstring). (optional)
        :return: Response from the API
        :raises ValueError: if ``dataset_id`` is not set
        """
        data = {
            "process_rule": {"mode": "automatic"},
            "indexing_technique": "high_quality",
        }
        if isinstance(extra_params, dict):
            data.update(extra_params)
        if original_document_id is not None:
            data["original_document_id"] = original_document_id
        # Resolve the URL first so a missing dataset_id fails before the file
        # is opened.
        url = f"/datasets/{self._get_dataset_id()}/document/create_by_file"
        # The context manager closes the handle once the upload has finished;
        # it used to be left open.
        with open(file_path, "rb") as file_obj:
            return self._send_request_with_files(
                "POST", url, {"data": json.dumps(data)}, {"file": file_obj}
            )

    def update_document_by_file(
        self, document_id, file_path, extra_params: dict | None = None
    ):
        """
        Update a document by file.

        :param document_id: ID of the document
        :param file_path: Path to the file
        :param extra_params: extra parameters sent as the body (see the class docstring). (optional)
        :return: Response from the API
        :raises ValueError: if ``dataset_id`` is not set
        """
        data = {}
        if isinstance(extra_params, dict):
            data.update(extra_params)
        url = (
            f"/datasets/{self._get_dataset_id()}/documents/{document_id}/update_by_file"
        )
        with open(file_path, "rb") as file_obj:
            return self._send_request_with_files(
                "POST", url, {"data": json.dumps(data)}, {"file": file_obj}
            )

    def batch_indexing_status(self, batch_id: str, **kwargs):
        """
        Get the status of the batch indexing.

        :param batch_id: ID of the batch uploading
        :return: Response from the API
        """
        url = f"/datasets/{self._get_dataset_id()}/documents/{batch_id}/indexing-status"
        return self._send_request("GET", url, **kwargs)

    def delete_dataset(self):
        """
        Delete this dataset.

        :return: Response from the API
        """
        url = f"/datasets/{self._get_dataset_id()}"
        return self._send_request("DELETE", url)

    def delete_document(self, document_id):
        """
        Delete a document.

        :param document_id: ID of the document
        :return: Response from the API
        """
        url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}"
        return self._send_request("DELETE", url)

    def list_documents(
        self,
        page: int | None = None,
        page_size: int | None = None,
        keyword: str | None = None,
        **kwargs,
    ):
        """
        Get a list of documents in this dataset.

        :param page: page number, optional
        :param page_size: sent as the ``limit`` query parameter, optional
        :param keyword: search keyword, optional
        :return: Response from the API
        """
        params = {}
        if page is not None:
            params["page"] = page
        if page_size is not None:
            params["limit"] = page_size
        if keyword is not None:
            params["keyword"] = keyword
        url = f"/datasets/{self._get_dataset_id()}/documents"
        return self._send_request("GET", url, params=params, **kwargs)

    def add_segments(self, document_id, segments, **kwargs):
        """
        Add segments to a document.

        :param document_id: ID of the document
        :param segments: List of segments to add, example: [{"content": "1", "answer": "1", "keyword": ["a"]}]
        :return: Response from the API
        """
        data = {"segments": segments}
        url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments"
        return self._send_request("POST", url, json=data, **kwargs)

    def query_segments(
        self,
        document_id,
        keyword: str | None = None,
        status: str | None = None,
        **kwargs,
    ):
        """
        Query segments in this document.

        :param document_id: ID of the document
        :param keyword: query keyword, optional
        :param status: status of the segment, optional, e.g. completed
        :param kwargs: forwarded to ``_send_request``; a ``params`` entry is
            merged into the query string built here
        :return: Response from the API
        """
        url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments"
        params = {}
        if keyword is not None:
            params["keyword"] = keyword
        if status is not None:
            params["status"] = status
        # Pop rather than read: leaving "params" in kwargs would pass the
        # keyword twice below and raise TypeError.
        extra_params = kwargs.pop("params", None)
        if extra_params:
            params.update(extra_params)
        return self._send_request("GET", url, params=params, **kwargs)

    def delete_document_segment(self, document_id, segment_id):
        """
        Delete a segment from a document.

        :param document_id: ID of the document
        :param segment_id: ID of the segment
        :return: Response from the API
        """
        url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments/{segment_id}"
        return self._send_request("DELETE", url)

    def update_document_segment(self, document_id, segment_id, segment_data, **kwargs):
        """
        Update a segment in a document.

        :param document_id: ID of the document
        :param segment_id: ID of the segment
        :param segment_data: Data of the segment, example: {"content": "1", "answer": "1", "keyword": ["a"], "enabled": True}
        :return: Response from the API
        """
        data = {"segment": segment_data}
        url = f"/datasets/{self._get_dataset_id()}/documents/{document_id}/segments/{segment_id}"
        return self._send_request("POST", url, json=data, **kwargs)
|
||||
@@ -0,0 +1,215 @@
|
||||
import psycopg2
|
||||
from psycopg2 import sql
|
||||
import os
|
||||
import json
|
||||
from datetime import timezone, timedelta
|
||||
|
||||
class PgSql:
|
||||
"""
|
||||
用于连接和操作 PostgreSQL 数据库的类。
|
||||
|
||||
该类封装了数据库连接、关闭连接以及执行特定查询的方法,
|
||||
主要用于从 Dify 应用相关的表中获取数据。
|
||||
"""
|
||||
def __init__(self):
|
||||
"""
|
||||
初始化 PgSql 实例并建立数据库连接。
|
||||
"""
|
||||
self.connection = None
|
||||
self.connect_sql()
|
||||
|
||||
def connect_sql(self):
|
||||
"""
|
||||
连接到 PostgreSQL 数据库。
|
||||
|
||||
使用预定义的凭据连接到 'dify' 数据库。
|
||||
如果连接失败,会打印错误信息。
|
||||
"""
|
||||
try:
|
||||
# 连接数据库
|
||||
self.connection = psycopg2.connect(
|
||||
user="postgres",
|
||||
password="difyai123456",
|
||||
host="172.20.0.145",
|
||||
port=5432,
|
||||
database="dify"
|
||||
)
|
||||
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
print("Error while connecting to PostgreSQL", error)
|
||||
|
||||
def close_connection(self):
|
||||
"""
|
||||
关闭当前的 PostgreSQL 数据库连接。
|
||||
|
||||
如果存在活动的连接,则关闭它并打印确认信息。
|
||||
"""
|
||||
if self.connection:
|
||||
self.connection.close()
|
||||
print("PostgreSQL connection is closed")
|
||||
|
||||
|
||||
def get_appinfo(self, appid:str)->dict | None:
|
||||
"""
|
||||
根据应用 ID 从 'apps' 表中获取应用信息。
|
||||
|
||||
Args:
|
||||
appid: 目标应用的 ID。
|
||||
|
||||
Returns:
|
||||
一个字典,其中键是列名,值是对应的应用数据。
|
||||
如果未找到应用或发生错误,则返回 None。
|
||||
"""
|
||||
try:
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT * FROM apps WHERE id = %s
|
||||
""",
|
||||
(appid,)
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
if result:
|
||||
colnames = [desc[0] for desc in cursor.description]
|
||||
return dict(zip(colnames, result))
|
||||
return None
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
print("Error while getting tenant_id by appid", error)
|
||||
|
||||
|
||||
def get_messages_info(self, appid:str, query:str)->dict | None:
|
||||
"""
|
||||
根据应用 ID 和查询内容从 'messages' 表中获取消息信息。
|
||||
|
||||
Args:
|
||||
appid: 目标应用的 ID。
|
||||
query: 用户查询的具体内容。
|
||||
|
||||
Returns:
|
||||
一个字典,其中键是列名,值是对应的消息数据。
|
||||
如果未找到消息或发生错误,则返回 None。
|
||||
"""
|
||||
try:
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT * FROM messages WHERE app_id = %s AND query = %s ORDER BY created_at DESC
|
||||
""",
|
||||
(appid, query)
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
if result:
|
||||
colnames = [desc[0] for desc in cursor.description]
|
||||
return dict(zip(colnames, result))
|
||||
return None
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
print("Error while getting messages_info", error)
|
||||
|
||||
def get_messages_info_by_id(self, message_id:str)->dict | None:
|
||||
"""
|
||||
根据消息 ID 从 'messages' 表中获取消息信息。
|
||||
"""
|
||||
try:
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT * FROM messages WHERE id = %s
|
||||
""",
|
||||
(message_id, )
|
||||
)
|
||||
result = cursor.fetchone()
|
||||
if result:
|
||||
colnames = [desc[0] for desc in cursor.description]
|
||||
return dict(zip(colnames, result))
|
||||
return None
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
print("Error while getting messages_info", error)
|
||||
|
||||
def get_workflow_node_executions_info(self, workflow_run_id:str)->list[dict] | None:
|
||||
"""
|
||||
根据工作流运行 ID 从 'workflow_node_executions' 表中获取节点执行信息。
|
||||
|
||||
Args:
|
||||
workflow_run_id: 目标工作流运行的 ID。
|
||||
|
||||
Returns:
|
||||
一个字典,其中键是列名,值是对应的节点执行数据。
|
||||
如果未找到执行信息或发生错误,则返回 None。
|
||||
"""
|
||||
try:
|
||||
with self.connection.cursor() as cursor:
|
||||
cursor.execute(
|
||||
"""
|
||||
SELECT * FROM workflow_node_executions WHERE workflow_run_id = %s
|
||||
""",
|
||||
(workflow_run_id,)
|
||||
)
|
||||
result = cursor.fetchall()
|
||||
if result:
|
||||
colnames = [desc[0] for desc in cursor.description]
|
||||
return [dict(zip(colnames, row)) for row in result]
|
||||
return None
|
||||
except (Exception, psycopg2.Error) as error:
|
||||
print("Error while getting workflow_node_executions_info", error)
|
||||
|
||||
class DifyTool:
    """
    Helpers that collect debugging information about a Dify message.

    Each static method opens its own PgSql connection, reads the message
    row and the node executions of the workflow run that produced it, and
    closes the connection again before returning.
    """

    @staticmethod
    def _with_node_executions(dify_pgsql, messages_info, base=None) -> dict | None:
        """Attach the workflow node executions of ``messages_info`` to ``base``.

        Returns None when ``messages_info`` is empty or no node executions
        are found for its ``workflow_run_id``.
        """
        if not messages_info:
            return None
        node_executions = dify_pgsql.get_workflow_node_executions_info(
            messages_info['workflow_run_id']
        )
        if not node_executions:
            return None
        result = dict(base or {})
        result["messages_info"] = messages_info
        result["workflow_node_executions_info"] = node_executions
        return result

    @staticmethod
    def get_message_debug_info_id(message_id: str) -> dict | None:
        """
        Collect debug information for the message with the given ID.

        Args:
            message_id: ID of the row in the 'messages' table.

        Returns:
            A dict with the keys "messages_info" and
            "workflow_node_executions_info", or None when either lookup
            finds nothing.
        """
        dify_pgsql = PgSql()
        try:
            messages_info = dify_pgsql.get_messages_info_by_id(message_id)
            return DifyTool._with_node_executions(dify_pgsql, messages_info)
        finally:
            # Always release the connection; it used to be left open.
            dify_pgsql.close_connection()

    @staticmethod
    def get_message_debug_info(appid: str, query: str) -> dict | None:
        """
        Collect debug information for an application and an exact query text.

        Args:
            appid: ID of the target application.
            query: exact text of the user query.

        Returns:
            A dict with the keys "appinfo", "messages_info" and
            "workflow_node_executions_info", or None when any of the three
            lookups finds nothing.
        """
        dify_pgsql = PgSql()
        try:
            appinfo = dify_pgsql.get_appinfo(appid)
            if not appinfo:
                return None
            messages_info = dify_pgsql.get_messages_info(appid, query)
            return DifyTool._with_node_executions(
                dify_pgsql, messages_info, {"appinfo": appinfo}
            )
        finally:
            dify_pgsql.close_connection()
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Manual smoke test: print the debug info for one known app and query.
    demo_app_id = "ccf92b97-2789-4a3f-90e0-135a869a37c5"
    demo_query = "电力建设计价通软件,导入结算后没有暂列金怎么办?要手动添加么?"
    print(DifyTool.get_message_debug_info(demo_app_id, demo_query))
|
||||
@@ -0,0 +1,54 @@
|
||||
from flask import Flask, request, Response
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from rag2_0.intent_recognition import IntentRecognizer
|
||||
import json
|
||||
import time
|
||||
# Load variables from a local .env file before any setting is read.
load_dotenv()

app = Flask(__name__)

# Build the shared intent recognizer from environment configuration.
api_key = os.environ.get("OPENAI_API_KEY")
base_url = os.environ.get("OPENAI_API_BASE")
model_name = os.environ.get("LLM_MODEL_NAME", "gpt-3.5-turbo")
recognizer = IntentRecognizer(
    api_key=api_key,
    base_url=base_url,
    model_name=model_name,
)
|
||||
|
||||
def _json_response(payload, status=200):
    """Serialise ``payload`` as UTF-8 JSON (non-ASCII kept readable)."""
    return Response(
        json.dumps(payload, ensure_ascii=False),
        content_type='application/json; charset=utf-8',
        status=status,
    )


@app.route('/intent_recognize', methods=['POST'])
def intent_recognize():
    """Run intent recognition on the ``query`` field of the JSON request body.

    Responses:
        200: JSON object with ``source_query``, ``source_query_keys``,
            ``vertical_classification``, ``sub_classification``,
            ``rewrite_query`` and ``keywords`` (a list of term dicts, or an
            empty string when no terms were extracted).
        400: the body is not a JSON object or has no non-empty ``query``.
        500: the recognizer raised; the error text is returned.
    """
    # silent=True yields None for malformed JSON, so a bad body becomes a 400
    # here instead of an exception reported as a 500.
    data = request.get_json(force=True, silent=True)
    query = data.get('query') if isinstance(data, dict) else None
    if not query:
        return _json_response({"error": "缺少query参数"}, status=400)

    try:
        # perf_counter is monotonic, which suits measuring a duration.
        start_time = time.perf_counter()
        classification, keywords, rewrite, query_keys = recognizer.process_query(query)
        elapsed = time.perf_counter() - start_time
        print(f"意图识别耗时: {elapsed:.2f}秒")

        # Flatten the keyword terms into plain dicts. The empty-string
        # fallback is kept so the response shape stays unchanged.
        keyword_terms = ""
        if keywords and keywords.terms:
            keyword_terms = [
                {
                    "名称": term.name,
                    "同义词": ";".join(term.synonymous) if term.synonymous else "",
                    "描述": term.description,
                }
                for term in keywords.terms
            ]

        result = {
            "source_query": query,
            "source_query_keys": query_keys,
            "vertical_classification": classification.vertical_classification,
            "sub_classification": classification.sub_classification,
            "rewrite_query": rewrite.rewrite,
            "keywords": keyword_terms,
        }
        return _json_response(result)
    except Exception as e:
        return _json_response({"error": str(e)}, status=500)
|
||||
|
||||
if __name__ == "__main__":
    # Start the Flask development server on all interfaces, port 8001.
    listen_options = {"host": "0.0.0.0", "port": 8001}
    app.run(**listen_options)
|
||||
@@ -0,0 +1,136 @@
|
||||
#!/usr/bin/env python
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
import os
|
||||
from rag2_0.dify.dify_client import ChatClient, DifyClient
|
||||
import pandas as pd
|
||||
# 使用线程池并发执行
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from tqdm import tqdm
|
||||
from rag2_0.dify.dify_tool import DifyTool
|
||||
import json
|
||||
|
||||
class DifyComparisonTester:
    """
    Compare the answers of an old and a new Dify chat flow.

    Questions are read from the first column of an Excel file, sent to both
    flows, and the answers are written side by side to a result workbook.
    """

    # Title of the workflow node whose outputs carry the rewritten query.
    _REWRITE_NODE_TITLE = "问题优化结果解析"

    def __init__(self, excel_path: str, baseurl: str, old_workflow_api_key: str, new_workflow_api_key: str):
        """
        Create the tester and one chat client per flow.

        Args:
            excel_path: path of the Excel file holding the questions
            baseurl: base URL of the Dify API
            old_workflow_api_key: API key of the old flow
            new_workflow_api_key: API key of the new flow
        """
        self.excel_path = excel_path
        self.baseurl = baseurl
        self.old_workflow_api_key = old_workflow_api_key
        self.new_workflow_api_key = new_workflow_api_key
        self.old_chat = ChatClient(api_key=old_workflow_api_key, base_url=baseurl)
        self.new_chat = ChatClient(api_key=new_workflow_api_key, base_url=baseurl)

    @staticmethod
    def _ask(chat, question):
        """Send ``question`` to ``chat``; return the decoded JSON or an error string."""
        try:
            return chat.create_chat_message(inputs={}, query=question, user="AutoTestDifyChat").json()
        except Exception as e:
            return f"error: {str(e)}"

    @staticmethod
    def _answer_text(result):
        """Return the answer of a chat result, or a readable error description."""
        if not isinstance(result, dict):
            # _ask returned its "error: ..." string.
            return result
        if "answer" in result:
            return result["answer"]
        # No answer field (e.g. an API error payload): show the payload itself.
        return json.dumps(result, ensure_ascii=False)

    @classmethod
    def _rewrite_query(cls, result):
        """Look up the rewritten query recorded for the new flow's message.

        Returns an empty string when the message ID, the debug info or the
        rewrite node is missing, or when the lookup fails.
        """
        message_id = result.get("message_id") if isinstance(result, dict) else None
        if not message_id:
            return ""
        try:
            message_info = DifyTool.get_message_debug_info_id(message_id=message_id)
        except Exception as e:
            print(f"error: {str(e)}")
            return ""
        if not message_info:
            return ""
        rewrite_query = ""
        for workflow_node in message_info["workflow_node_executions_info"]:
            if workflow_node["title"] != cls._REWRITE_NODE_TITLE:
                continue
            outputs = workflow_node["outputs"]
            # presumably stored as JSON text; decode only when it is a string
            if isinstance(outputs, str):
                outputs = json.loads(outputs)
            if isinstance(outputs, dict):
                rewrite_query = outputs.get("optimize_query", "")
        return rewrite_query

    def process_question(self, q: str):
        """
        Ask one question to both flows in parallel.

        Args:
            q: the question text

        Returns:
            dict: the question, the rewritten query of the new flow, and
            the answer (or error text) of each flow
        """
        # Both requests are independent, so run them on two threads.
        with ThreadPoolExecutor(max_workers=2) as executor:
            future_old = executor.submit(self._ask, self.old_chat, q)
            future_new = executor.submit(self._ask, self.new_chat, q)
            old_result = future_old.result()
            new_result = future_new.result()

        return {
            "问题": q,
            "问题改写": self._rewrite_query(new_result),
            "旧流程答案": self._answer_text(old_result),
            "新流程答案": self._answer_text(new_result),
        }

    def run_comparison(self):
        """
        Process every question and write the comparison workbook.

        Returns:
            str: path of the generated Excel file
        """
        # Questions are taken from the first column of the input sheet.
        df = pd.read_excel(self.excel_path)
        questions = df.iloc[:, 0].tolist()

        # Questions are handled one after another, in sheet order.
        results = [
            self.process_question(q)
            for q in tqdm(questions, desc="处理问题进度")
        ]

        out_path = os.path.join(os.path.dirname(self.excel_path), "dify问答_对比结果.xlsx")
        df_results = pd.DataFrame(results)

        with pd.ExcelWriter(out_path, engine='xlsxwriter') as writer:
            df_results.to_excel(writer, index=False, sheet_name='Sheet1')
            worksheet = writer.sheets['Sheet1']
            # Column widths, in Excel units.
            worksheet.set_column('A:A', 50)  # question
            worksheet.set_column('B:B', 70)  # rewritten query
            worksheet.set_column('C:D', 70)  # old / new flow answers

        return out_path
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Question workbook, located relative to this script.
    excel_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)),
        "..",
        "..",
        "data",
        "excel",
        "历史提问数据(dislike)_1000条_软件明确.xlsx",
    )

    if not os.path.exists(excel_path):
        print(f"错误:Excel文件不存在: {excel_path}")
        # exit() is injected by the site module and may be absent;
        # SystemExit always works.
        raise SystemExit(1)

    # Dify API settings. Environment variables take precedence over the
    # values that used to be hard-coded here.
    # NOTE(review): the fallback API keys are still committed to source
    # control; consider rotating them and dropping the defaults.
    baseurl = os.getenv("DIFY_BASE_URL", "http://172.20.0.145/v1")
    old_workflow_api_key = os.getenv("DIFY_OLD_WORKFLOW_API_KEY", "app-wUdkWJx5zeOvmvBUZizMoSw3")
    new_workflow_api_key = os.getenv("DIFY_NEW_WORKFLOW_API_KEY", "app-Lf1pQ1NVwdMfCRVNTBCOTPHT")

    # Build the tester and run the whole comparison.
    tester = DifyComparisonTester(excel_path, baseurl, old_workflow_api_key, new_workflow_api_key)
    output_file = tester.run_comparison()
    print(f"对比结果已保存至: {output_file}")
|
||||
Reference in New Issue
Block a user