From 8aec1f3f6fa0971566d981997dbabe3a6e4b2c1a Mon Sep 17 00:00:00 2001
From: ouyangyouzhang <ouyangyouzhang@booway.com.cn>
Date: Wed, 11 Jun 2025 09:32:44 +0800
Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96Dify=E5=AF=B9=E6=AF=94?=
 =?UTF-8?q?=E6=B5=8B=E8=AF=95=E5=99=A8=EF=BC=8C=E6=96=B0=E5=A2=9E=E5=91=BD?=
 =?UTF-8?q?=E4=BB=A4=E8=A1=8C=E5=8F=82=E6=95=B0=E8=A7=A3=E6=9E=90=E5=8A=9F?=
 =?UTF-8?q?=E8=83=BD=EF=BC=8C=E6=94=AF=E6=8C=81=E9=80=89=E6=8B=A9=E6=B5=8B?=
 =?UTF-8?q?=E8=AF=95=E6=A8=A1=E5=BC=8F=EF=BC=88=E4=BB=85=E6=96=B0=E6=B5=81?=
 =?UTF-8?q?=E7=A8=8B=E6=88=96=E6=96=B0=E8=80=81=E6=B5=81=E7=A8=8B=E5=AF=B9?=
 =?UTF-8?q?=E6=AF=94=EF=BC=89=EF=BC=8C=E9=87=8D=E6=9E=84=E9=97=AE=E9=A2=98?=
 =?UTF-8?q?=E5=A4=84=E7=90=86=E9=80=BB=E8=BE=91=E4=BB=A5=E6=8F=90=E9=AB=98?=
 =?UTF-8?q?=E5=8F=AF=E8=AF=BB=E6=80=A7=E5=92=8C=E7=BB=B4=E6=8A=A4=E6=80=A7?=
 =?UTF-8?q?=EF=BC=8C=E6=9B=B4=E6=96=B0=E8=BE=93=E5=87=BA=E6=96=87=E4=BB=B6?=
 =?UTF-8?q?=E5=91=BD=E5=90=8D=E8=A7=84=E5=88=99=EF=BC=8C=E7=A1=AE=E4=BF=9D?=
 =?UTF-8?q?=E7=BB=93=E6=9E=9C=E4=BF=9D=E5=AD=98=E8=87=B3=E6=8C=87=E5=AE=9A?=
 =?UTF-8?q?=E8=B7=AF=E5=BE=84=E3=80=82?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 rag2_0/dify/test_dify_chatapi.py | 226 +++++++++++++++++++------------
 1 file changed, 141 insertions(+), 85 deletions(-)

diff --git a/rag2_0/dify/test_dify_chatapi.py b/rag2_0/dify/test_dify_chatapi.py
index f173393..24daabf 100755
--- a/rag2_0/dify/test_dify_chatapi.py
+++ b/rag2_0/dify/test_dify_chatapi.py
@@ -19,6 +19,7 @@ from pydantic import BaseModel, Field
 from langchain.output_parsers import PydanticOutputParser
 from threading import Lock
 import sys
+import argparse
 
 load_dotenv()
 
@@ -30,27 +31,34 @@ class DifyComparisonTester:
     """
     Dify新旧流程对比测试类，用于比较两个不同流程的问答效果并进行评判
     """
-    def __init__(self, excel_path:str, baseurl:str, old_workflow_api_key:str, new_workflow_api_key:str, 
-                 wiki_excel_path:str=None, output_path:str=None, max_workers:int=1):
+    def __init__(self, excel_path:str, baseurl:str, new_workflow_api_key:str, 
+                 old_workflow_api_key:str=None, wiki_excel_path:str=None, 
+                 output_path:str=None, max_workers:int=1, mode:str="both"):
         """
         初始化对比测试器
         
         Args:
             excel_path: 包含问题的Excel文件路径
             baseurl: Dify API的基础URL
-            old_workflow_api_key: 旧流程的API密钥
             new_workflow_api_key: 新流程的API密钥
+            old_workflow_api_key: 旧流程的API密钥，仅在mode="both"时需要
             wiki_excel_path: Wiki Excel文件路径，用于获取标准答案
             output_path: 输出Excel文件路径
             max_workers: 最大工作线程数
+            mode: 测试模式，"new_only"表示仅测试新对话，"both"表示测试新老对话
         """
         self.excel_path = excel_path
+        self.mode = mode
+        
         # 使用NewWorkflowChat和OldWorkFlowChat代替ChatClient
-        self.old_chat = OldWorkFlowChat(api_key=old_workflow_api_key, base_url=baseurl)
         self.new_chat = NewWorkflowChat(api_key=new_workflow_api_key, base_url=baseurl)
+        if mode == "both" and old_workflow_api_key:
+            self.old_chat = OldWorkFlowChat(api_key=old_workflow_api_key, base_url=baseurl)
+        else:
+            self.old_chat = None
         
         # 评判相关参数
-        self.output_path = output_path or os.path.join(os.path.dirname(self.excel_path), "dify问答_综合评判结果.xlsx")
+        self.output_path = output_path or os.path.join(os.path.dirname(self.excel_path), "dify问答_新流程结果.xlsx")
         self.max_workers = max_workers
         self.content_source_parser = PydanticOutputParser(pydantic_object=ContentSource)
         self.results_lock = Lock()
@@ -68,45 +76,6 @@ class DifyComparisonTester:
         model = os.getenv("LLM_MODEL_NAME")
         return OpenAiLLM(api_key=api_key, base_url=base_url, model=model)
 
-    def process_question(self, q:str):
-        """
-        处理单个问题，并行获取新旧流程的回答
-        
-        Args:
-            q: 问题内容
-            
-        Returns:
-            dict: 包含问题和两个流程回答的字典
-        """
-        def get_old_answer():
-            try:
-                return self.old_chat.process_question(query=q)
-            except Exception as e:
-                return f"error: {str(e)}"
-                
-        def get_new_answer():
-            try:
-                return self.new_chat.process_question(query=q)
-            except Exception as e:
-                return f"error: {str(e)}"
-
-        # 并行执行old_chat和new_chat
-        with ThreadPoolExecutor(max_workers=2) as executor:
-            future_old = executor.submit(get_old_answer)
-            future_new = executor.submit(get_new_answer)
-            try:
-                old_result = future_old.result()
-                new_result = future_new.result()
-                
-                if isinstance(old_result, str) and old_result.startswith("error:"):
-                    return None, None
-                if isinstance(new_result, str) and new_result.startswith("error:"):
-                    return None, None
-
-            except Exception as e:
-                return None, None, None
-        return future_old, future_new
-        
     def find_wiki_link(self, query) -> str | None:
         """
         根据查询找出对应的词条链接
@@ -510,6 +479,37 @@ content: "{content}"
         result = f"{similarity_percentage}%"
         return result
 
+    def process_question(self, q:str) -> tuple:
+        """
+        Fetch the answer(s) for one question from the new flow and, when comparing, the old flow.
+        
+        Args:
+            q: Question text, passed unchanged to each chat client's process_question.
+            
+        Returns:
+            tuple: (old_result, new_result). old_result is None when only the new flow runs; both are None if any call raises.
+        """
+        try:
+            # New-only mode, or no old-flow client available: query the new flow alone
+            if self.mode == "new_only" or self.old_chat is None:
+                new_result = self.new_chat.process_question(q)
+                return None, new_result
+            else:
+                # Comparison mode: run both flows concurrently on a two-worker pool
+                with ThreadPoolExecutor(max_workers=2) as executor:
+                    # Submit both requests before waiting on either one
+                    future_new = executor.submit(self.new_chat.process_question, q)
+                    future_old = executor.submit(self.old_chat.process_question, q)
+                    
+                    # Wait for each result; a worker's exception is re-raised by .result()
+                    new_result = future_new.result()
+                    old_result = future_old.result()
+                
+                return old_result, new_result
+        except Exception as e:
+            print(f"处理问题 '{q}' 时发生错误: {str(e)}")
+            return None, None
+
     def process_question_with_judge(self, q:str):
         """
         处理单个问题，获取新旧流程的回答并进行评判
@@ -522,9 +522,49 @@ content: "{content}"
         """
         # 获取基本的问题和回答
         future_old, future_new = self.process_question(q)
-        if future_old is None or future_new is None:
+        if future_new is None:
             return None
             
+        # 如果是仅测试新流程模式
+        if self.mode == "new_only" or future_old is None:
+            query = future_new["问题"]
+            new_answer = future_new["新流程答案"]
+            
+            # 获取词条链接和标准答案
+            wiki_url = self.find_wiki_link(query)
+            standard_answer = ""
+            answer_title = ""
+            
+            try:
+                if wiki_url and not pd.isna(wiki_url):
+                    standard_answer = self.get_wiki_content(wiki_url)
+                    answer_title = self.get_wiki_title(wiki_url)
+            except Exception as e:
+                print(f"处理问题 '{query}' 获取标准答案时发生错误: {str(e)}")
+            
+            # 判断答案正确性
+            judge_result = ""
+            if standard_answer:
+                # 调用LLM判断新答案是否正确
+                new_result = self.judge_answer(standard_answer, new_answer)
+                if new_result is not None:
+                    judge_result = "正确" if new_result else "错误"
+            
+            # 返回结果
+            return {
+                "问题": query,
+                "问题改写": future_new["新问题改写"],
+                "问题分类": future_new["新问题分类"],
+                "槽点信息": future_new["槽点信息"],
+                "新流程答案": new_answer,
+                "回答判断": judge_result,
+                "答案词条": answer_title if answer_title else "", 
+                "检索词条": future_new["新检索词条"],
+            }
+        
+        # 如果是测试新老流程模式
+        if future_old is None:
+            return None
         query = future_old["问题"]
         old_answer = future_old["旧流程答案"]
         new_answer = future_new["新流程答案"]
@@ -549,23 +589,23 @@ content: "{content}"
             
         if judge_result is None:
             judge_result = ""
-                
+        
         # retrieve_title_score = self.get_retrieve_title_similarity(old_retrieve_content=old_workflow_info["检索内容"], new_retrieve_content=new_workflow_info["检索内容"])
         
         # 返回结果
         return {
             "问题": query,
-            "新问题改写": future_new["问题改写"],
-            "旧问题改写": future_old["问题改写"],
-            "新问题分类": future_new["问题分类"],
+            "新问题改写": future_new["新问题改写"],
+            "旧问题改写": future_old["旧问题改写"],
+            "新问题分类": future_new["新问题分类"],
             "槽点信息": future_new["槽点信息"],
             "新流程答案": new_answer,
             "旧流程答案": old_answer,
             "回答判断": judge_result,
             # "词条检索相似度": retrieve_title_score,
             "答案词条": answer_title if answer_title else "", 
-            "新检索词条": future_new["检索词条"],
-            "旧检索词条": future_old["检索词条"],
+            "新检索词条": future_new["新检索词条"],
+            "旧检索词条": future_old["旧检索词条"],
         }
         
     def run_comparison(self, with_judge=False):
@@ -582,15 +622,17 @@ content: "{content}"
         df = pd.read_excel(self.excel_path)
         questions=[]
         for idx, row in df.iterrows():
-            if row['回答中的软件名称'] == "未知":
-                continue
-            if row['提问中的软件名称'] != "未知":
+            if "回答中的软件名称" in row and "提问中的软件名称" in row:
+                if row['回答中的软件名称'] == "未知" and row['提问中的软件名称'] == "未知":
+                    continue
+                if row['提问中的软件名称'] != "未知":
+                    questions.append(row['提问'])
+                else:
+                    questions.append(f"{row['回答中的软件名称']}, {row['提问']}")
+            else:
                 questions.append(row['提问'])
-            questions.append(f"{row['回答中的软件名称']}, {row['提问']}")
             
         results = []
-        # 选择处理函数
-        process_func = self.process_question_with_judge if with_judge else self.process_question
         is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None
         if not is_debug:
             # 使用多线程并发处理问题
@@ -602,7 +644,7 @@ content: "{content}"
                     # 提交所有任务
                     futures = []
                     for q in questions:
-                        future = executor.submit(process_func, q)
+                        future = executor.submit(self.process_question_with_judge, q)
                         futures.append(future)
                     
                     # 处理结果
@@ -614,12 +656,13 @@ content: "{content}"
                         pbar.update(1)
         else:
             for q in questions:
-                result = process_func(q)
+                result = self.process_question_with_judge(q)
                 print(json.dumps(result,ensure_ascii=False,indent=2))
                 if result is not None:
                     results.append(result)
+                    
         # 生成输出Excel文件
-        out_path = self.output_path if with_judge else os.path.join(os.path.dirname(self.excel_path), "dify问答_对比结果.xlsx")
+        out_path = self.output_path
         df_results = pd.DataFrame(results)
         
         # 使用ExcelWriter设置格式
@@ -639,37 +682,50 @@ content: "{content}"
 
 
 if __name__ == "__main__":
-    # 定义Excel路径
-    excel_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/历史提问数据(like)_提问明确.xlsx")
+    # Default input workbooks, resolved relative to this script; the matching CLI flags override them
+
+    default_excel_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/历史提问数据(like)_提问明确.xlsx")
+    default_wiki_excel_path=os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/部分提问_软件名称明确.xlsx")
+    parser = argparse.ArgumentParser(description='Dify对话测试工具')  # command-line argument parser
+    parser.add_argument('--mode', type=str, choices=['new_only', 'both'], default='new_only',  # NOTE: DifyComparisonTester.__init__ itself defaults mode to "both"
+                        help='测试模式: new_only表示仅测试新对话, both表示测试新老对话')
+    parser.add_argument('--excel_path', type=str, 
+                        default=default_excel_path,
+                        help='包含问题的Excel文件路径')
+    parser.add_argument('--baseurl', type=str, default="http://172.20.0.145/v1", 
+                        help='Dify API的基础URL')
+    parser.add_argument('--new_api_key', type=str, default="app-qxsSybCs7ABiKlC1JabTYVn6",  # NOTE(review): API keys are hard-coded as defaults; consider sourcing them from the environment
+                        help='新流程的API密钥')
+    parser.add_argument('--old_api_key', type=str, default="app-wUdkWJx5zeOvmvBUZizMoSw3",
+                        help='旧流程的API密钥')
+    parser.add_argument('--wiki_excel_path', type=str, 
+                        default=default_wiki_excel_path,
+                        help='Wiki Excel文件路径，用于获取标准答案')
+    parser.add_argument('--output_path', type=str, default=None,
+                        help='输出Excel文件路径')
+    parser.add_argument('--max_workers', type=int, default=5,
+                        help='最大工作线程数')
     
-    if not os.path.exists(excel_path):
-        print(f"错误：Excel文件不存在: {excel_path}")
+    # Parse the command-line arguments
+    args = parser.parse_args()
+    
+    # Abort early if the question workbook does not exist
+    if not os.path.exists(args.excel_path):
+        print(f"错误：Excel文件不存在: {args.excel_path}")
         exit(1)
     
-    # Dify API配置
-    baseurl = "http://172.20.0.145/v1" 
-    old_workflow_api_key = "app-wUdkWJx5zeOvmvBUZizMoSw3"
-    new_workflow_api_key = "app-qxsSybCs7ABiKlC1JabTYVn6"
-    
-    # Wiki Excel路径和Dify应用ID（用于评判）
-    wiki_excel_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "..", ".." ,"data/excel/部分提问_软件名称明确.xlsx")
-    
     # 创建测试器并运行
     tester = DifyComparisonTester(
-        excel_path=excel_path, 
-        baseurl=baseurl, 
-        old_workflow_api_key=old_workflow_api_key, 
-        new_workflow_api_key=new_workflow_api_key,
-        wiki_excel_path=wiki_excel_path,
-        max_workers=5
+        excel_path=args.excel_path, 
+        baseurl=args.baseurl, 
+        new_workflow_api_key=args.new_api_key, 
+        old_workflow_api_key=args.old_api_key if args.mode == "both" else None,  # the old-flow key is forwarded only in comparison mode
+        wiki_excel_path=args.wiki_excel_path,
+        output_path=args.output_path,
+        max_workers=args.max_workers,
+        mode=args.mode
     )
     
     # 运行对比测试（带评判）
     output_file = tester.run_comparison(with_judge=True)
-    print(f"对比评判结果已保存至: {output_file}")
-
-# 单个问题测试示例
-# 使用新的工作流类进行测试
-# new_chat = NewWorkflowChat(api_key="app-qxsSybCs7ABiKlC1JabTYVn6", base_url="http://172.20.0.145/v1")
-# result = new_chat.process_question("如何新建配电线路工程")
-# print(json.dumps(result, ensure_ascii=False, indent=2))
+    print(f"测试结果已保存至: {output_file}")