QueryRewrite/rag2_0/dify/chat_dify_by_workorder.py

from rag2_0.dify.dify_tool import NewWorkflowChat
import pandas as pd
from concurrent.futures import ThreadPoolExecutor
from tqdm import tqdm
import concurrent.futures


class ChatDifyByWorkorder:

    def __init__(self, api_key=None, base_url="https://api.dify.ai/v1") -> None:
        """
        初始化ChatDifyByWorkorder类

        Args:
            api_key: Dify API密钥，默认为None
            base_url: Dify API的基础URL，默认为"https://api.dify.ai/v1"
        """
        baseurl = "http://172.20.0.145/v1"
        new_workflow_api_key = "app-qxsSybCs7ABiKlC1JabTYVn6"
        self.new_chat = NewWorkflowChat(api_key=new_workflow_api_key, base_url=baseurl)
        self.new_chat_answer = NewWorkflowChat(api_key=new_workflow_api_key, base_url=baseurl)


    def get_soft_name(self, row) -> str:
        if "博微配网计价通D3" in row["产品线"]:
            return "博微配网计价通D3"
        elif "博微电力建设计价通软件" in row["产品线"]:
            return "电力建设计价通软件"
        elif "新能源系列" in row["产品线"] and "博微新型储能电站建设计价通C1软件" in row["产品名称"]:
            return "储能C1软件"
        elif "博微西藏计价通Z1" in row["产品线"]:
            return "西藏计价通Z1"
        elif "博微技改检修计价通T1软件" in row["产品线"] and "技改检修计价通T1软件-概预算" in row["产品名称"]:
            return "技改检修工程计价通T1"
        elif "博微技改检修计价通T1软件" in row["产品线"] and "技改检修计价通T1软件-清单" in row["产品名称"]:
            return "检修清单计价通T1"
        return ""

    def process_query(self, q:str) -> dict:
        """
        发送问题并获取回答及相关工作流信息

        Args:
            q: 用户问题

        Returns:
            dict: 包含问题、回答和工作流信息的字典
        """
        retry_count = 0
        max_retries = 2

        while retry_count <= max_retries:
            try:
                # 发送问题获取回答和消息ID
                result = self.new_chat.process_question(q)
                return result
            except Exception as e:
                retry_count += 1
                if retry_count <= max_retries:
                    continue
                else:
                    raise e

    def process_answer(self, q:str) -> dict:
        """
        发送问题并获取回答及相关工作流信息

        Args:
            q: 用户问题

        Returns:
            dict: 包含问题、回答和工作流信息的字典
        """
        retry_count = 0
        max_retries = 2

        while retry_count <= max_retries:
            try:
                # 发送问题获取回答和消息ID
                result = self.new_chat_answer.process_question(q)
                return result
            except Exception as e:
                retry_count += 1
                if retry_count <= max_retries:
                    continue
                else:
                    raise

    def process_row(self, row):
        """处理单行数据"""
        soft_name = self.get_soft_name(row=row)
        if soft_name == "":
            return None

        # 使用线程池并发执行查询
        with ThreadPoolExecutor() as executor:
            try:
                # 提交两个任务并获取Future对象
                query_future = executor.submit(self.process_query, q=f"{soft_name},{row['客户问题']}")
                answer_future = executor.submit(self.process_answer, q=f"{soft_name},{row['解决方案']}")

                # 获取结果
                query_result = query_future.result()
                answer_result = answer_future.result()
            except Exception as e:
                print(f"处理工单 {row.get('工单编号', '未知')} 时发生错误: {str(e)}")
                return None

        worker_id = str(row["工单编号"])
        if query_result is None or answer_result is None:
            print("处理对话出现错误")
            return None

        worker_order_info = {
            "工单编号": worker_id,
            "用户问题": row['客户问题'],
            "解决方案": row['解决方案'],
            "AI回答": query_result["新流程答案"],
            "用户问题检索到的词条": query_result["新检索词条"],
            "解决方案检索到的词条": answer_result["新检索词条"],
        }
        return worker_order_info

    def run(self, excel_path:str):
        df_data = pd.read_excel(excel_path)
        list_worker_order_info = []

        # 创建进度条
        with tqdm(total=len(df_data), desc="处理工单") as pbar:
            # 创建线程池，最大并发数可以根据需要调整
            with ThreadPoolExecutor(max_workers=5) as executor:
                # 提交所有任务
                future_to_row = {executor.submit(self.process_row, row): idx for idx, row in df_data.iterrows()}

                # 处理完成的任务
                for future in concurrent.futures.as_completed(future_to_row):
                    result = future.result()
                    if result is not None:
                        list_worker_order_info.append(result)
                    pbar.update(1)

        return list_worker_order_info


if __name__=="__main__":
    worker_chat = ChatDifyByWorkorder()
    result = worker_chat.run(excel_path="data/excel/工单记录_均衡提取2000条.xlsx")
    # 可以选择保存结果到Excel
    if result:
        pd.DataFrame(result).to_excel("data/excel/工单处理结果.xlsx", index=False)