优化DifyQueryRetrieval和ModelTool中的请求超时设置，调整DifyExporter类中的分类逻辑，移除无用字段，更新词条处理逻辑。

2025-07-16 16:26:46 +08:00
parent a934f2c398
commit 8a58fef1a7
6 changed files with 19 additions and 30 deletions
@@ -16416,13 +16416,6 @@
    "synonymous": [],
    "description": "中标清单中的材料或机械消耗量"
  },
  {
    "name": "清单项",
    "synonymous": [
      "清单项目"
    ],
    "description": "工程量清单中的具体项目"
  },
  {
    "name": "结算工程解锁",
    "synonymous": [],
@@ -17476,13 +17469,6 @@
    "synonymous": [],
    "description": "软件中用于输出各类成果文件或展示工程计价结果的界面"
  },
  {
    "name": "南方电网接口格式",
    "synonymous": [
      "南网规约接口"
    ],
    "description": "符合南方电网数据交换标准的接口规范，用于导出符合南方电网规范的接口数据，以便上传至基建一体化信息系统。"
  },
  {
    "name": "投标限价数据",
    "synonymous": [],
@@ -41,7 +41,7 @@ class DifyQueryRetrieval:
    def retrieve_by_dataset(self, query: str, dataset_name: str) -> List[Dict[str, Any]]:
        try:
            knowledge_base_client = KnowledgeBaseClient(api_key=self._api_key, base_url=self._base_url, dataset_id=self._datasets_list[dataset_name])
-            documents = knowledge_base_client.retrieve(query)
+            documents = knowledge_base_client.retrieve(query, timeout=300)
            retrieved_documents = documents.json().get("records", [])
            # 添加数据集信息
@@ -117,9 +117,14 @@ class DifyExporter:
        intent_result = json.loads(intent_node_execution_info[0]["outputs"])
        vertical_classification = intent_result.get("vertical_classification", "")
        sub_classification = intent_result.get("sub_classification", "")
-        if vertical_classification == "固定话术类":
+        if sub_classification == "固定话术类":
            return "使用固定话术"
        worker_node_execution_info = [node_execution_info for node_execution_info in msg_debug_info['workflow_node_executions_info'] 
                                    if node_execution_info["title"] == "检索工单数据"]
        if len(worker_node_execution_info) != 0:   
            return "检索工单"
        return ""
    def get_node_info_by_title(self, workflow_node_executions_info:list, title:str) -> dict:
@@ -198,13 +203,12 @@ class DifyExporter:
            return None
        wiki_list = self.get_wiki_list(msg_debug_info)
        # 获取备注
        remark = self.get_remark(msg_debug_info)
        if len(wiki_list) ==0:
            wiki_list_str = self.get_remark(msg_debug_info)
        else:
            wiki_list = list(set(wiki_list))
            wiki_list_str = "\n".join(wiki_list)
        if wiki_list_str == "":
            wiki_list_str = "无"
        rating = self.dify_pgsql.get_message_rating(msg_id)
        # 直接通过字典键获取query_type
        workflow_run_id = message['workflow_run_id']
@@ -220,7 +224,6 @@ class DifyExporter:
            "评价": rating,
            "问题分类": query_type,
            "检索到的词条": wiki_list_str,
            "备注": remark
        }
    def process_conversations(self):
@@ -274,7 +277,7 @@ class DifyExporter:
        # 设置列的顺序
        columns_order = [
            "msg_id","当前软件", "提问", "回答", "提问人", "提问时间", 
-            "评价", "问题分类", "检索到的词条", "备注"
+            "评价", "问题分类", "检索到的词条"
        ]
        # 确保所有列都存在，如果不存在则添加空列
@@ -38,7 +38,7 @@ class SiliconFlowEmbeddings(Embeddings):
            "input": input,
            "encoding_format": "float"
        }
-        response = requests.post(self.url, json=payload, headers=self.headers)
+        response = requests.post(self.url, json=payload, headers=self.headers, timeout=300)
        response.raise_for_status()
        data = response.json()
        return [item["embedding"] for item in data["data"]]
@@ -50,7 +50,7 @@ class SiliconFlowEmbeddings(Embeddings):
            "input": input,
            "encoding_format": "float"
        }
-        async with httpx.AsyncClient() as client:
+        async with httpx.AsyncClient(timeout=300) as client:
            response = await client.post(self.url, json=payload, headers=self.headers)
            response.raise_for_status()
            data = response.json()
@@ -101,7 +101,7 @@ class SiliconFlowReRankerModel:
            "Content-Type": "application/json"
        }
        try:
-            response = requests.post(url, json=payload, headers=headers)
+            response = requests.post(url, json=payload, headers=headers, timeout=300)
            response.raise_for_status()
            results = response.json()
            return [{"document": item["document"]["text"], "score": item["relevance_score"], "index": item["index"]} for item in results["results"]]
@@ -138,7 +138,7 @@ class SiliconFlowReRankerModel:
            "Content-Type": "application/json"
        }
        try:
-            async with httpx.AsyncClient() as client:
+            async with httpx.AsyncClient(timeout=300) as client:
                response = await client.post(url, json=payload, headers=headers)
                response.raise_for_status()
                results = response.json()
@@ -173,7 +173,7 @@ class XinferenceReRankerModel:
        }
        try:
-            response = requests.post(url, json=params, headers=headers)
+            response = requests.post(url, json=params, headers=headers, timeout=300)
            response.raise_for_status()  # 检查响应状态
            results = response.json()
@@ -206,7 +206,7 @@ class XinferenceReRankerModel:
        }
        try:
-            async with httpx.AsyncClient() as client:
+            async with httpx.AsyncClient(timeout=300) as client:
                response = await client.post(url, json=params, headers=headers)
                response.raise_for_status()  # 检查响应状态
                results = response.json()