diff --git a/data/nouns/merged_nouns.json b/data/nouns/merged_nouns.json index 5b44458..78af049 100644 --- a/data/nouns/merged_nouns.json +++ b/data/nouns/merged_nouns.json @@ -16416,13 +16416,6 @@ "synonymous": [], "description": "中标清单中的材料或机械消耗量" }, - { - "name": "清单项", - "synonymous": [ - "清单项目" - ], - "description": "工程量清单中的具体项目" - }, { "name": "结算工程解锁", "synonymous": [], @@ -17476,13 +17469,6 @@ "synonymous": [], "description": "软件中用于输出各类成果文件或展示工程计价结果的界面" }, - { - "name": "南方电网接口格式", - "synonymous": [ - "南网规约接口" - ], - "description": "符合南方电网数据交换标准的接口规范,用于导出符合南方电网规范的接口数据,以便上传至基建一体化信息系统。" - }, { "name": "投标限价数据", "synonymous": [], diff --git a/data/nouns/professional_nouns_index/index.faiss b/data/nouns/professional_nouns_index/index.faiss index d8c5910..8599386 100644 Binary files a/data/nouns/professional_nouns_index/index.faiss and b/data/nouns/professional_nouns_index/index.faiss differ diff --git a/data/nouns/professional_nouns_index/index.pkl b/data/nouns/professional_nouns_index/index.pkl index 711e251..462a759 100644 Binary files a/data/nouns/professional_nouns_index/index.pkl and b/data/nouns/professional_nouns_index/index.pkl differ diff --git a/rag2_0/dify/DifyQueryRetrieval.py b/rag2_0/dify/DifyQueryRetrieval.py index d524287..38b8568 100644 --- a/rag2_0/dify/DifyQueryRetrieval.py +++ b/rag2_0/dify/DifyQueryRetrieval.py @@ -41,7 +41,7 @@ class DifyQueryRetrieval: def retrieve_by_dataset(self, query: str, dataset_name: str) -> List[Dict[str, Any]]: try: knowledge_base_client = KnowledgeBaseClient(api_key=self._api_key, base_url=self._base_url, dataset_id=self._datasets_list[dataset_name]) - documents = knowledge_base_client.retrieve(query) + documents = knowledge_base_client.retrieve(query, timeout=300) retrieved_documents = documents.json().get("records", []) # 添加数据集信息 diff --git a/rag2_0/dify/export_new_dify.py b/rag2_0/dify/export_new_dify.py index 6385dce..6eaad46 100644 --- a/rag2_0/dify/export_new_dify.py +++ b/rag2_0/dify/export_new_dify.py @@ -117,9 +117,14 @@ class DifyExporter: intent_result = json.loads(intent_node_execution_info[0]["outputs"]) vertical_classification = intent_result.get("vertical_classification", "") sub_classification = intent_result.get("sub_classification", "") - if vertical_classification == "固定话术类": + if sub_classification == "固定话术类": return "使用固定话术" + worker_node_execution_info = [node_execution_info for node_execution_info in msg_debug_info['workflow_node_executions_info'] + if node_execution_info["title"] == "检索工单数据"] + if len(worker_node_execution_info) != 0: + return "检索工单" + return "" def get_node_info_by_title(self, workflow_node_executions_info:list, title:str) -> dict: @@ -198,13 +203,12 @@ class DifyExporter: return None wiki_list = self.get_wiki_list(msg_debug_info) - # 获取备注 - remark = self.get_remark(msg_debug_info) - wiki_list = list(set(wiki_list)) - wiki_list_str = "\n".join(wiki_list) - if wiki_list_str == "": - wiki_list_str = "无" + if len(wiki_list) ==0: + wiki_list_str = self.get_remark(msg_debug_info) + else: + wiki_list = list(set(wiki_list)) + wiki_list_str = "\n".join(wiki_list) rating = self.dify_pgsql.get_message_rating(msg_id) # 直接通过字典键获取query_type workflow_run_id = message['workflow_run_id'] @@ -220,7 +224,6 @@ class DifyExporter: "评价": rating, "问题分类": query_type, "检索到的词条": wiki_list_str, - "备注": remark } def process_conversations(self): @@ -274,7 +277,7 @@ class DifyExporter: # 设置列的顺序 columns_order = [ "msg_id","当前软件", "提问", "回答", "提问人", "提问时间", - "评价", "问题分类", "检索到的词条", "备注" + "评价", "问题分类", "检索到的词条" ] # 确保所有列都存在,如果不存在则添加空列 diff --git a/rag2_0/tool/ModelTool.py b/rag2_0/tool/ModelTool.py index 86a5472..0978d86 100755 --- a/rag2_0/tool/ModelTool.py +++ b/rag2_0/tool/ModelTool.py @@ -38,7 +38,7 @@ class SiliconFlowEmbeddings(Embeddings): "input": input, "encoding_format": "float" } - response = requests.post(self.url, json=payload, headers=self.headers) + response = requests.post(self.url, json=payload, headers=self.headers, timeout=300) response.raise_for_status() data = response.json() return [item["embedding"] for item in data["data"]] @@ -50,7 +50,7 @@ class SiliconFlowEmbeddings(Embeddings): "input": input, "encoding_format": "float" } - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=300) as client: response = await client.post(self.url, json=payload, headers=self.headers) response.raise_for_status() data = response.json() @@ -101,7 +101,7 @@ class SiliconFlowReRankerModel: "Content-Type": "application/json" } try: - response = requests.post(url, json=payload, headers=headers) + response = requests.post(url, json=payload, headers=headers, timeout=300) response.raise_for_status() results = response.json() return [{"document": item["document"]["text"], "score": item["relevance_score"], "index": item["index"]} for item in results["results"]] @@ -138,7 +138,7 @@ class SiliconFlowReRankerModel: "Content-Type": "application/json" } try: - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=300) as client: response = await client.post(url, json=payload, headers=headers) response.raise_for_status() results = response.json() @@ -173,7 +173,7 @@ class XinferenceReRankerModel: } try: - response = requests.post(url, json=params, headers=headers) + response = requests.post(url, json=params, headers=headers, timeout=300) response.raise_for_status() # 检查响应状态 results = response.json() @@ -206,7 +206,7 @@ class XinferenceReRankerModel: } try: - async with httpx.AsyncClient() as client: + async with httpx.AsyncClient(timeout=300) as client: response = await client.post(url, json=params, headers=headers) response.raise_for_status() # 检查响应状态 results = response.json()