diff --git a/rag2_0/dify/WorkorderToDify.py b/rag2_0/dify/WorkorderToDify.py index 3e1bcaf..c3e56f7 100644 --- a/rag2_0/dify/WorkorderToDify.py +++ b/rag2_0/dify/WorkorderToDify.py @@ -4,7 +4,8 @@ import datetime import logging import concurrent.futures import threading - +from dotenv import load_dotenv +load_dotenv() # 配置日志 logging.basicConfig( level=logging.INFO, @@ -22,7 +23,7 @@ import pandas as pd class WorkorderToDify: - def __init__(self, excel_path="data/excel/2025.1-6月工单(人工整理后).xlsx"): + def __init__(self, excel_path="data/excel/2025.1-6月工单(已补充标签)-人工删减.xlsx"): self.pd_data = pd.read_excel(excel_path) self.dify_api = DifyApi.DifyApi() self.dataset_ids = {} @@ -162,9 +163,6 @@ class WorkorderToDify: result_workorders = [] for query, data in merged_workorders.items(): workorder = data["workorder"].copy() - # 合并所有内容,使用换行符和分隔符分隔 - if len(data["contents"])>1: - breakpoint() merged_content = "\n\n---\n\n".join(data["contents"]) workorder["content"] = merged_content result_workorders.append(workorder) @@ -174,6 +172,11 @@ class WorkorderToDify: logging.info("所有技能组工单去重处理完成") + def del_all_document(self): + """删除所有文档""" + for skill_group, dataset_id in self.dataset_ids.items(): + self.dify_api.remove_dataset_all_doc(dataset_id) + def upload_workorders(self): """上传每个技能组的工单作为独立文档""" logging.info("开始上传工单文档") @@ -185,7 +188,7 @@ class WorkorderToDify: lock = threading.Lock() # 创建一个线程池 - max_workers = min(20, os.cpu_count() * 5) # 最多20个线程,或者CPU核心数的5倍 + max_workers = min(20, os.cpu_count() * 2) # 最多20个线程,或者CPU核心数的2倍 logging.info(f"创建线程池,最大线程数: {max_workers}") def upload_document(args): @@ -260,6 +263,7 @@ class WorkorderToDify: self.check_and_create_metadata() self.classify_workorders() self.deduplicate_workorders() + # self.del_all_document() self.upload_workorders() diff --git a/rag2_0/dify/intent_recognition_api.py b/rag2_0/dify/intent_recognition_api.py index ccccb0a..c0687f6 100755 --- a/rag2_0/dify/intent_recognition_api.py +++ 
b/rag2_0/dify/intent_recognition_api.py @@ -67,6 +67,7 @@ class IntentRecognizeResponse(BaseModel): has_slot_filling: bool = False slot_filling: SlotFillingResponse = Field(default_factory=SlotFillingResponse) query_expand: QueryExpandResponse = Field(default_factory=QueryExpandResponse) + dinge_qingdan_info: Dict[str, Any] = Field(default_factory=dict) # 创建FastAPI应用 app = FastAPI( @@ -118,6 +119,7 @@ async def intent_recognize(request: IntentRecognizeRequest): enable_query_expansion=enable_query_expansion, cur_soft_name=current_softname ) + dinge_qingdan_info = result["dinge_qingdan_info"] end_time = time.time() current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S %z") @@ -152,7 +154,8 @@ async def intent_recognize(request: IntentRecognizeRequest): missing_slots=slot_filling.get("missing_slots", {}), filled_data=slot_filling.get("filled_data", {}) ), - query_expand=QueryExpandResponse(**result["query_expand"]) + query_expand=QueryExpandResponse(**result["query_expand"]), + dinge_qingdan_info=dinge_qingdan_info ) return response @@ -174,7 +177,7 @@ if __name__ == "__main__": uvicorn.run( "rag2_0.dify.intent_recognition_api:app", host="0.0.0.0", - port=8001, + port=9001, reload=True, # 开发环境启用热重载 workers=1 # 生产环境可以增加worker数量 ) diff --git a/rag2_0/intent_recognition/IntentRecognition.py b/rag2_0/intent_recognition/IntentRecognition.py index 8c04089..53c33fc 100755 --- a/rag2_0/intent_recognition/IntentRecognition.py +++ b/rag2_0/intent_recognition/IntentRecognition.py @@ -64,7 +64,7 @@ class AsyncIntentRecognizer: model_name = os.getenv("MODEL_NAME", "gpt-3.5-turbo") # 初始化LLM llm_params = { - "temperature": 0.2, # 降低随机性,使结果更确定 + "temperature": 0.4, # 降低随机性,使结果更确定 "top_p": 0.7, "model": model_name, "api_key": api_key, diff --git a/rag2_0/intent_recognition/PromptTemplates.py b/rag2_0/intent_recognition/PromptTemplates.py index 6ceb363..03ed6cc 100755 --- a/rag2_0/intent_recognition/PromptTemplates.py +++ b/rag2_0/intent_recognition/PromptTemplates.py @@ 
-54,10 +54,14 @@ classification_info="""【垂直领域分类】: 3. 安装下载类:安装下载咨询、组件(插件)选择、环境配置、安装包下载、政策文件(规范文件)下载等 4. 问题排查类:软件安装下载失败、报错,系统兼容性问题等 -【固定话术类包括以下一类】: -1. 规费咨询:咨询规费(规费、社保费、公积金费等)费用考虑、构成、设置等。才属于该类 -规费报表在哪显示-->属于软件功能 -规费怎么设置、社保费是多少-->属于规费咨询 +【固定话术类包括以下类】: + +1. 规费咨询 + **以下两种情况才属于该类** + 1、当询问规费(如社会保障费和住房公积金)费率是/填多少 + 2、去哪里获取规费费率 + **其余涉及规费的属于其他垂直领域分类** + 【其他】: 1. 其他