1、调整规费相关问题的分类

2、意图识别增加清单、定额字段
This commit is contained in:
2025-08-21 17:51:55 +08:00
parent 1a3fa44522
commit 53ac47f4a5
4 changed files with 24 additions and 13 deletions
+10 -6
View File
@@ -4,7 +4,8 @@ import datetime
import logging
import concurrent.futures
import threading
from dotenv import load_dotenv
load_dotenv()
# 配置日志
logging.basicConfig(
level=logging.INFO,
@@ -22,7 +23,7 @@ import pandas as pd
class WorkorderToDify:
def __init__(self, excel_path="data/excel/2025.1-6月工单(人工整理后).xlsx"):
def __init__(self, excel_path="data/excel/2025.1-6月工单(已补充标签)-人工删减.xlsx"):
self.pd_data = pd.read_excel(excel_path)
self.dify_api = DifyApi.DifyApi()
self.dataset_ids = {}
@@ -162,9 +163,6 @@ class WorkorderToDify:
result_workorders = []
for query, data in merged_workorders.items():
workorder = data["workorder"].copy()
# 合并所有内容,使用换行符和分隔符分隔
if len(data["contents"])>1:
breakpoint()
merged_content = "\n\n---\n\n".join(data["contents"])
workorder["content"] = merged_content
result_workorders.append(workorder)
@@ -174,6 +172,11 @@ class WorkorderToDify:
logging.info("所有技能组工单去重处理完成")
def del_all_document(self):
    """Remove every document from each dataset tracked by this instance.

    Calls ``self.dify_api.remove_dataset_all_doc`` once per dataset id
    stored in ``self.dataset_ids``, in the mapping's iteration order.
    Does nothing when ``self.dataset_ids`` is empty.

    NOTE(review): presumably this is destructive and irreversible on the
    Dify side -- confirm against ``DifyApi.remove_dataset_all_doc`` before
    enabling it in the main pipeline.

    Returns:
        None
    """
    # Only the dataset ids are needed; the mapping keys are not used here.
    for dataset_id in self.dataset_ids.values():
        self.dify_api.remove_dataset_all_doc(dataset_id)
def upload_workorders(self):
"""上传每个技能组的工单作为独立文档"""
logging.info("开始上传工单文档")
@@ -185,7 +188,7 @@ class WorkorderToDify:
lock = threading.Lock()
# 创建一个线程池
max_workers = min(20, os.cpu_count() * 5) # 最多20个线程,或者CPU核心数的5倍
max_workers = min(20, os.cpu_count() * 2) # 最多20个线程,或者CPU核心数的2倍
logging.info(f"创建线程池,最大线程数: {max_workers}")
def upload_document(args):
@@ -260,6 +263,7 @@ class WorkorderToDify:
self.check_and_create_metadata()
self.classify_workorders()
self.deduplicate_workorders()
# self.del_all_document()
self.upload_workorders()
+5 -2
View File
@@ -67,6 +67,7 @@ class IntentRecognizeResponse(BaseModel):
has_slot_filling: bool = False
slot_filling: SlotFillingResponse = Field(default_factory=SlotFillingResponse)
query_expand: QueryExpandResponse = Field(default_factory=QueryExpandResponse)
dinge_qingdan_info: Dict[str, Any] = Field(default_factory=dict)
# 创建FastAPI应用
app = FastAPI(
@@ -118,6 +119,7 @@ async def intent_recognize(request: IntentRecognizeRequest):
enable_query_expansion=enable_query_expansion,
cur_soft_name=current_softname
)
dinge_qingdan_info = result["dinge_qingdan_info"]
end_time = time.time()
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S %z")
@@ -152,7 +154,8 @@ async def intent_recognize(request: IntentRecognizeRequest):
missing_slots=slot_filling.get("missing_slots", {}),
filled_data=slot_filling.get("filled_data", {})
),
query_expand=QueryExpandResponse(**result["query_expand"])
query_expand=QueryExpandResponse(**result["query_expand"]),
dinge_qingdan_info=dinge_qingdan_info
)
return response
@@ -174,7 +177,7 @@ if __name__ == "__main__":
uvicorn.run(
"rag2_0.dify.intent_recognition_api:app",
host="0.0.0.0",
port=8001,
port=9001,
reload=True, # 开发环境启用热重载
workers=1 # 生产环境可以增加worker数量
)