1、调整规费相关问题的分类

2、意图识别增加清单、定额字段
This commit is contained in:
2025-08-21 17:51:55 +08:00
parent 1a3fa44522
commit 53ac47f4a5
4 changed files with 24 additions and 13 deletions
+10 -6
View File
@@ -4,7 +4,8 @@ import datetime
import logging
import concurrent.futures
import threading
from dotenv import load_dotenv
load_dotenv()
# 配置日志
logging.basicConfig(
level=logging.INFO,
@@ -22,7 +23,7 @@ import pandas as pd
class WorkorderToDify:
def __init__(self, excel_path="data/excel/2025.1-6月工单(人工整理后).xlsx"):
def __init__(self, excel_path="data/excel/2025.1-6月工单(已补充标签)-人工删减.xlsx"):
self.pd_data = pd.read_excel(excel_path)
self.dify_api = DifyApi.DifyApi()
self.dataset_ids = {}
@@ -162,9 +163,6 @@ class WorkorderToDify:
result_workorders = []
for query, data in merged_workorders.items():
workorder = data["workorder"].copy()
# 合并所有内容,使用换行符和分隔符分隔
if len(data["contents"])>1:
breakpoint()
merged_content = "\n\n---\n\n".join(data["contents"])
workorder["content"] = merged_content
result_workorders.append(workorder)
@@ -174,6 +172,11 @@ class WorkorderToDify:
logging.info("所有技能组工单去重处理完成")
def del_all_document(self):
    """Delete all documents from every dataset tracked by this instance.

    Walks the dataset ids stored in ``self.dataset_ids`` and asks the Dify
    API client (``self.dify_api``) to remove all documents of each one via
    ``remove_dataset_all_doc``.

    Returns:
        None.
    """
    # Only the dataset ids are needed; the keys of the mapping are not
    # used here, so iterate the values directly.
    for dataset_id in self.dataset_ids.values():
        self.dify_api.remove_dataset_all_doc(dataset_id)
def upload_workorders(self):
"""上传每个技能组的工单作为独立文档"""
logging.info("开始上传工单文档")
@@ -185,7 +188,7 @@ class WorkorderToDify:
lock = threading.Lock()
# 创建一个线程池
max_workers = min(20, os.cpu_count() * 5) # 最多20个线程,或者CPU核心数的5倍
max_workers = min(20, os.cpu_count() * 2) # 最多20个线程,或者CPU核心数的2倍
logging.info(f"创建线程池,最大线程数: {max_workers}")
def upload_document(args):
@@ -260,6 +263,7 @@ class WorkorderToDify:
self.check_and_create_metadata()
self.classify_workorders()
self.deduplicate_workorders()
# self.del_all_document()
self.upload_workorders()
+5 -2
View File
@@ -67,6 +67,7 @@ class IntentRecognizeResponse(BaseModel):
has_slot_filling: bool = False
slot_filling: SlotFillingResponse = Field(default_factory=SlotFillingResponse)
query_expand: QueryExpandResponse = Field(default_factory=QueryExpandResponse)
dinge_qingdan_info: Dict[str, Any] = Field(default_factory=dict)
# 创建FastAPI应用
app = FastAPI(
@@ -118,6 +119,7 @@ async def intent_recognize(request: IntentRecognizeRequest):
enable_query_expansion=enable_query_expansion,
cur_soft_name=current_softname
)
dinge_qingdan_info = result["dinge_qingdan_info"]
end_time = time.time()
current_time = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S %z")
@@ -152,7 +154,8 @@ async def intent_recognize(request: IntentRecognizeRequest):
missing_slots=slot_filling.get("missing_slots", {}),
filled_data=slot_filling.get("filled_data", {})
),
query_expand=QueryExpandResponse(**result["query_expand"])
query_expand=QueryExpandResponse(**result["query_expand"]),
dinge_qingdan_info=dinge_qingdan_info
)
return response
@@ -174,7 +177,7 @@ if __name__ == "__main__":
uvicorn.run(
"rag2_0.dify.intent_recognition_api:app",
host="0.0.0.0",
port=8001,
port=9001,
reload=True, # 开发环境启用热重载
workers=1 # 生产环境可以增加worker数量
)
@@ -64,7 +64,7 @@ class AsyncIntentRecognizer:
model_name = os.getenv("MODEL_NAME", "gpt-3.5-turbo")
# 初始化LLM
llm_params = {
"temperature": 0.2, # 降低随机性,使结果更确定
"temperature": 0.4, # 降低随机性,使结果更确定
"top_p": 0.7,
"model": model_name,
"api_key": api_key,
+8 -4
View File
@@ -54,10 +54,14 @@ classification_info="""【垂直领域分类】:
3. 安装下载类:安装下载咨询、组件(插件)选择、环境配置、安装包下载、政策文件(规范文件)下载等
4. 问题排查类:软件安装下载失败、报错,系统兼容性问题等
【固定话术类包括以下类】:
1. 规费咨询:咨询规费(规费、社保费、公积金费等)费用考虑、构成、设置等。才属于该类
规费报表在哪显示-->属于软件功能
规费怎么设置、社保费是多少-->属于规费咨询
【固定话术类包括以下类】:
1. 规费咨询
**以下两种情况才属于该类**
1、当询问规费(如社会保障费和住房公积金)费率是/填多少
2、去哪里获取规费费率
**其余涉及规费的属于其他垂直领域分类**
【其他】:
1. 其他