优化数据处理逻辑,更新工单数据文件路径,增加多个工单数据集的创建与上传功能,同时调整查询参数以提高检索效果。
This commit is contained in:
@@ -1,8 +1,5 @@
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
import json
|
import json
|
||||||
|
|
||||||
from regex import search
|
|
||||||
|
|
||||||
import sys
|
import sys
|
||||||
import os
|
import os
|
||||||
sys.path.append(os.getcwd())
|
sys.path.append(os.getcwd())
|
||||||
|
|||||||
@@ -94,7 +94,7 @@ async def retrieve(request: RetrieveRequest):
|
|||||||
request.original_query,
|
request.original_query,
|
||||||
query_list,
|
query_list,
|
||||||
data_set_list,
|
data_set_list,
|
||||||
top_k=3
|
top_k=5
|
||||||
)
|
)
|
||||||
end_time = time.time()
|
end_time = time.time()
|
||||||
|
|
||||||
|
|||||||
@@ -5,25 +5,41 @@ sys.path.append(os.getcwd())
|
|||||||
import rag2_0.dify.dify_client.dify_api as DifyApi
|
import rag2_0.dify.dify_client.dify_api as DifyApi
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
pd_data = pd.read_excel("data/excel/2025年5月30日到6月10号对话记录_转工单.xlsx")
|
pd_data = pd.read_excel("data/excel/工单汇总(给AI)_2.xlsx")
|
||||||
|
|
||||||
|
|
||||||
dify_api = DifyApi.DifyApi()
|
dify_api = DifyApi.DifyApi()
|
||||||
dataset_id = dify_api.get_or_create_dataset_by_name("工单问答数据")
|
peiwang_dataset_id = dify_api.get_or_create_dataset_by_name("配网工单数据")
|
||||||
document_id = dify_api.upload_text_to_document(text_name="5月30日到6月10号对话工单", text="", dataset_id=dataset_id)
|
zhuwang_dataset_id = dify_api.get_or_create_dataset_by_name("主网工单数据")
|
||||||
|
jianga_dataset_id = dify_api.get_or_create_dataset_by_name("技改工单数据")
|
||||||
|
chuneng_dataset_id = dify_api.get_or_create_dataset_by_name("储能工单数据")
|
||||||
|
|
||||||
segments_list=[]
|
|
||||||
|
soft_segments_list={}
|
||||||
for index, row in pd_data.iterrows():
|
for index, row in pd_data.iterrows():
|
||||||
query = row["客户问题"]
|
query = row["客户问题"]
|
||||||
answer = row["解决方案"]
|
answer = row["解决方案"]
|
||||||
if "存在抱怨" in answer:
|
skill_group = row["技能组"]
|
||||||
answer = answer.split("存在抱怨")[0]
|
|
||||||
|
|
||||||
content = f"问题:{query}\n回答:{answer}"
|
content = f"问题:{query}\n回答:{answer}"
|
||||||
segments_list.append({
|
if skill_group not in soft_segments_list:
|
||||||
|
soft_segments_list[skill_group]=[]
|
||||||
|
soft_segments_list[skill_group].append({
|
||||||
"content": str(content),
|
"content": str(content),
|
||||||
"answer": "",
|
"answer": "",
|
||||||
"keywords": []
|
"keywords": []
|
||||||
})
|
})
|
||||||
|
|
||||||
dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list)
|
for skill_group, segments_list in soft_segments_list.items():
|
||||||
|
if skill_group == "配网":
|
||||||
|
dataset_id = peiwang_dataset_id
|
||||||
|
elif skill_group == "主网":
|
||||||
|
dataset_id = zhuwang_dataset_id
|
||||||
|
elif skill_group == "技改":
|
||||||
|
dataset_id = jianga_dataset_id
|
||||||
|
elif skill_group == "储能":
|
||||||
|
dataset_id = chuneng_dataset_id
|
||||||
|
document_id = dify_api.get_document_id(dataset_id=dataset_id, document_name=f"{skill_group}工单数据")
|
||||||
|
if not document_id:
|
||||||
|
document_id = dify_api.upload_text_to_document(text_name=f"{skill_group}工单数据", text="", dataset_id=dataset_id)
|
||||||
|
dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list)
|
||||||
Reference in New Issue
Block a user