NOTE(review): rebuilt from a copy of this patch whose line breaks and indentation had been collapsed.
Context-line whitespace (blank lines, indentation in DifyQueryRetrieval_api.py) and the post-image blob
hash of WorkorderToDify.py are best guesses -- regenerate with `git diff` before applying.

diff --git a/rag2_0/dify/AnalysisDifyAnswer.py b/rag2_0/dify/AnalysisDifyAnswer.py
index f30f78c..1194eac 100644
--- a/rag2_0/dify/AnalysisDifyAnswer.py
+++ b/rag2_0/dify/AnalysisDifyAnswer.py
@@ -1,8 +1,5 @@
 import pandas as pd
 import json
-
-from regex import search
-
 import sys
 import os
 sys.path.append(os.getcwd())
diff --git a/rag2_0/dify/DifyQueryRetrieval_api.py b/rag2_0/dify/DifyQueryRetrieval_api.py
index ca80adc..fa61f14 100644
--- a/rag2_0/dify/DifyQueryRetrieval_api.py
+++ b/rag2_0/dify/DifyQueryRetrieval_api.py
@@ -94,7 +94,7 @@ async def retrieve(request: RetrieveRequest):
             request.original_query,
             query_list,
             data_set_list,
-            top_k=3
+            top_k=5
         )
 
         end_time = time.time()
diff --git a/rag2_0/dify/WorkorderToDify.py b/rag2_0/dify/WorkorderToDify.py
index dd994ce..3924140 100644
--- a/rag2_0/dify/WorkorderToDify.py
+++ b/rag2_0/dify/WorkorderToDify.py
@@ -5,25 +5,41 @@ sys.path.append(os.getcwd())
 import rag2_0.dify.dify_client.dify_api as DifyApi
 import pandas as pd
 
-pd_data = pd.read_excel("data/excel/2025年5月30日到6月10号对话记录_转工单.xlsx")
+pd_data = pd.read_excel("data/excel/工单汇总(给AI)_2.xlsx")
 
 dify_api = DifyApi.DifyApi()
 
-dataset_id = dify_api.get_or_create_dataset_by_name("工单问答数据")
-document_id = dify_api.upload_text_to_document(text_name="5月30日到6月10号对话工单", text="", dataset_id=dataset_id)
+# Skill group (the "技能组" column) -> id of the Dify dataset that stores its work orders.
+skill_group_dataset_ids = {
+    "配网": dify_api.get_or_create_dataset_by_name("配网工单数据"),
+    "主网": dify_api.get_or_create_dataset_by_name("主网工单数据"),
+    "技改": dify_api.get_or_create_dataset_by_name("技改工单数据"),
+    "储能": dify_api.get_or_create_dataset_by_name("储能工单数据"),
+}
 
-segments_list=[]
+# Segment payloads collected per known skill group.
+segments_by_skill_group = {}
 for index, row in pd_data.iterrows():
     query = row["客户问题"]
     answer = row["解决方案"]
-    if "存在抱怨" in answer:
-        answer = answer.split("存在抱怨")[0]
+    skill_group = row["技能组"]
+    if skill_group not in skill_group_dataset_ids:
+        # Blank or unexpected group: there is no dataset to route it to, so skip the
+        # row rather than fall through with an unbound / previous group's dataset_id.
+        print(f"skip row {index}: unknown skill group {skill_group!r}")
+        continue
     content = f"问题:{query}\n回答:{answer}"
-    segments_list.append({
+    segments_by_skill_group.setdefault(skill_group, []).append({
         "content": str(content),
         "answer": "",
         "keywords": []
     })
 
 
-dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list)
\ No newline at end of file
+for skill_group, segments_list in segments_by_skill_group.items():
+    dataset_id = skill_group_dataset_ids[skill_group]
+    document_name = f"{skill_group}工单数据"
+    document_id = dify_api.get_document_id(dataset_id=dataset_id, document_name=document_name)
+    if not document_id:
+        document_id = dify_api.upload_text_to_document(text_name=document_name, text="", dataset_id=dataset_id)
+    dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list)