From 75c09925261621aab7354ea1c64ba63112bbaf3d Mon Sep 17 00:00:00 2001 From: ouyangyouzhang Date: Thu, 17 Jul 2025 14:13:41 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96=E6=95=B0=E6=8D=AE=E5=A4=84?= =?UTF-8?q?=E7=90=86=E9=80=BB=E8=BE=91=EF=BC=8C=E6=9B=B4=E6=96=B0=E5=B7=A5?= =?UTF-8?q?=E5=8D=95=E6=95=B0=E6=8D=AE=E6=96=87=E4=BB=B6=E8=B7=AF=E5=BE=84?= =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=8A=A0=E5=A4=9A=E4=B8=AA=E5=B7=A5=E5=8D=95?= =?UTF-8?q?=E6=95=B0=E6=8D=AE=E9=9B=86=E7=9A=84=E5=88=9B=E5=BB=BA=E4=B8=8E?= =?UTF-8?q?=E4=B8=8A=E4=BC=A0=E5=8A=9F=E8=83=BD=EF=BC=8C=E5=90=8C=E6=97=B6?= =?UTF-8?q?=E8=B0=83=E6=95=B4=E6=9F=A5=E8=AF=A2=E5=8F=82=E6=95=B0=E4=BB=A5?= =?UTF-8?q?=E6=8F=90=E9=AB=98=E6=A3=80=E7=B4=A2=E6=95=88=E6=9E=9C=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- rag2_0/dify/AnalysisDifyAnswer.py | 3 --- rag2_0/dify/DifyQueryRetrieval_api.py | 2 +- rag2_0/dify/WorkorderToDify.py | 32 ++++++++++++++++++++------- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/rag2_0/dify/AnalysisDifyAnswer.py b/rag2_0/dify/AnalysisDifyAnswer.py index f30f78c..1194eac 100644 --- a/rag2_0/dify/AnalysisDifyAnswer.py +++ b/rag2_0/dify/AnalysisDifyAnswer.py @@ -1,8 +1,5 @@ import pandas as pd import json - -from regex import search - import sys import os sys.path.append(os.getcwd()) diff --git a/rag2_0/dify/DifyQueryRetrieval_api.py b/rag2_0/dify/DifyQueryRetrieval_api.py index ca80adc..fa61f14 100644 --- a/rag2_0/dify/DifyQueryRetrieval_api.py +++ b/rag2_0/dify/DifyQueryRetrieval_api.py @@ -94,7 +94,7 @@ async def retrieve(request: RetrieveRequest): request.original_query, query_list, data_set_list, - top_k=3 + top_k=5 ) end_time = time.time() diff --git a/rag2_0/dify/WorkorderToDify.py b/rag2_0/dify/WorkorderToDify.py index dd994ce..3924140 100644 --- a/rag2_0/dify/WorkorderToDify.py +++ b/rag2_0/dify/WorkorderToDify.py @@ -5,25 +5,41 @@ sys.path.append(os.getcwd()) import rag2_0.dify.dify_client.dify_api as DifyApi import pandas as pd -pd_data = pd.read_excel("data/excel/2025年5月30日到6月10号对话记录_转工单.xlsx") +pd_data = pd.read_excel("data/excel/工单汇总(给AI)_2.xlsx") dify_api = DifyApi.DifyApi() -dataset_id = dify_api.get_or_create_dataset_by_name("工单问答数据") -document_id = dify_api.upload_text_to_document(text_name="5月30日到6月10号对话工单", text="", dataset_id=dataset_id) +peiwang_dataset_id = dify_api.get_or_create_dataset_by_name("配网工单数据") +zhuwang_dataset_id = dify_api.get_or_create_dataset_by_name("主网工单数据") +jianga_dataset_id = dify_api.get_or_create_dataset_by_name("技改工单数据") +chuneng_dataset_id = dify_api.get_or_create_dataset_by_name("储能工单数据") -segments_list=[] + +soft_segments_list={} for index, row in pd_data.iterrows(): query = row["客户问题"] answer = row["解决方案"] - if "存在抱怨" in answer: - answer = answer.split("存在抱怨")[0] + skill_group = row["技能组"] content = f"问题:{query}\n回答:{answer}" - segments_list.append({ + if skill_group not in soft_segments_list: + soft_segments_list[skill_group]=[] + soft_segments_list[skill_group].append({ "content": str(content), "answer": "", "keywords": [] }) -dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list) \ No newline at end of file +for skill_group, segments_list in soft_segments_list.items(): + if skill_group == "配网": + dataset_id = peiwang_dataset_id + elif skill_group == "主网": + dataset_id = zhuwang_dataset_id + elif skill_group == "技改": + dataset_id = jianga_dataset_id + elif skill_group == "储能": + dataset_id = chuneng_dataset_id + document_id = dify_api.get_document_id(dataset_id=dataset_id, document_name=f"{skill_group}工单数据") + if not document_id: + document_id = dify_api.upload_text_to_document(text_name=f"{skill_group}工单数据", text="", dataset_id=dataset_id) + dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list) \ No newline at end of file