From 82724d206b50700971bbefeec43f8e4235b3a22f Mon Sep 17 00:00:00 2001 From: ouyangyouzhang Date: Tue, 15 Jul 2025 09:27:44 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0=E6=97=A5=E5=BF=97=E7=9B=AE?= =?UTF-8?q?=E5=BD=95=E5=88=9B=E5=BB=BA=E9=80=BB=E8=BE=91=EF=BC=8C=E8=B0=83?= =?UTF-8?q?=E6=95=B4=E5=AF=B9=E8=AF=9D=E5=88=B0=E5=B7=A5=E5=8D=95=E7=9A=84?= =?UTF-8?q?=E6=97=A5=E6=9C=9F=E5=8F=82=E6=95=B0=E9=BB=98=E8=AE=A4=E5=80=BC?= =?UTF-8?q?=EF=BC=8C=E6=96=B0=E5=A2=9E=E5=AF=B9=E8=AF=9D=E8=AE=B0=E5=BD=95?= =?UTF-8?q?=E5=88=86=E6=9E=90=E5=8A=9F=E8=83=BD=EF=BC=8C=E4=BC=98=E5=8C=96?= =?UTF-8?q?API=E5=AF=86=E9=92=A5=E7=AE=A1=E7=90=86=E5=99=A8=E4=B8=AD?= =?UTF-8?q?=E7=9A=84=E8=B4=AD=E4=B9=B0=E4=BD=99=E9=A2=9D=E8=AE=A1=E7=AE=97?= =?UTF-8?q?=EF=BC=8C=E5=B9=B6=E6=B7=BB=E5=8A=A0=E5=A4=9A=E4=B8=AAAPI?= =?UTF-8?q?=E5=AF=86=E9=92=A5=E3=80=82=E5=90=8C=E6=97=B6=EF=BC=8C=E6=96=B0?= =?UTF-8?q?=E5=A2=9E=E6=95=B0=E6=8D=AE=E5=A4=84=E7=90=86=E5=92=8C=E5=88=86?= =?UTF-8?q?=E6=9E=90=E6=A8=A1=E5=9D=97=E4=BB=A5=E6=94=AF=E6=8C=81=E5=B7=A5?= =?UTF-8?q?=E5=8D=95=E9=97=AE=E7=AD=94=E6=95=B0=E6=8D=AE=E7=9A=84=E4=B8=8A?= =?UTF-8?q?=E4=BC=A0=E5=92=8C=E5=A4=84=E7=90=86=E3=80=82?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api_key.txt | 49 ----------- rag2_0/demo/dialogue_to_workorder.py | 11 +-- rag2_0/demo/heli_db_to_excel.py | 5 +- rag2_0/dify/AnalysisDifyAnswer.py | 116 +++++++++++++++++++++++++++ rag2_0/dify/WorkorderToDify.py | 29 +++++++ rag2_0/tool/APIKeyManager.py | 15 ++-- 6 files changed, 163 insertions(+), 62 deletions(-) create mode 100644 rag2_0/dify/AnalysisDifyAnswer.py create mode 100644 rag2_0/dify/WorkorderToDify.py diff --git a/api_key.txt b/api_key.txt index 7ea2aae..23bad89 100644 --- a/api_key.txt +++ b/api_key.txt @@ -1,52 +1,3 @@ -sk-uollmeyatyiwfzszvxkpyndmzfrbqjpyixewmrastbmaqbhy -sk-xdlsjytiwilvodadkjxvwdgulhhdytkqvfpyrcnllclgzqkb -sk-ffkltifkylutornjhwmnmfjsqsywrjibvujhjtjctzgnkvlp -sk-vmwocqqjqxnsvzmeyvqskahjaclifpmsbhywvnrvwygkfyuj -sk-gzwkmzxeeunaywrdrgirdatqhdtqdgvzqpesvprwbbjhcchn -sk-duchutcxmygrnkhzmmlykvtzwaylqtdxfbbuhvfvzuapazii -sk-nlddwexmjxqtgdvahwvlotnomrzcgskxeakxkxauicknzfkp -sk-lopwluipwvilwpwztvaxfebueeyilefwgncgpeprqvwazxom -sk-rgwrklpvhhrluokkbgavzukuhhpfhqzmozpjzoezfhkxyorc -sk-cdrpglnfmyeeqyhtvxvkpcpwscsbfouwkagjpphuksfzeipy -sk-eyktixexxjqvwufezcmdrazcedtsphyiunhqpamlkrcaxwtg -sk-euzbguamsxqspfdjnrpbchkjkouncqipjvhnkkbvoihgwspe -sk-qlpoqleqaodseswzqklbwjwwcdjrqthbmvweuablibiszpnw -sk-gqjtkwjmrupugviflhsffhkpzxxmjcewviqsneurxnlfqewy -sk-hkxgjpdyxuxjklksunfzaetrhveelucmrldfjnlztibxgjgl -sk-fqjkatqvpkmvlkbqhjfkzmiifmiodayututyprdtldszmacd -sk-wrybirbfwtbfdijjrfpdfxlxzmvcrhfgqqbhyuysibmcmcez -sk-vgkpsmbcchymktakjsheqnjlkopiqcvntqcvxuxmxlllifod -sk-afvrxowsmjmlhguhfpfhmefcldzsohmnyumjdwwkrvzwjrym -sk-jwfemfolekojghqwfwvjxwmzdvdnhaznngqeurvzgwqepddp -sk-vxzwcizvmykzwbxaypvshdkbgjymnmelryyqftkxzxnnpibr -sk-bwcbyczfakqkwrsvmmraeikjfpiurhhanflfqkunpzezvagv -sk-dicoheyoylocrtjsacbvwqzwwibnnasuwhnhsudoemieengi -sk-mvuzqvuqmryqrqolztwthhzcnzibygjycdbeplbbcnbxkncq -sk-qavyzqmnfjbhqhuesfkjcyszkzmjsykpnaebuwzcxhkitoir -sk-jhrbmqmzyrlrqyehkkachwhztkuisphqbmoyvchmaqrcfxtf -sk-mtcfttyljuzteasdjbcrtwwwmqxetmaonbwvojzfijzfiplq -sk-iooturknmbpxhbiulovovtzpyayovyidifzpzkjgqacroxjt -sk-jgjhasobxeuzfgutdyuhcejocwdiwonhkdithchkonhxnawc -sk-mvyqkciotllopyozsfzwtjeicuhnvoihnrrxadfsfiakperx -sk-wticrisyjehvnlrlhmmxhrnlzknqpnkfxowlzvnozskvtvzc -sk-hiniqrfvuqlsgmhrqlezlribsaqdefuhpxxfavoqtszxtasw -sk-zlfncovfrpzczmjquirolpogdrzfarkcwwqkluvifwcvrezq -sk-blhnvpenedysngftlghrkxhweoququkvduikziuypzilpyrp -sk-rynvudqktwvnjleiahwdpeqqdkncsvawvjyicyiojoviiges -sk-ncskqkwomgqpfnfnachehkeaczzgiiuripeyzjrpnuzeosnn -sk-ijqdmtyeuqrbndqjggxyicfjrmpsgbsjwwkitgsmxqvcjrri -sk-ajgisqnkpgoiwxigrnjachusupagqpukteuknemhmnxsasre -sk-zsskrhjnoepgjngcsseklxfpwpozhenurtrluxlstxujdsti -sk-lubrliuefgmrxpfafdwrhzletyvhemqkpvriuuqncivlewgx -sk-pavwfgrpftdtzeiiaoousmkdptwujoeocyzzoudeqkfyoxks -sk-jfwhzkhpgxacedwxzkbwrwvlfqvnlhxaeyghjcmtshogsqub -sk-sqmaankkcsqpbtktdfcmpuxjxgarfzgvygdgxgztlmyxfpkp -sk-xhlgjmwmtkahrpdncwoqynjdrkekmsyftwqmomsodbggvbdd -sk-ynqqptobbeazmjyrmaytsvyczsqwrukpezizrlcloncxtwvc -sk-wswttgfrxrwijvqhctfilhvlxgdkgogrjhvjkdbzvqrocofa -sk-jdijeubeygjmqtxwryrbwmrpvqawinzwpcxodpolhcupzmpa -sk-xbloemctsowwicjvrtrrewreosnfojoijtygsfxfnjntridv -sk-isovavcefvkzlbjewnumeqqevmnoucojsxwskkitfktkemtq sk-vxrlvvdzgythgyycuqehdloubxcdwhgojpowgxvgxsstjtvk sk-krgctzbdqekohpowmvftsjswgpxnwxadezeosdspelmtmukx sk-slcgfmphmbqwuvshoaygfkfaxpzcabtlpkhvfqjodajuynsl diff --git a/rag2_0/demo/dialogue_to_workorder.py b/rag2_0/demo/dialogue_to_workorder.py index bcf13aa..c731993 100755 --- a/rag2_0/demo/dialogue_to_workorder.py +++ b/rag2_0/demo/dialogue_to_workorder.py @@ -21,14 +21,14 @@ sys.path.append(os.getcwd()) from rag2_0.tool.ModelTool import OpenAiLLM load_dotenv() - +os.makedirs("data/logs", exist_ok=True) # 配置日志 logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ logging.StreamHandler(), - logging.FileHandler('dialogue_to_workorder.log', encoding='utf-8') + logging.FileHandler('data/logs/dialogue_to_workorder.log', encoding='utf-8') ] ) logger = logging.getLogger("dialogue_to_workorder") @@ -565,7 +565,7 @@ class DialogueToWorkorder: # 按照指定的列顺序重新排列DataFrame的列 columns_order = [ '工单编号', '产品线', '产品名称', '模块名称', '问题类型', - '客户问题', '解决方案', '是否抱怨', '是否投诉', '抱怨级别', + '客户问题', '解决方案', '是否抱怨', "抱怨内容", '是否投诉', '抱怨级别', '会话id', '访客昵称', '处理坐席', '创建时间' ] @@ -609,6 +609,7 @@ class DialogueToWorkorder: '客户问题': 20, '解决方案': 30, '是否抱怨': 9, + '抱怨内容': 30, '是否投诉': 9, '抱怨级别': 9, '会话id': 9, @@ -639,9 +640,9 @@ def parse_arguments(): help='产品详情Excel文件路径') parser.add_argument('--max_workers', type=int, default=16, help='并发处理线程数,默认为16') - parser.add_argument('--start_date', type=str, required=False,default="2025-05-25 00:00:00", + parser.add_argument('--start_date', type=str, required=False,default="2025-05-01 00:00:00", help='开始日期,格式为YYYY-MM-DD') - parser.add_argument('--end_date', type=str, required=False,default="2025-05-30 15:54", + parser.add_argument('--end_date', type=str, required=False,default="2025-05-24 23:59:59", help='结束日期,格式为YYYY-MM-DD') return parser.parse_args() diff --git a/rag2_0/demo/heli_db_to_excel.py b/rag2_0/demo/heli_db_to_excel.py index 7146933..d6a5b65 100755 --- a/rag2_0/demo/heli_db_to_excel.py +++ b/rag2_0/demo/heli_db_to_excel.py @@ -29,7 +29,7 @@ logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[ - logging.FileHandler('./data/log/mariadb_client.log'), + logging.FileHandler('./data/logs/mariadb_client.log'), logging.StreamHandler() ] ) @@ -267,6 +267,9 @@ class DataProcessor: # 转换为字典列表 result = [] for record in filtered_df.to_dict('records'): + # 如果上一条消息和当前消息的发送者、创建时间、消息内容相同,则跳过 + if result and result[-1]['会话id'] == record['SESSION_ID'] and result[-1]['消息发送者'] == record['message_sender'] and result[-1]['创建时间'] == record['CREATE_TIME'] and result[-1]['消息内容'] == record['processed_content']: + continue result.append({ "账号id": record["ACCOUNT"], "会话id": record["SESSION_ID"], diff --git a/rag2_0/dify/AnalysisDifyAnswer.py b/rag2_0/dify/AnalysisDifyAnswer.py new file mode 100644 index 0000000..6174634 --- /dev/null +++ b/rag2_0/dify/AnalysisDifyAnswer.py @@ -0,0 +1,116 @@ +import pandas as pd +import json + +from regex import search + +import ijson + +df = pd.read_excel("data/excel/已分析数据汇总(第一轮).xlsx") +df=df[df["评价"]=="dislike"] + +msg_id_list = df["msg_id"].tolist() +msg_debug_list = {} +# 流式解析 JSON 数组 +with open("data/excel/msg_debug_list.json", "r", encoding="utf-8") as f: + # 使用ijson.items直接获取顶层键值对 + for msg_id, data in ijson.kvitems(f, ''): + if msg_id in msg_id_list: + msg_debug_list[msg_id] = data + +def get_rewrite_query(intent_node_execution_info)->str: + outputs_result =json.loads(intent_node_execution_info['outputs']) + return outputs_result['optimize_query'] + +def judge_error_node_and_reason(intent_node_execution_info, knowledge_filter_node_execution_info_list, answer_wiki_name)->dict: + result = {"问题改写结果":None, "错误环节":None, "错误原因":None, "具体描述":None} + if answer_wiki_name is None or pd.isna(answer_wiki_name): + return result + + outputs_result =json.loads(intent_node_execution_info['outputs']) + result["问题改写结果"] = outputs_result['optimize_query'] + if outputs_result['is_complete'] == False: + result["错误环节"] = "槽点填充" + result["错误原因"] = f"槽点缺失" + result["具体描述"] = f"缺失内容:{outputs_result['missing_slots']}" + return result + + if len(knowledge_filter_node_execution_info_list) == 0: + return result + + knowledge_filter_node_execution_info=knowledge_filter_node_execution_info_list[0] + # 获取检索到的所有词条 + knowledge_filter_outputs = json.loads(knowledge_filter_node_execution_info['outputs']) + source_knowledge = knowledge_filter_outputs['source_kno'] + source_knowledge_title ="\n".join([item['title'] for item in source_knowledge]) + if answer_wiki_name not in source_knowledge_title: + result["错误环节"] = "知识检索" + result["错误原因"] = f"未检索到对应词条" + + # 获取词条名称及对应评分 + result["具体描述"] = "检索到的词条如下:\n" + for index, item in enumerate(source_knowledge): + result["具体描述"] += f"词条名称:{item['title'].split('/')[-1]},重排评分:{item['metadata']['score']:.2f}\n" + return result + + # 获取检索到的词条的metadata + knowledge_filter = knowledge_filter_outputs['knowledge_list_metadata'] + knowledge_filter_title ="\n".join([item['title'] for item in knowledge_filter]) + if answer_wiki_name not in knowledge_filter_title: + result["错误环节"] = "知识过滤" + result["错误原因"] = f"词条被过滤" + result["具体描述"] = "检索到的词条如下:\n" + for index, item in enumerate(source_knowledge): + result["具体描述"] += f"词条名称:{item['title'].split('/')[-1]},重排评分:{item['metadata']['score']:.2f}\n" + return result + + # 检索正确,回答错误 + result["错误环节"] = "生成错误" + result["错误原因"] = f"" + result["具体描述"] = f"" + return result + +df["问题改写结果"] = None +df["错误环节"] = None +df["错误原因"] = None +df["具体描述"] = None + +for index, row in df.iterrows(): + try: + msg_id = row["msg_id"] + answer = row["回答"] + query = row["提问"] + rating = row["评价"] + class_type = row["问题分类"] + dislike_reason = row["点踩原因"] + if dislike_reason is None or pd.isna(dislike_reason): + continue + + answer_wiki_name = row["关联词条"] + search_wiki = row["检索到的词条"] + node_executions_info = msg_debug_list[msg_id] + intent_node_execution_info = [node_execution_info for node_execution_info in node_executions_info + if node_execution_info["title"] == "意图识别结果解析"] + + knowledge_filter_node_execution_info_list = [node_execution_info for node_execution_info in node_executions_info + if node_execution_info["title"] == "提取处理后的知识"] + if len(intent_node_execution_info) == 0: + print(f"msg_id: {msg_id} 缺少节点信息") + continue + + rewrite_query = get_rewrite_query(intent_node_execution_info[0]) + df.loc[index, "问题改写结果"] = rewrite_query + if "有词条" not in dislike_reason: + continue + result = judge_error_node_and_reason(intent_node_execution_info[0], knowledge_filter_node_execution_info_list, answer_wiki_name) + for key, value in result.items(): + df.loc[index, key] = value + + except Exception as e: + print(f"msg_id: {msg_id} 处理失败: {e}") + continue + +df.to_excel("data/excel/已分析数据汇总(第一轮)_分析.xlsx", index=False) + + + + diff --git a/rag2_0/dify/WorkorderToDify.py b/rag2_0/dify/WorkorderToDify.py new file mode 100644 index 0000000..d74e01a --- /dev/null +++ b/rag2_0/dify/WorkorderToDify.py @@ -0,0 +1,29 @@ +import os +import sys + +sys.path.append(os.getcwd()) +import rag2_0.dify.dify_client.dify_api as DifyApi + +import pandas as pd +pd_data = pd.read_excel("data/excel/2025年5月30日到6月10号对话记录_转工单.xlsx") + + +dify_api = DifyApi.DifyApi() +dataset_id = dify_api.get_or_create_dataset_by_name("工单问答数据") +document_id = dify_api.upload_text_to_document(text_name="5月30日到6月10号对话工单", text="", dataset_id=dataset_id) + +segments_list=[] +for index, row in pd_data.iterrows(): + query = row["客户问题"] + answer = row["解决方案"] + if "存在抱怨" in answer: + answer = answer.split("存在抱怨")[0] + + content = f"问题:{query}\n解决方案:{answer}" + segments_list.append({ + "content": str(content), + "answer": "", + "keywords": [] + }) + +dify_api.add_document_segments(dataset_id=dataset_id, document_id=document_id, segments_list=segments_list) \ No newline at end of file diff --git a/rag2_0/tool/APIKeyManager.py b/rag2_0/tool/APIKeyManager.py index b380fac..b5f58ed 100755 --- a/rag2_0/tool/APIKeyManager.py +++ b/rag2_0/tool/APIKeyManager.py @@ -275,17 +275,18 @@ if __name__ == "__main__": stats = instance.get_usage_stats() all_balance=0.0 - buy_balance=19 * 10 * 14 # 购买18次,一次10条api_key,每个api_key有14元 + buy_balance=24 * 10 * 14 # 购买18次,一次10条api_key,每个api_key有14元 invalid_api_keys = [] for key, data in stats.items(): usage_stats = APIKeyManager.get_key_usage_stats(key) all_balance+=float(usage_stats['data']['balance']) - valid,err_info = APIKeyManager.get_valid_api_keys(key) - if not valid: - print(f"api_key:{key}---赠送余额:{usage_stats['data']['balance']}元---报错信息:{err_info}") - # invalid_api_keys.append(key) - else: - print(f"api_key:{key}---赠送余额:{usage_stats['data']['balance']}元") + # valid,err_info = APIKeyManager.get_valid_api_keys(key) + # if not valid: + # print(f"api_key:{key}---赠送余额:{usage_stats['data']['balance']}元---报错信息:{err_info}") + # # invalid_api_keys.append(key) + # else: + # print(f"api_key:{key}---赠送余额:{usage_stats['data']['balance']}元") + print(f"api_key:{key}---赠送余额:{usage_stats['data']['balance']}元") if float(usage_stats['data']['balance']) == 0: invalid_api_keys.append(key)