From 20207fdd1bc8eae1b8dc5eb1b5f4068d0feb9699 Mon Sep 17 00:00:00 2001 From: ouyangyouzhang Date: Fri, 27 Jun 2025 18:12:51 +0800 Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E6=96=B0API=E5=AF=86=E9=92=A5?= =?UTF-8?q?=EF=BC=8C=E4=BC=98=E5=8C=96=E6=84=8F=E5=9B=BE=E8=AF=86=E5=88=AB?= =?UTF-8?q?=E7=A4=BA=E4=BE=8B=EF=BC=8C=E8=B0=83=E6=95=B4=E6=96=87=E6=A1=A3?= =?UTF-8?q?=E7=9B=B8=E5=85=B3=E6=80=A7=E5=88=A4=E6=96=AD=E9=80=BB=E8=BE=91?= =?UTF-8?q?=EF=BC=8C=E5=A2=9E=E5=BC=BAExcel=E6=95=B0=E6=8D=AE=E9=AA=8C?= =?UTF-8?q?=E8=AF=81=E5=8A=9F=E8=83=BD=EF=BC=8C=E6=94=B9=E8=BF=9B=E6=97=A5?= =?UTF-8?q?=E5=BF=97=E8=AE=B0=E5=BD=95=EF=BC=8C?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- api_key.txt | 39 +++++++++++--- rag2_0/demo/intent_recognition_example.py | 54 +++++++------------ rag2_0/demo/validate_excel_data_batch.py | 2 +- .../Multi_PromptTemplates.py | 22 ++++---- rag2_0/tool/APIKeyManager.py | 8 +-- 5 files changed, 70 insertions(+), 55 deletions(-) diff --git a/api_key.txt b/api_key.txt index 62f8d7c..fe7ca6c 100644 --- a/api_key.txt +++ b/api_key.txt @@ -1,10 +1,3 @@ -sk-poszkbjdmamimconjustnrxxqusuzlryxkrzkpronlenrmen -sk-zolvcegarsrwqhwgvwzgtqupodsdmckjiocyvoyldbkusbzc -sk-ywfafulcniaqdgdcsnbtqquaqeuiqlkcnknkaflwxyuemcow -sk-gzdqfoyvulrqscdpjlwlufdecrsyjpmwpkknuhnjsvtyftox -sk-bkcufidsebujopqqwexwxwpmevrpelmvxzdymncvllcyojce -sk-olabhscekudzkyudypkcjvehwqunagubwdmtppugrjmcptwv -sk-zpdqyocliebhqpkuwvebpgcnfjdkvavdltimllmgkthwnwph sk-uollmeyatyiwfzszvxkpyndmzfrbqjpyixewmrastbmaqbhy sk-xdlsjytiwilvodadkjxvwdgulhhdytkqvfpyrcnllclgzqkb sk-ffkltifkylutornjhwmnmfjsqsywrjibvujhjtjctzgnkvlp @@ -54,4 +47,34 @@ sk-wswttgfrxrwijvqhctfilhvlxgdkgogrjhvjkdbzvqrocofa sk-jdijeubeygjmqtxwryrbwmrpvqawinzwpcxodpolhcupzmpa sk-xbloemctsowwicjvrtrrewreosnfojoijtygsfxfnjntridv sk-isovavcefvkzlbjewnumeqqevmnoucojsxwskkitfktkemtq -sk-vxrlvvdzgythgyycuqehdloubxcdwhgojpowgxvgxsstjtvk \ No newline at end of file +sk-vxrlvvdzgythgyycuqehdloubxcdwhgojpowgxvgxsstjtvk +sk-krgctzbdqekohpowmvftsjswgpxnwxadezeosdspelmtmukx +sk-slcgfmphmbqwuvshoaygfkfaxpzcabtlpkhvfqjodajuynsl +sk-qwcrwwxsdmiirrzvwfijgxqupdutypjfldtvikdwkqgwfucs +sk-nyynzajaubwtezsznzcfzzsevmfgpyjrsstckxeufrvzwwej +sk-ksyqqkwbcecqgforztombghpeknrlqdkegtzeezsnhtcpchy +sk-inhqizoumyusllkpovvokdfwvyavcpgpjtxcwrbcftquiqpv +sk-peqxiflijgltfxbyxyfquvuzrfcwlkauvjfkbexgndtwoyqf +sk-ohukjjrxmqmlrdmvyuudkpysblupmfjojnuyzwjkknvnjagg +sk-nmvrynrwqtvrnbdgaeexxrsskjvmsffjbbvikpsmngypwuwn +sk-ruecsoljheouotepobjeeenminndcwwyjdoquqrcxfirmxmt +sk-mtfoqujosppodwgdcbwyglsfylkhtydoyzfnzxfomndpcuyf +sk-uicolvfebfhklerkcfcgymcasqafamthlemhaqqnvqugorfg +sk-zxvqmszvbktxjsgbtuafrxkdebmdjhfijeohhepkguatgpos +sk-vmuaaesfvsvrljroauzysfqydsksklrrrenzhrtxvvqwcewl +sk-ewjtqzbiqmlihkpjqkppwackeswuvbqyzsheaversvsdqisz +sk-zmebjdwdbpyxtribyuusdgaojlvnwqnjpxdcawibmtgsnlrp +sk-vrijdtzxzroipovgowdqrahhiicptgwvdhkmmrcmubuukxca +sk-sqaflqxtnyliiyrkcxxotgksfettijawpkhvfqnaavqtjvrg +sk-koawwrtemsnjvyakmhrykdindvbxjbxuyfqunjqsoymlrsrr +sk-izjhcaimcsrsgytxvlaanrfxzmhpqiclbokhmhnzkrdicknv +sk-tfcrtsrzqeftrdaebdhmfzkwkchqjltkcutqoeeclmnoeemr +sk-jfqkxsfmnyynybqvzkkwmzwxcyjebgdeucdmodunitjgydhv +sk-rutrnfpicpzxnqloqgxgenevcooqyxibbdguvywuqcbpwyjt +sk-jrqvdlkrkwzdfiuvqlmgncblfaihwkhgshukwkxatsrclsfe +sk-rruiajpnseboawytxmvvughdqcrkqlqsjlrcfopwztljfiox +sk-neiwqzlwfxxdrjvictvlbvpkbbpbmyiooddevhnqkerzugpy +sk-zfqdpybvyeutrdwenvfbsehfebkaekoytpqcltulseavtntb +sk-sbcjflkkwscfxzrplbexxifgqtrotnaxtvuoqfrtfyrvhnhr +sk-jpkxknfffbucdhnqahowbpcwdhbrjaqfvrbgnekdyxiflqlu +sk-ylyjcnumxpwxolrwjpzvomlnmezwgxagobztqbjdylohxsvb \ No newline at end of file diff --git a/rag2_0/demo/intent_recognition_example.py b/rag2_0/demo/intent_recognition_example.py index 40ee2b1..186f2c5 100755 --- a/rag2_0/demo/intent_recognition_example.py +++ b/rag2_0/demo/intent_recognition_example.py @@ -16,7 +16,7 @@ from tqdm import tqdm import time import sys import argparse -from typing import List, Dict, Any, Optional +from typing import List, Dict, Any from langchain.output_parsers import PydanticOutputParser from pydantic import BaseModel, Field sys.path.append(os.getcwd()) @@ -28,7 +28,7 @@ from rag2_0.tool.ModelTool import OpenAiLLM load_dotenv() # 示例查询 -examples_query = """ PE2211PK0801是什么软件""" +examples_query = """T1软件中,配件和材料有什么区别""" conversation_context="" chat_history=[ { @@ -102,41 +102,30 @@ class QueryRewriteProcessor: doc_text_list = json.dumps(retrieved_doc, ensure_ascii=False, indent=2) class TempModel(BaseModel): - can_solve_problem: bool = Field(description="是否能解决用户问题") - relevance_score: int = Field(description="相关性评分,0-100分") + can_solve_problem: bool = Field(description="是否能解答用户问题") + relevance_score: int = Field(description="置信度评分,0-100分") explanation: str = Field(description="解释文档是否能解决(回答)提问") class all_relevant_document(BaseModel): - most_relevant_document: list[TempModel] = Field(description="最相关的文档的判断结果") + document_list: list[TempModel] = Field(description="每个文档的判断结果") parser = PydanticOutputParser(pydantic_object=all_relevant_document) # 构建提示词 - prompt = f"""请判断以下检索文档列表中是否与用户提问相关,能够解决用户的问题,并给出相关性评分(0-100分)。输出最相关的文档的判断结果。 + prompt = f"""请判断以下检索文档列表中是否解答用户提问,能够解决用户的问题,能够基于检索文档给出回答,并给出置信度评分(0-100分)。输出每个文档的判断结果。 + 用户提问: {query} -用户提问: {query} + 检索文档列表: + {doc_text_list} -检索文档列表: -{doc_text_list} - -请按照以下JSON格式返回结果: -json``` -{{ - "most_relevant_document":[{{ - "can_solve_problem": true, - "relevance_score": 60, - "explanation":"xxxx" - }}] -}} -``` - -""" - + 请按照以下JSON格式返回结果: + {parser.get_format_instructions()} + """ try: # 初始化LLM并调用 - llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model="deepseek-ai/DeepSeek-R1", response_format={"type": "json_object"}) + llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model="deepseek-ai/DeepSeek-R1") response = llm.invoke(prompt) - result_list = parser.parse(response.content).most_relevant_document + result_list = parser.parse(response.content).document_list # 如果列表为空,返回默认的不相关结果 if not result_list: @@ -145,9 +134,11 @@ json``` "explanation": "无法解析文档相关性结果", "relevance_score": 0.0 } - + true_document_list=[cur for cur in result_list if cur.can_solve_problem] + if len(true_document_list)==0: + true_document_list = result_list # 找出分数最高的文档 - max_score_doc = max(result_list, key=lambda x: x.relevance_score) + max_score_doc = max(true_document_list, key=lambda x: x.relevance_score) return { "is_relevant": max_score_doc.can_solve_problem, @@ -155,12 +146,7 @@ json``` "explanation": max_score_doc.explanation } except Exception as e: - logging.error(f"判断文档相关性时出错: {str(e)}", exc_info=True) - return { - "is_relevant": False, - "explanation": f"判断过程出错: {str(e)}", - "relevance_score": 0.0 - } + raise e def load_questions_from_excel(self, file_path=None): """ @@ -254,7 +240,7 @@ json``` "槽位信息": slot_filling_str, "检索的文档": "\n".join(retrieved_doc_titles), "检索的内容": json.dumps(retrieved_doc, ensure_ascii=False, indent=2) if retrieved_doc else "", - "文档是否相关": "相关" if relevance_result["is_relevant"] else "不相关", + "文档能否解决问题": "能" if relevance_result["is_relevant"] else "不能", "文档相关性解释": relevance_result["explanation"] } except Exception as e: diff --git a/rag2_0/demo/validate_excel_data_batch.py b/rag2_0/demo/validate_excel_data_batch.py index 9965b47..ba60d0a 100755 --- a/rag2_0/demo/validate_excel_data_batch.py +++ b/rag2_0/demo/validate_excel_data_batch.py @@ -555,8 +555,8 @@ def main(): parser.add_argument("--input", "-i", type=str, help="输入Excel文件路径", default=input_excel) parser.add_argument("--output", "-o", type=str, help="输出结果Excel文件路径", default=output_excel) parser.add_argument("--workers", "-w", type=int, default=20, help="并行工作线程数") - logging.info(f"输入文件路径: {args.input}, 输出文件路径: {args.output}, 并行工作线程数: {args.workers}") args = parser.parse_args() + logging.info(f"输入文件路径: {args.input}, 输出文件路径: {args.output}, 并行工作线程数: {args.workers}") is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None # 创建验证器实例并执行验证 diff --git a/rag2_0/intent_recognition/Multi_PromptTemplates.py b/rag2_0/intent_recognition/Multi_PromptTemplates.py index a8ce21b..5a31a53 100755 --- a/rag2_0/intent_recognition/Multi_PromptTemplates.py +++ b/rag2_0/intent_recognition/Multi_PromptTemplates.py @@ -10,28 +10,32 @@ Description: 多轮对话下意图分类、改写核心提示词 query_rewrite_prompt_pro=""" # 电力造价问答优化工程师(精简版) **角色**:基于历史对话和术语库重构问题,提升知识库检索准确率。 -最高准则:保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。但重构后的问题,所有引入的主体背景等均要来源于历史对话、聊天背景或术语库,不得凭空捏造未提及的内容。 +**最高准则**: +1、保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。 +2、重构后的问题,所有引入的主体背景等均要来源于历史对话、聊天背景,不得凭空捏造未提及的内容。 +3、同义词替换:必须是提问中出现了synonymous中的内容,才替换为对应的标准词。不得改变原始意图,否则将导致系统出现灾难性问题 ## 核心原则 1. **指代消除 → 当指示代词("那"/"这")出现时,强制继承历史对话的最新核心主题(如功能或任务),并应用到当前主体。** 2. 背景继承 → 补充历史对话和聊天背景中的隐含信息(包括主题和功能)。 -4. 术语规范 → 同义词转标准词并【】标记。提问中的同义词(synonymous)替换为标准词(name) -5. 语义保真 → 保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。 +3. 术语规范 → 同义词转标准词并【】标记。提问中出现的同义词(synonymous)替换为标准词(name) +4. 语义保真 → 保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。 ## 处理流程 ### 一、输入解析 - 原始问题(需保留核心语义): - - {query} - - - 术语库集合: + {query} + + - 术语库集合(用于同义词转标准词环节): {keywords} + - 历史对话记录: {chat_history} + - 当前聊天背景: {context} @@ -56,8 +60,8 @@ graph TD 1. **指代消除 → 当指示代词出现时,优先继承历史对话的核心主题(如功能词),并替换当前问题的动词部分。** 2. 背景继承 → 历史对话中确定的背景信息需要保留。 3. 术语处理 → 同义词转标准词 + 【】标记。 -4. 同义词转标准词 → 将提问中的同义词(synonymous)替换为标准词(name) -4. 结构优化 → 保持原问题的5W2H特征,指代消除、背景继承下允许微调意图。 +4. 同义词转标准词 → 将提问中出现的同义词(synonymous)替换为对应标准词(name) +5. 结构优化 → 保持原问题的5W2H特征,指代消除、背景继承下允许微调意图。 ## 输出规范 {output_format} diff --git a/rag2_0/tool/APIKeyManager.py b/rag2_0/tool/APIKeyManager.py index 3de3979..cc8c0e0 100755 --- a/rag2_0/tool/APIKeyManager.py +++ b/rag2_0/tool/APIKeyManager.py @@ -92,7 +92,7 @@ class APIKeyManager: "Content-Type": "application/json" } data = { - "model": "deepseek-ai/DeepSeek-V3", + "model": "Qwen/Qwen2.5-7B-Instruct", "messages": [ {"role": "user", "content": "ping"} ], @@ -275,7 +275,7 @@ if __name__ == "__main__": stats = instance.get_usage_stats() all_balance=0.0 - buy_balance=14 * 10 * 14 # 购买18次,一次10条api_key,每个api_key有14元 + buy_balance=17 * 10 * 14 # 购买18次,一次10条api_key,每个api_key有14元 invalid_api_keys = [] for key, data in stats.items(): usage_stats = APIKeyManager.get_key_usage_stats(key) @@ -295,4 +295,6 @@ if __name__ == "__main__": print(f"开始移除无效的API密钥,并重新保存") APIKeyManager.remove_invalid_api_keys(invalid_api_keys) APIKeyManager.save_api_keys() - print(f"移除无效的API密钥,并重新保存完成") \ No newline at end of file + print(f"移除无效的API密钥,并重新保存完成") + import datetime + print(f"当前时间:{datetime.datetime.now()}") \ No newline at end of file