更新API密钥,优化意图识别示例,调整文档相关性判断逻辑,增强Excel数据验证功能,改进日志记录
This commit is contained in:
+30
-7
@@ -1,10 +1,3 @@
|
|||||||
sk-poszkbjdmamimconjustnrxxqusuzlryxkrzkpronlenrmen
|
|
||||||
sk-zolvcegarsrwqhwgvwzgtqupodsdmckjiocyvoyldbkusbzc
|
|
||||||
sk-ywfafulcniaqdgdcsnbtqquaqeuiqlkcnknkaflwxyuemcow
|
|
||||||
sk-gzdqfoyvulrqscdpjlwlufdecrsyjpmwpkknuhnjsvtyftox
|
|
||||||
sk-bkcufidsebujopqqwexwxwpmevrpelmvxzdymncvllcyojce
|
|
||||||
sk-olabhscekudzkyudypkcjvehwqunagubwdmtppugrjmcptwv
|
|
||||||
sk-zpdqyocliebhqpkuwvebpgcnfjdkvavdltimllmgkthwnwph
|
|
||||||
sk-uollmeyatyiwfzszvxkpyndmzfrbqjpyixewmrastbmaqbhy
|
sk-uollmeyatyiwfzszvxkpyndmzfrbqjpyixewmrastbmaqbhy
|
||||||
sk-xdlsjytiwilvodadkjxvwdgulhhdytkqvfpyrcnllclgzqkb
|
sk-xdlsjytiwilvodadkjxvwdgulhhdytkqvfpyrcnllclgzqkb
|
||||||
sk-ffkltifkylutornjhwmnmfjsqsywrjibvujhjtjctzgnkvlp
|
sk-ffkltifkylutornjhwmnmfjsqsywrjibvujhjtjctzgnkvlp
|
||||||
@@ -55,3 +48,33 @@ sk-jdijeubeygjmqtxwryrbwmrpvqawinzwpcxodpolhcupzmpa
|
|||||||
sk-xbloemctsowwicjvrtrrewreosnfojoijtygsfxfnjntridv
|
sk-xbloemctsowwicjvrtrrewreosnfojoijtygsfxfnjntridv
|
||||||
sk-isovavcefvkzlbjewnumeqqevmnoucojsxwskkitfktkemtq
|
sk-isovavcefvkzlbjewnumeqqevmnoucojsxwskkitfktkemtq
|
||||||
sk-vxrlvvdzgythgyycuqehdloubxcdwhgojpowgxvgxsstjtvk
|
sk-vxrlvvdzgythgyycuqehdloubxcdwhgojpowgxvgxsstjtvk
|
||||||
|
sk-krgctzbdqekohpowmvftsjswgpxnwxadezeosdspelmtmukx
|
||||||
|
sk-slcgfmphmbqwuvshoaygfkfaxpzcabtlpkhvfqjodajuynsl
|
||||||
|
sk-qwcrwwxsdmiirrzvwfijgxqupdutypjfldtvikdwkqgwfucs
|
||||||
|
sk-nyynzajaubwtezsznzcfzzsevmfgpyjrsstckxeufrvzwwej
|
||||||
|
sk-ksyqqkwbcecqgforztombghpeknrlqdkegtzeezsnhtcpchy
|
||||||
|
sk-inhqizoumyusllkpovvokdfwvyavcpgpjtxcwrbcftquiqpv
|
||||||
|
sk-peqxiflijgltfxbyxyfquvuzrfcwlkauvjfkbexgndtwoyqf
|
||||||
|
sk-ohukjjrxmqmlrdmvyuudkpysblupmfjojnuyzwjkknvnjagg
|
||||||
|
sk-nmvrynrwqtvrnbdgaeexxrsskjvmsffjbbvikpsmngypwuwn
|
||||||
|
sk-ruecsoljheouotepobjeeenminndcwwyjdoquqrcxfirmxmt
|
||||||
|
sk-mtfoqujosppodwgdcbwyglsfylkhtydoyzfnzxfomndpcuyf
|
||||||
|
sk-uicolvfebfhklerkcfcgymcasqafamthlemhaqqnvqugorfg
|
||||||
|
sk-zxvqmszvbktxjsgbtuafrxkdebmdjhfijeohhepkguatgpos
|
||||||
|
sk-vmuaaesfvsvrljroauzysfqydsksklrrrenzhrtxvvqwcewl
|
||||||
|
sk-ewjtqzbiqmlihkpjqkppwackeswuvbqyzsheaversvsdqisz
|
||||||
|
sk-zmebjdwdbpyxtribyuusdgaojlvnwqnjpxdcawibmtgsnlrp
|
||||||
|
sk-vrijdtzxzroipovgowdqrahhiicptgwvdhkmmrcmubuukxca
|
||||||
|
sk-sqaflqxtnyliiyrkcxxotgksfettijawpkhvfqnaavqtjvrg
|
||||||
|
sk-koawwrtemsnjvyakmhrykdindvbxjbxuyfqunjqsoymlrsrr
|
||||||
|
sk-izjhcaimcsrsgytxvlaanrfxzmhpqiclbokhmhnzkrdicknv
|
||||||
|
sk-tfcrtsrzqeftrdaebdhmfzkwkchqjltkcutqoeeclmnoeemr
|
||||||
|
sk-jfqkxsfmnyynybqvzkkwmzwxcyjebgdeucdmodunitjgydhv
|
||||||
|
sk-rutrnfpicpzxnqloqgxgenevcooqyxibbdguvywuqcbpwyjt
|
||||||
|
sk-jrqvdlkrkwzdfiuvqlmgncblfaihwkhgshukwkxatsrclsfe
|
||||||
|
sk-rruiajpnseboawytxmvvughdqcrkqlqsjlrcfopwztljfiox
|
||||||
|
sk-neiwqzlwfxxdrjvictvlbvpkbbpbmyiooddevhnqkerzugpy
|
||||||
|
sk-zfqdpybvyeutrdwenvfbsehfebkaekoytpqcltulseavtntb
|
||||||
|
sk-sbcjflkkwscfxzrplbexxifgqtrotnaxtvuoqfrtfyrvhnhr
|
||||||
|
sk-jpkxknfffbucdhnqahowbpcwdhbrjaqfvrbgnekdyxiflqlu
|
||||||
|
sk-ylyjcnumxpwxolrwjpzvomlnmezwgxagobztqbjdylohxsvb
|
||||||
@@ -16,7 +16,7 @@ from tqdm import tqdm
|
|||||||
import time
|
import time
|
||||||
import sys
|
import sys
|
||||||
import argparse
|
import argparse
|
||||||
from typing import List, Dict, Any, Optional
|
from typing import List, Dict, Any
|
||||||
from langchain.output_parsers import PydanticOutputParser
|
from langchain.output_parsers import PydanticOutputParser
|
||||||
from pydantic import BaseModel, Field
|
from pydantic import BaseModel, Field
|
||||||
sys.path.append(os.getcwd())
|
sys.path.append(os.getcwd())
|
||||||
@@ -28,7 +28,7 @@ from rag2_0.tool.ModelTool import OpenAiLLM
|
|||||||
load_dotenv()
|
load_dotenv()
|
||||||
|
|
||||||
# 示例查询
|
# 示例查询
|
||||||
examples_query = """ PE2211PK0801是什么软件"""
|
examples_query = """T1软件中,配件和材料有什么区别"""
|
||||||
conversation_context=""
|
conversation_context=""
|
||||||
chat_history=[
|
chat_history=[
|
||||||
{
|
{
|
||||||
@@ -102,41 +102,30 @@ class QueryRewriteProcessor:
|
|||||||
|
|
||||||
doc_text_list = json.dumps(retrieved_doc, ensure_ascii=False, indent=2)
|
doc_text_list = json.dumps(retrieved_doc, ensure_ascii=False, indent=2)
|
||||||
class TempModel(BaseModel):
|
class TempModel(BaseModel):
|
||||||
can_solve_problem: bool = Field(description="是否能解决用户问题")
|
can_solve_problem: bool = Field(description="是否能解答用户问题")
|
||||||
relevance_score: int = Field(description="相关性评分,0-100分")
|
relevance_score: int = Field(description="置信度评分,0-100分")
|
||||||
explanation: str = Field(description="解释文档是否能解决(回答)提问")
|
explanation: str = Field(description="解释文档是否能解决(回答)提问")
|
||||||
|
|
||||||
class all_relevant_document(BaseModel):
|
class all_relevant_document(BaseModel):
|
||||||
most_relevant_document: list[TempModel] = Field(description="最相关的文档的判断结果")
|
document_list: list[TempModel] = Field(description="每个文档的判断结果")
|
||||||
|
|
||||||
parser = PydanticOutputParser(pydantic_object=all_relevant_document)
|
parser = PydanticOutputParser(pydantic_object=all_relevant_document)
|
||||||
# 构建提示词
|
# 构建提示词
|
||||||
prompt = f"""请判断以下检索文档列表中是否与用户提问相关,能够解决用户的问题,并给出相关性评分(0-100分)。输出最相关的文档的判断结果。
|
prompt = f"""请判断以下检索文档列表中是否解答用户提问,能够解决用户的问题,能够基于检索文档给出回答,并给出置信度评分(0-100分)。输出每个文档的判断结果。
|
||||||
|
用户提问: {query}
|
||||||
|
|
||||||
用户提问: {query}
|
检索文档列表:
|
||||||
|
{doc_text_list}
|
||||||
检索文档列表:
|
|
||||||
{doc_text_list}
|
|
||||||
|
|
||||||
请按照以下JSON格式返回结果:
|
|
||||||
json```
|
|
||||||
{{
|
|
||||||
"most_relevant_document":[{{
|
|
||||||
"can_solve_problem": true,
|
|
||||||
"relevance_score": 60,
|
|
||||||
"explanation":"xxxx"
|
|
||||||
}}]
|
|
||||||
}}
|
|
||||||
```
|
|
||||||
|
|
||||||
"""
|
|
||||||
|
|
||||||
|
请按照以下JSON格式返回结果:
|
||||||
|
{parser.get_format_instructions()}
|
||||||
|
"""
|
||||||
try:
|
try:
|
||||||
# 初始化LLM并调用
|
# 初始化LLM并调用
|
||||||
llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model="deepseek-ai/DeepSeek-R1", response_format={"type": "json_object"})
|
llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model="deepseek-ai/DeepSeek-R1")
|
||||||
response = llm.invoke(prompt)
|
response = llm.invoke(prompt)
|
||||||
|
|
||||||
result_list = parser.parse(response.content).most_relevant_document
|
result_list = parser.parse(response.content).document_list
|
||||||
|
|
||||||
# 如果列表为空,返回默认的不相关结果
|
# 如果列表为空,返回默认的不相关结果
|
||||||
if not result_list:
|
if not result_list:
|
||||||
@@ -145,9 +134,11 @@ json```
|
|||||||
"explanation": "无法解析文档相关性结果",
|
"explanation": "无法解析文档相关性结果",
|
||||||
"relevance_score": 0.0
|
"relevance_score": 0.0
|
||||||
}
|
}
|
||||||
|
true_document_list=[cur for cur in result_list if cur.can_solve_problem]
|
||||||
|
if len(true_document_list)==0:
|
||||||
|
true_document_list = result_list
|
||||||
# 找出分数最高的文档
|
# 找出分数最高的文档
|
||||||
max_score_doc = max(result_list, key=lambda x: x.relevance_score)
|
max_score_doc = max(true_document_list, key=lambda x: x.relevance_score)
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"is_relevant": max_score_doc.can_solve_problem,
|
"is_relevant": max_score_doc.can_solve_problem,
|
||||||
@@ -155,12 +146,7 @@ json```
|
|||||||
"explanation": max_score_doc.explanation
|
"explanation": max_score_doc.explanation
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"判断文档相关性时出错: {str(e)}", exc_info=True)
|
raise e
|
||||||
return {
|
|
||||||
"is_relevant": False,
|
|
||||||
"explanation": f"判断过程出错: {str(e)}",
|
|
||||||
"relevance_score": 0.0
|
|
||||||
}
|
|
||||||
|
|
||||||
def load_questions_from_excel(self, file_path=None):
|
def load_questions_from_excel(self, file_path=None):
|
||||||
"""
|
"""
|
||||||
@@ -254,7 +240,7 @@ json```
|
|||||||
"槽位信息": slot_filling_str,
|
"槽位信息": slot_filling_str,
|
||||||
"检索的文档": "\n".join(retrieved_doc_titles),
|
"检索的文档": "\n".join(retrieved_doc_titles),
|
||||||
"检索的内容": json.dumps(retrieved_doc, ensure_ascii=False, indent=2) if retrieved_doc else "",
|
"检索的内容": json.dumps(retrieved_doc, ensure_ascii=False, indent=2) if retrieved_doc else "",
|
||||||
"文档是否相关": "相关" if relevance_result["is_relevant"] else "不相关",
|
"文档能否解决问题": "能" if relevance_result["is_relevant"] else "不能",
|
||||||
"文档相关性解释": relevance_result["explanation"]
|
"文档相关性解释": relevance_result["explanation"]
|
||||||
}
|
}
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -555,8 +555,8 @@ def main():
|
|||||||
parser.add_argument("--input", "-i", type=str, help="输入Excel文件路径", default=input_excel)
|
parser.add_argument("--input", "-i", type=str, help="输入Excel文件路径", default=input_excel)
|
||||||
parser.add_argument("--output", "-o", type=str, help="输出结果Excel文件路径", default=output_excel)
|
parser.add_argument("--output", "-o", type=str, help="输出结果Excel文件路径", default=output_excel)
|
||||||
parser.add_argument("--workers", "-w", type=int, default=20, help="并行工作线程数")
|
parser.add_argument("--workers", "-w", type=int, default=20, help="并行工作线程数")
|
||||||
logging.info(f"输入文件路径: {args.input}, 输出文件路径: {args.output}, 并行工作线程数: {args.workers}")
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
logging.info(f"输入文件路径: {args.input}, 输出文件路径: {args.output}, 并行工作线程数: {args.workers}")
|
||||||
is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None
|
is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None
|
||||||
|
|
||||||
# 创建验证器实例并执行验证
|
# 创建验证器实例并执行验证
|
||||||
|
|||||||
@@ -10,28 +10,32 @@ Description: 多轮对话下意图分类、改写核心提示词
|
|||||||
query_rewrite_prompt_pro="""
|
query_rewrite_prompt_pro="""
|
||||||
# 电力造价问答优化工程师(精简版)
|
# 电力造价问答优化工程师(精简版)
|
||||||
**角色**:基于历史对话和术语库重构问题,提升知识库检索准确率。
|
**角色**:基于历史对话和术语库重构问题,提升知识库检索准确率。
|
||||||
最高准则:保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。但重构后的问题,所有引入的主体背景等均要来源于历史对话、聊天背景或术语库,不得凭空捏造未提及的内容。
|
**最高准则**:
|
||||||
|
1、保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。
|
||||||
|
2、重构后的问题,所有引入的主体背景等均要来源于历史对话、聊天背景,不得凭空捏造未提及的内容。
|
||||||
|
3、同义词替换:必须是提问中出现了synonymous中的内容,才替换为对应的标准词。不得改变原始意图,否则将导致系统出现灾难性问题
|
||||||
|
|
||||||
## 核心原则
|
## 核心原则
|
||||||
1. **指代消除 → 当指示代词("那"/"这")出现时,强制继承历史对话的最新核心主题(如功能或任务),并应用到当前主体。**
|
1. **指代消除 → 当指示代词("那"/"这")出现时,强制继承历史对话的最新核心主题(如功能或任务),并应用到当前主体。**
|
||||||
2. 背景继承 → 补充历史对话和聊天背景中的隐含信息(包括主题和功能)。
|
2. 背景继承 → 补充历史对话和聊天背景中的隐含信息(包括主题和功能)。
|
||||||
4. 术语规范 → 同义词转标准词并【】标记。提问中的同义词(synonymous)替换为标准词(name)
|
3. 术语规范 → 同义词转标准词并【】标记。提问中出现的同义词(synonymous)替换为标准词(name)
|
||||||
5. 语义保真 → 保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。
|
4. 语义保真 → 保持问题核心意图,但允许在指代消除、背景继承下添加隐含功能词。
|
||||||
|
|
||||||
## 处理流程
|
## 处理流程
|
||||||
### 一、输入解析
|
### 一、输入解析
|
||||||
- 原始问题(需保留核心语义):
|
- 原始问题(需保留核心语义):
|
||||||
<query>
|
<query> {query} </query>
|
||||||
{query}
|
|
||||||
</query>
|
- 术语库集合(用于同义词转标准词环节):
|
||||||
- 术语库集合:
|
|
||||||
<keywords>
|
<keywords>
|
||||||
{keywords}
|
{keywords}
|
||||||
</keywords>
|
</keywords>
|
||||||
|
|
||||||
- 历史对话记录:
|
- 历史对话记录:
|
||||||
<history>
|
<history>
|
||||||
{chat_history}
|
{chat_history}
|
||||||
</history>
|
</history>
|
||||||
|
|
||||||
- 当前聊天背景:
|
- 当前聊天背景:
|
||||||
<conversation_background>
|
<conversation_background>
|
||||||
{context}
|
{context}
|
||||||
@@ -56,8 +60,8 @@ graph TD
|
|||||||
1. **指代消除 → 当指示代词出现时,优先继承历史对话的核心主题(如功能词),并替换当前问题的动词部分。**
|
1. **指代消除 → 当指示代词出现时,优先继承历史对话的核心主题(如功能词),并替换当前问题的动词部分。**
|
||||||
2. 背景继承 → 历史对话中确定的背景信息需要保留。
|
2. 背景继承 → 历史对话中确定的背景信息需要保留。
|
||||||
3. 术语处理 → 同义词转标准词 + 【】标记。
|
3. 术语处理 → 同义词转标准词 + 【】标记。
|
||||||
4. 同义词转标准词 → 将提问中的同义词(synonymous)替换为标准词(name)
|
4. 同义词转标准词 → 将提问中出现的同义词(synonymous)替换为对应标准词(name)
|
||||||
4. 结构优化 → 保持原问题的5W2H特征,指代消除、背景继承下允许微调意图。
|
5. 结构优化 → 保持原问题的5W2H特征,指代消除、背景继承下允许微调意图。
|
||||||
|
|
||||||
## 输出规范
|
## 输出规范
|
||||||
{output_format}
|
{output_format}
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ class APIKeyManager:
|
|||||||
"Content-Type": "application/json"
|
"Content-Type": "application/json"
|
||||||
}
|
}
|
||||||
data = {
|
data = {
|
||||||
"model": "deepseek-ai/DeepSeek-V3",
|
"model": "Qwen/Qwen2.5-7B-Instruct",
|
||||||
"messages": [
|
"messages": [
|
||||||
{"role": "user", "content": "ping"}
|
{"role": "user", "content": "ping"}
|
||||||
],
|
],
|
||||||
@@ -275,7 +275,7 @@ if __name__ == "__main__":
|
|||||||
|
|
||||||
stats = instance.get_usage_stats()
|
stats = instance.get_usage_stats()
|
||||||
all_balance=0.0
|
all_balance=0.0
|
||||||
buy_balance=14 * 10 * 14 # 购买18次,一次10条api_key,每个api_key有14元
|
buy_balance=17 * 10 * 14 # 购买17次,一次10条api_key,每个api_key有14元
|
||||||
invalid_api_keys = []
|
invalid_api_keys = []
|
||||||
for key, data in stats.items():
|
for key, data in stats.items():
|
||||||
usage_stats = APIKeyManager.get_key_usage_stats(key)
|
usage_stats = APIKeyManager.get_key_usage_stats(key)
|
||||||
@@ -296,3 +296,5 @@ if __name__ == "__main__":
|
|||||||
APIKeyManager.remove_invalid_api_keys(invalid_api_keys)
|
APIKeyManager.remove_invalid_api_keys(invalid_api_keys)
|
||||||
APIKeyManager.save_api_keys()
|
APIKeyManager.save_api_keys()
|
||||||
print(f"移除无效的API密钥,并重新保存完成")
|
print(f"移除无效的API密钥,并重新保存完成")
|
||||||
|
import datetime
|
||||||
|
print(f"当前时间:{datetime.datetime.now()}")
|
||||||
Reference in New Issue
Block a user