更新API密钥,优化意图识别示例,调整文档相关性判断逻辑,增强Excel数据验证功能,改进日志记录,
This commit is contained in:
@@ -16,7 +16,7 @@ from tqdm import tqdm
|
||||
import time
|
||||
import sys
|
||||
import argparse
|
||||
from typing import List, Dict, Any, Optional
|
||||
from typing import List, Dict, Any
|
||||
from langchain.output_parsers import PydanticOutputParser
|
||||
from pydantic import BaseModel, Field
|
||||
sys.path.append(os.getcwd())
|
||||
@@ -28,7 +28,7 @@ from rag2_0.tool.ModelTool import OpenAiLLM
|
||||
load_dotenv()
|
||||
|
||||
# 示例查询
|
||||
examples_query = """ PE2211PK0801是什么软件"""
|
||||
examples_query = """T1软件中,配件和材料有什么区别"""
|
||||
conversation_context=""
|
||||
chat_history=[
|
||||
{
|
||||
@@ -102,41 +102,30 @@ class QueryRewriteProcessor:
|
||||
|
||||
doc_text_list = json.dumps(retrieved_doc, ensure_ascii=False, indent=2)
|
||||
class TempModel(BaseModel):
|
||||
can_solve_problem: bool = Field(description="是否能解决用户问题")
|
||||
relevance_score: int = Field(description="相关性评分,0-100分")
|
||||
can_solve_problem: bool = Field(description="是否能解答用户问题")
|
||||
relevance_score: int = Field(description="置信度评分,0-100分")
|
||||
explanation: str = Field(description="解释文档是否能解决(回答)提问")
|
||||
|
||||
class all_relevant_document(BaseModel):
|
||||
most_relevant_document: list[TempModel] = Field(description="最相关的文档的判断结果")
|
||||
document_list: list[TempModel] = Field(description="每个文档的判断结果")
|
||||
|
||||
parser = PydanticOutputParser(pydantic_object=all_relevant_document)
|
||||
# 构建提示词
|
||||
prompt = f"""请判断以下检索文档列表中是否与用户提问相关,能够解决用户的问题,并给出相关性评分(0-100分)。输出最相关的文档的判断结果。
|
||||
prompt = f"""请判断以下检索文档列表中是否解答用户提问,能够解决用户的问题,能够基于检索文档给出回答,并给出置信度评分(0-100分)。输出每个文档的判断结果。
|
||||
用户提问: {query}
|
||||
|
||||
用户提问: {query}
|
||||
检索文档列表:
|
||||
{doc_text_list}
|
||||
|
||||
检索文档列表:
|
||||
{doc_text_list}
|
||||
|
||||
请按照以下JSON格式返回结果:
|
||||
json```
|
||||
{{
|
||||
"most_relevant_document":[{{
|
||||
"can_solve_problem": true,
|
||||
"relevance_score": 60,
|
||||
"explanation":"xxxx"
|
||||
}}]
|
||||
}}
|
||||
```
|
||||
|
||||
"""
|
||||
|
||||
请按照以下JSON格式返回结果:
|
||||
{parser.get_format_instructions()}
|
||||
"""
|
||||
try:
|
||||
# 初始化LLM并调用
|
||||
llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model="deepseek-ai/DeepSeek-R1", response_format={"type": "json_object"})
|
||||
llm = OpenAiLLM(api_key=self.api_key, base_url=self.base_url, model="deepseek-ai/DeepSeek-R1")
|
||||
response = llm.invoke(prompt)
|
||||
|
||||
result_list = parser.parse(response.content).most_relevant_document
|
||||
result_list = parser.parse(response.content).document_list
|
||||
|
||||
# 如果列表为空,返回默认的不相关结果
|
||||
if not result_list:
|
||||
@@ -145,9 +134,11 @@ json```
|
||||
"explanation": "无法解析文档相关性结果",
|
||||
"relevance_score": 0.0
|
||||
}
|
||||
|
||||
true_document_list=[cur for cur in result_list if cur.can_solve_problem]
|
||||
if len(true_document_list)==0:
|
||||
true_document_list = result_list
|
||||
# 找出分数最高的文档
|
||||
max_score_doc = max(result_list, key=lambda x: x.relevance_score)
|
||||
max_score_doc = max(true_document_list, key=lambda x: x.relevance_score)
|
||||
|
||||
return {
|
||||
"is_relevant": max_score_doc.can_solve_problem,
|
||||
@@ -155,12 +146,7 @@ json```
|
||||
"explanation": max_score_doc.explanation
|
||||
}
|
||||
except Exception as e:
|
||||
logging.error(f"判断文档相关性时出错: {str(e)}", exc_info=True)
|
||||
return {
|
||||
"is_relevant": False,
|
||||
"explanation": f"判断过程出错: {str(e)}",
|
||||
"relevance_score": 0.0
|
||||
}
|
||||
raise e
|
||||
|
||||
def load_questions_from_excel(self, file_path=None):
|
||||
"""
|
||||
@@ -254,7 +240,7 @@ json```
|
||||
"槽位信息": slot_filling_str,
|
||||
"检索的文档": "\n".join(retrieved_doc_titles),
|
||||
"检索的内容": json.dumps(retrieved_doc, ensure_ascii=False, indent=2) if retrieved_doc else "",
|
||||
"文档是否相关": "相关" if relevance_result["is_relevant"] else "不相关",
|
||||
"文档能否解决问题": "能" if relevance_result["is_relevant"] else "不能",
|
||||
"文档相关性解释": relevance_result["explanation"]
|
||||
}
|
||||
except Exception as e:
|
||||
|
||||
Reference in New Issue
Block a user