更新.gitignore以忽略查询日志文件,优化意图识别逻辑,新增锁相关咨询处理方法,并调整后退提示的返回结构以支持固定话术类问题。
This commit is contained in:
@@ -10,3 +10,4 @@ data/excel/*.xlsx
|
|||||||
rag2_0/demo/ProfessionalTermAnalyzer.py
|
rag2_0/demo/ProfessionalTermAnalyzer.py
|
||||||
data/logs/*
|
data/logs/*
|
||||||
rag2_0/dify/Test.py
|
rag2_0/dify/Test.py
|
||||||
|
data/query_logs/*
|
||||||
|
|||||||
@@ -200,7 +200,8 @@ class QueryRewriteProcessor:
|
|||||||
conversation_context=conversation_context,
|
conversation_context=conversation_context,
|
||||||
chat_history=chat_history,
|
chat_history=chat_history,
|
||||||
previous_slots=previous_slots,
|
previous_slots=previous_slots,
|
||||||
enable_query_expansion=True))
|
enable_query_expansion=True,
|
||||||
|
use_jieba=True))
|
||||||
|
|
||||||
# 提取分类信息
|
# 提取分类信息
|
||||||
classification = result["classification"]
|
classification = result["classification"]
|
||||||
@@ -440,7 +441,7 @@ def main():
|
|||||||
for idx, query in enumerate(examples):
|
for idx, query in enumerate(examples):
|
||||||
if query.strip() == "":
|
if query.strip() == "":
|
||||||
continue
|
continue
|
||||||
query="储能C1软件如何新建工程?"
|
query="811619150828能看一下这个锁是16的马"
|
||||||
conversation_context="当前使用软件:配网计价通D3软件"
|
conversation_context="当前使用软件:配网计价通D3软件"
|
||||||
# 在调试模式下使用完整的参数
|
# 在调试模式下使用完整的参数
|
||||||
print(json.dumps(processor.process_query(
|
print(json.dumps(processor.process_query(
|
||||||
|
|||||||
@@ -51,7 +51,7 @@ class QueryExpandResponse(BaseModel):
|
|||||||
all: List[str] = Field(default_factory=list)
|
all: List[str] = Field(default_factory=list)
|
||||||
step_back: Dict[str, Any] = Field(default_factory=Dict)
|
step_back: Dict[str, Any] = Field(default_factory=Dict)
|
||||||
follow_up: Dict[str, Any] = Field(default_factory=Dict)
|
follow_up: Dict[str, Any] = Field(default_factory=Dict)
|
||||||
hyde: Dict[str, Any] = Field(default_factory=Dict)
|
# hyde: Dict[str, Any] = Field(default_factory=Dict)
|
||||||
multi_questions: Dict[str, Any] = Field(default_factory=Dict)
|
multi_questions: Dict[str, Any] = Field(default_factory=Dict)
|
||||||
|
|
||||||
# 定义响应模型
|
# 定义响应模型
|
||||||
@@ -150,7 +150,7 @@ async def intent_recognize(request: IntentRecognizeRequest):
|
|||||||
all=result["query_expand"]["all"],
|
all=result["query_expand"]["all"],
|
||||||
step_back=result["query_expand"]["step_back"],
|
step_back=result["query_expand"]["step_back"],
|
||||||
follow_up=result["query_expand"]["follow_up"],
|
follow_up=result["query_expand"]["follow_up"],
|
||||||
hyde=result["query_expand"]["hyde"],
|
# hyde=result["query_expand"]["hyde"],
|
||||||
multi_questions=result["query_expand"]["multi_questions"]
|
multi_questions=result["query_expand"]["multi_questions"]
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -378,6 +378,18 @@ class AsyncIntentRecognizer:
|
|||||||
|
|
||||||
return bool(matched_suffixes), matched_suffixes
|
return bool(matched_suffixes), matched_suffixes
|
||||||
|
|
||||||
|
def _process_lock_related_query(self, query: str) -> str:
|
||||||
|
"""
|
||||||
|
特殊处理锁相关咨询
|
||||||
|
"""
|
||||||
|
pattern = r'(?<!\d)(?:8116(?:\s*\d){8}|\d{2}\s*-\s*\d{6})(?!\d)'
|
||||||
|
matches = re.findall(pattern, query)
|
||||||
|
if not matches:
|
||||||
|
return query
|
||||||
|
lock_number = "、".join(matches)
|
||||||
|
return f"通过博微软件助手查询软件锁信息,锁注册号为{lock_number}"
|
||||||
|
|
||||||
|
|
||||||
async def process_query_async(self, query: str, conversation_context: str = "",
|
async def process_query_async(self, query: str, conversation_context: str = "",
|
||||||
chat_history: List[Dict[str, str]] = None,
|
chat_history: List[Dict[str, str]] = None,
|
||||||
previous_slots: Dict[str, Any] = None,
|
previous_slots: Dict[str, Any] = None,
|
||||||
@@ -414,7 +426,7 @@ class AsyncIntentRecognizer:
|
|||||||
asyncio.create_task(self._generate_follow_up_questions_async(query, chat_history, conversation_context)),
|
asyncio.create_task(self._generate_follow_up_questions_async(query, chat_history, conversation_context)),
|
||||||
|
|
||||||
# 5.3: HyDE
|
# 5.3: HyDE
|
||||||
asyncio.create_task(self._generate_hypothetical_document_async(query, chat_history, conversation_context)),
|
# asyncio.create_task(self._generate_hypothetical_document_async(query, chat_history, conversation_context)),
|
||||||
|
|
||||||
# 5.4: 多问题查询
|
# 5.4: 多问题查询
|
||||||
asyncio.create_task(self._generate_multi_questions_async(query, chat_history, conversation_context))
|
asyncio.create_task(self._generate_multi_questions_async(query, chat_history, conversation_context))
|
||||||
@@ -442,6 +454,13 @@ class AsyncIntentRecognizer:
|
|||||||
classification_task = self._classify_intent_async(rewrite.rewrite, conversation_context, chat_history, previous_slots)
|
classification_task = self._classify_intent_async(rewrite.rewrite, conversation_context, chat_history, previous_slots)
|
||||||
classification = await classification_task
|
classification = await classification_task
|
||||||
|
|
||||||
|
# 特殊处理 锁相关咨询
|
||||||
|
if classification.vertical_classification == "安装下载注册" and classification.sub_classification == "软件锁类":
|
||||||
|
process_lock_start_time = time.time()
|
||||||
|
rewrite.rewrite = self._process_lock_related_query(rewrite.rewrite)
|
||||||
|
process_lock_end_time = time.time()
|
||||||
|
process_lock_time = process_lock_end_time - process_lock_start_time
|
||||||
|
logging.info(f"锁相关咨询正则匹配 - 总耗时: {process_lock_time:.2f}秒")
|
||||||
# 步骤4: 进行槽位填充
|
# 步骤4: 进行槽位填充
|
||||||
# 如果是有效分类,进行槽位填充
|
# 如果是有效分类,进行槽位填充
|
||||||
slot_filling_result = {}
|
slot_filling_result = {}
|
||||||
@@ -464,24 +483,24 @@ class AsyncIntentRecognizer:
|
|||||||
logging.info(f"异步问题扩展环节耗时统计 - 总耗时: {end_time - start_time:.2f}秒")
|
logging.info(f"异步问题扩展环节耗时统计 - 总耗时: {end_time - start_time:.2f}秒")
|
||||||
|
|
||||||
# 收集结果
|
# 收集结果
|
||||||
step_back_result = query_expand_results[0] if query_expand_results[0] else StepBackPrompt(original_query=query, step_back_query=query)
|
step_back_result = query_expand_results[0] if query_expand_results[0] else StepBackPrompt(original_query=query, can_use_back_prompt=False, step_back_query=[query])
|
||||||
follow_up_result = query_expand_results[1] if query_expand_results[1] else FollowUpQuestions(original_query=query, follow_up_query=query)
|
follow_up_result = query_expand_results[1] if query_expand_results[1] else FollowUpQuestions(original_query=query, follow_up_query=query)
|
||||||
hyde_result = query_expand_results[2] if query_expand_results[2] else HypotheticalDocument(original_query=query, hypothetical_answer="")
|
# hyde_result = query_expand_results[2] if query_expand_results[2] else HypotheticalDocument(original_query=query, hypothetical_answer="")
|
||||||
multi_questions_result = query_expand_results[3] if query_expand_results[3] else MultiQuestions(original_query=query, sub_questions=[query])
|
multi_questions_result = query_expand_results[2] if query_expand_results[2] else MultiQuestions(original_query=query, sub_questions=[query])
|
||||||
|
|
||||||
all_questions = multi_questions_result.sub_questions
|
all_questions = multi_questions_result.sub_questions
|
||||||
all_questions.append(query)
|
all_questions.append(query)
|
||||||
all_questions.append(rewrite.rewrite)
|
all_questions.append(rewrite.rewrite)
|
||||||
all_questions.extend(step_back_result.step_back_query)
|
all_questions.extend(step_back_result.step_back_query)
|
||||||
all_questions.append(follow_up_result.follow_up_query)
|
all_questions.append(follow_up_result.follow_up_query)
|
||||||
all_questions.append(hyde_result.hypothetical_answer)
|
# all_questions.append(hyde_result.hypothetical_answer)
|
||||||
all_questions = list(set(all_questions))
|
all_questions = list(set(all_questions))
|
||||||
|
|
||||||
query_expand = {
|
query_expand = {
|
||||||
"all": all_questions,
|
"all": all_questions,
|
||||||
"step_back": step_back_result.model_dump(),
|
"step_back": step_back_result.model_dump(),
|
||||||
"follow_up": follow_up_result.model_dump(),
|
"follow_up": follow_up_result.model_dump(),
|
||||||
"hyde": hyde_result.model_dump(),
|
# "hyde": hyde_result.model_dump(),
|
||||||
"multi_questions": multi_questions_result.model_dump()
|
"multi_questions": multi_questions_result.model_dump()
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -649,7 +668,7 @@ class AsyncIntentRecognizer:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
# 如果解析失败,返回原始查询作为后退提示
|
# 如果解析失败,返回原始查询作为后退提示
|
||||||
logging.error(f"异步后退提示生成失败: {e}", exc_info=True)
|
logging.error(f"异步后退提示生成失败: {e}", exc_info=True)
|
||||||
return StepBackPrompt(original_query=query, step_back_query=query)
|
return StepBackPrompt(original_query=query, can_use_back_prompt=False, step_back_query=[query])
|
||||||
|
|
||||||
async def _generate_follow_up_questions_async(self, query: str, chat_history: List[Dict[str, str]] = None, conversation_context: str = "") -> FollowUpQuestions:
|
async def _generate_follow_up_questions_async(self, query: str, chat_history: List[Dict[str, str]] = None, conversation_context: str = "") -> FollowUpQuestions:
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -31,7 +31,8 @@ classification_info="""【垂直领域分类】:
|
|||||||
1. 软件问题 -- 指涉及软件使用、功能询问、软件故障排查等方面的提问或请求。
|
1. 软件问题 -- 指涉及软件使用、功能询问、软件故障排查等方面的提问或请求。
|
||||||
2. 业务问题 -- 指涉及电力造价领域专业知识、造价费用计算等电力造价业务知识
|
2. 业务问题 -- 指涉及电力造价领域专业知识、造价费用计算等电力造价业务知识
|
||||||
3. 安装下载注册 -- 指涉及软件(或插件)安装下载、注册、激活等操作类问题。
|
3. 安装下载注册 -- 指涉及软件(或插件)安装下载、注册、激活等操作类问题。
|
||||||
4. 其他 -- 指与软件或电力造价专业无关的日常对话、问候、感慨、情绪表达等。
|
4. 固定话术类 -- 指涉及需要固定话术回答的问题,如:规费咨询
|
||||||
|
5. 其他 -- 指与软件或电力造价专业无关的日常对话、问候、感慨、情绪表达等。
|
||||||
|
|
||||||
【软件问题包括以下两类】:
|
【软件问题包括以下两类】:
|
||||||
1. 软件功能:询问软件功能的使用、功能操作(调整)、功能位置、如何设置、如何转换等
|
1. 软件功能:询问软件功能的使用、功能操作(调整)、功能位置、如何设置、如何转换等
|
||||||
@@ -49,11 +50,19 @@ classification_info="""【垂直领域分类】:
|
|||||||
"用哪个软件打开.BDY3文件?",
|
"用哪个软件打开.BDY3文件?",
|
||||||
"BDD3是什么"
|
"BDD3是什么"
|
||||||
2. 软件锁类:询问软件锁信息、锁注册号查询、许可证查询、锁激活问题等软件锁相关问题
|
2. 软件锁类:询问软件锁信息、锁注册号查询、许可证查询、锁激活问题等软件锁相关问题
|
||||||
|
形如: 8116 开头连续的12位数字 或者 形如 xx-xxxxxx 的数字格式 的内容为锁的注册号,提问出现对应内容时,归类为软件锁类
|
||||||
3. 安装下载类:安装下载咨询、组件(插件)选择、环境配置、安装包下载、政策文件(规范文件)下载等
|
3. 安装下载类:安装下载咨询、组件(插件)选择、环境配置、安装包下载、政策文件(规范文件)下载等
|
||||||
4. 问题排查类:软件安装下载失败、报错,系统兼容性问题等
|
4. 问题排查类:软件安装下载失败、报错,系统兼容性问题等
|
||||||
|
|
||||||
|
【固定话术类包括以下一类】:
|
||||||
|
1. 规费咨询:所有涉及规费咨询等问题(规费、社保费、公积金费等)
|
||||||
|
|
||||||
【其他】:
|
【其他】:
|
||||||
1. 其他"""
|
1. 其他
|
||||||
|
|
||||||
|
分类优先级:
|
||||||
|
固定话术类 > 软件问题 、 业务问题 、 安装下载注册 > 其他
|
||||||
|
"""
|
||||||
|
|
||||||
classification_prompt="""
|
classification_prompt="""
|
||||||
用户正在使用电力造价软件或想询问电力造价领域相关知识,你需要根据用户的输入内容集合历史对话(如果存在),将其归类为以下垂直领域之一:
|
用户正在使用电力造价软件或想询问电力造价领域相关知识,你需要根据用户的输入内容集合历史对话(如果存在),将其归类为以下垂直领域之一:
|
||||||
@@ -228,10 +237,21 @@ step_back_prompt = """
|
|||||||
|
|
||||||
## 示例
|
## 示例
|
||||||
原始问题: "配网D3软件2023版本如何在Windows 11系统上导入单位工程量清单?"
|
原始问题: "配网D3软件2023版本如何在Windows 11系统上导入单位工程量清单?"
|
||||||
后退问题: ["配网D3软件如何导入工程量清单?", "如何导入单位工程量清单?"]
|
后退问题:
|
||||||
|
{{
|
||||||
|
"original_query": "配网D3软件2023版本如何在Windows 11系统上导入单位工程量清单?",
|
||||||
|
"can_use_back_prompt": True,
|
||||||
|
"step_back_query": ["配网D3软件如何导入工程量清单?", "如何导入单位工程量清单?"]
|
||||||
|
}}
|
||||||
|
|
||||||
原始问题: "技改T1软件中的某个设备更换后,如何在系统中更新对应的定额?"
|
原始问题: "技改T1软件中的某个设备更换后,如何在系统中更新对应的定额?"
|
||||||
后退问题: ["技改T1软件中如何更新设备对应的定额?", "如何更新设备对应的定额?"]
|
后退问题:
|
||||||
|
{{
|
||||||
|
"original_query": "技改T1软件中的某个设备更换后,如何在系统中更新对应的定额?",
|
||||||
|
"can_use_back_prompt": True,
|
||||||
|
"step_back_query": ["技改T1软件中如何更新设备对应的定额?", "如何更新设备对应的定额?"]
|
||||||
|
}}
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
follow_up_questions_prompt = """
|
follow_up_questions_prompt = """
|
||||||
|
|||||||
Reference in New Issue
Block a user