更新.gitignore以忽略查询日志文件,优化意图识别逻辑,新增锁相关咨询处理方法,并调整后退提示的返回结构以支持固定话术类问题。
This commit is contained in:
@@ -10,3 +10,4 @@ data/excel/*.xlsx
|
||||
rag2_0/demo/ProfessionalTermAnalyzer.py
|
||||
data/logs/*
|
||||
rag2_0/dify/Test.py
|
||||
data/query_logs/*
|
||||
|
||||
@@ -200,7 +200,8 @@ class QueryRewriteProcessor:
|
||||
conversation_context=conversation_context,
|
||||
chat_history=chat_history,
|
||||
previous_slots=previous_slots,
|
||||
enable_query_expansion=True))
|
||||
enable_query_expansion=True,
|
||||
use_jieba=True))
|
||||
|
||||
# 提取分类信息
|
||||
classification = result["classification"]
|
||||
@@ -440,7 +441,7 @@ def main():
|
||||
for idx, query in enumerate(examples):
|
||||
if query.strip() == "":
|
||||
continue
|
||||
query="储能C1软件如何新建工程?"
|
||||
query="811619150828能看一下这个锁是16的马"
|
||||
conversation_context="当前使用软件:配网计价通D3软件"
|
||||
# 在调试模式下使用完整的参数
|
||||
print(json.dumps(processor.process_query(
|
||||
|
||||
@@ -51,7 +51,7 @@ class QueryExpandResponse(BaseModel):
|
||||
all: List[str] = Field(default_factory=list)
|
||||
step_back: Dict[str, Any] = Field(default_factory=Dict)
|
||||
follow_up: Dict[str, Any] = Field(default_factory=Dict)
|
||||
hyde: Dict[str, Any] = Field(default_factory=Dict)
|
||||
# hyde: Dict[str, Any] = Field(default_factory=Dict)
|
||||
multi_questions: Dict[str, Any] = Field(default_factory=Dict)
|
||||
|
||||
# 定义响应模型
|
||||
@@ -150,7 +150,7 @@ async def intent_recognize(request: IntentRecognizeRequest):
|
||||
all=result["query_expand"]["all"],
|
||||
step_back=result["query_expand"]["step_back"],
|
||||
follow_up=result["query_expand"]["follow_up"],
|
||||
hyde=result["query_expand"]["hyde"],
|
||||
# hyde=result["query_expand"]["hyde"],
|
||||
multi_questions=result["query_expand"]["multi_questions"]
|
||||
)
|
||||
)
|
||||
|
||||
@@ -378,6 +378,18 @@ class AsyncIntentRecognizer:
|
||||
|
||||
return bool(matched_suffixes), matched_suffixes
|
||||
|
||||
def _process_lock_related_query(self, query: str) -> str:
|
||||
"""
|
||||
特殊处理锁相关咨询
|
||||
"""
|
||||
pattern = r'(?<!\d)(?:8116(?:\s*\d){8}|\d{2}\s*-\s*\d{6})(?!\d)'
|
||||
matches = re.findall(pattern, query)
|
||||
if not matches:
|
||||
return query
|
||||
lock_number = "、".join(matches)
|
||||
return f"通过博微软件助手查询软件锁信息,锁注册号为{lock_number}"
|
||||
|
||||
|
||||
async def process_query_async(self, query: str, conversation_context: str = "",
|
||||
chat_history: List[Dict[str, str]] = None,
|
||||
previous_slots: Dict[str, Any] = None,
|
||||
@@ -414,7 +426,7 @@ class AsyncIntentRecognizer:
|
||||
asyncio.create_task(self._generate_follow_up_questions_async(query, chat_history, conversation_context)),
|
||||
|
||||
# 5.3: HyDE
|
||||
asyncio.create_task(self._generate_hypothetical_document_async(query, chat_history, conversation_context)),
|
||||
# asyncio.create_task(self._generate_hypothetical_document_async(query, chat_history, conversation_context)),
|
||||
|
||||
# 5.4: 多问题查询
|
||||
asyncio.create_task(self._generate_multi_questions_async(query, chat_history, conversation_context))
|
||||
@@ -442,6 +454,13 @@ class AsyncIntentRecognizer:
|
||||
classification_task = self._classify_intent_async(rewrite.rewrite, conversation_context, chat_history, previous_slots)
|
||||
classification = await classification_task
|
||||
|
||||
# 特殊处理 锁相关咨询
|
||||
if classification.vertical_classification == "安装下载注册" and classification.sub_classification == "软件锁类":
|
||||
process_lock_start_time = time.time()
|
||||
rewrite.rewrite = self._process_lock_related_query(rewrite.rewrite)
|
||||
process_lock_end_time = time.time()
|
||||
process_lock_time = process_lock_end_time - process_lock_start_time
|
||||
logging.info(f"锁相关咨询正则匹配 - 总耗时: {process_lock_time:.2f}秒")
|
||||
# 步骤4: 进行槽位填充
|
||||
# 如果是有效分类,进行槽位填充
|
||||
slot_filling_result = {}
|
||||
@@ -464,24 +483,24 @@ class AsyncIntentRecognizer:
|
||||
logging.info(f"异步问题扩展环节耗时统计 - 总耗时: {end_time - start_time:.2f}秒")
|
||||
|
||||
# 收集结果
|
||||
step_back_result = query_expand_results[0] if query_expand_results[0] else StepBackPrompt(original_query=query, step_back_query=query)
|
||||
step_back_result = query_expand_results[0] if query_expand_results[0] else StepBackPrompt(original_query=query, can_use_back_prompt=False, step_back_query=[query])
|
||||
follow_up_result = query_expand_results[1] if query_expand_results[1] else FollowUpQuestions(original_query=query, follow_up_query=query)
|
||||
hyde_result = query_expand_results[2] if query_expand_results[2] else HypotheticalDocument(original_query=query, hypothetical_answer="")
|
||||
multi_questions_result = query_expand_results[3] if query_expand_results[3] else MultiQuestions(original_query=query, sub_questions=[query])
|
||||
# hyde_result = query_expand_results[2] if query_expand_results[2] else HypotheticalDocument(original_query=query, hypothetical_answer="")
|
||||
multi_questions_result = query_expand_results[2] if query_expand_results[2] else MultiQuestions(original_query=query, sub_questions=[query])
|
||||
|
||||
all_questions = multi_questions_result.sub_questions
|
||||
all_questions.append(query)
|
||||
all_questions.append(rewrite.rewrite)
|
||||
all_questions.extend(step_back_result.step_back_query)
|
||||
all_questions.append(follow_up_result.follow_up_query)
|
||||
all_questions.append(hyde_result.hypothetical_answer)
|
||||
# all_questions.append(hyde_result.hypothetical_answer)
|
||||
all_questions = list(set(all_questions))
|
||||
|
||||
query_expand = {
|
||||
"all": all_questions,
|
||||
"step_back": step_back_result.model_dump(),
|
||||
"follow_up": follow_up_result.model_dump(),
|
||||
"hyde": hyde_result.model_dump(),
|
||||
# "hyde": hyde_result.model_dump(),
|
||||
"multi_questions": multi_questions_result.model_dump()
|
||||
}
|
||||
|
||||
@@ -649,7 +668,7 @@ class AsyncIntentRecognizer:
|
||||
except Exception as e:
|
||||
# 如果解析失败,返回原始查询作为后退提示
|
||||
logging.error(f"异步后退提示生成失败: {e}", exc_info=True)
|
||||
return StepBackPrompt(original_query=query, step_back_query=query)
|
||||
return StepBackPrompt(original_query=query, can_use_back_prompt=False, step_back_query=[query])
|
||||
|
||||
async def _generate_follow_up_questions_async(self, query: str, chat_history: List[Dict[str, str]] = None, conversation_context: str = "") -> FollowUpQuestions:
|
||||
"""
|
||||
|
||||
@@ -31,7 +31,8 @@ classification_info="""【垂直领域分类】:
|
||||
1. 软件问题 -- 指涉及软件使用、功能询问、软件故障排查等方面的提问或请求。
|
||||
2. 业务问题 -- 指涉及电力造价领域专业知识、造价费用计算等电力造价业务知识
|
||||
3. 安装下载注册 -- 指涉及软件(或插件)安装下载、注册、激活等操作类问题。
|
||||
4. 其他 -- 指与软件或电力造价专业无关的日常对话、问候、感慨、情绪表达等。
|
||||
4. 固定话术类 -- 指涉及需要固定话术回答的问题,如:规费咨询
|
||||
5. 其他 -- 指与软件或电力造价专业无关的日常对话、问候、感慨、情绪表达等。
|
||||
|
||||
【软件问题包括以下两类】:
|
||||
1. 软件功能:询问软件功能的使用、功能操作(调整)、功能位置、如何设置、如何转换等
|
||||
@@ -49,11 +50,19 @@ classification_info="""【垂直领域分类】:
|
||||
"用哪个软件打开.BDY3文件?",
|
||||
"BDD3是什么"
|
||||
2. 软件锁类:询问软件锁信息、锁注册号查询、许可证查询、锁激活问题等软件锁相关问题
|
||||
形如: 8116 开头连续的12位数字 或者 形如 xx-xxxxxx 的数字格式 的内容为锁的注册号,提问出现对应内容时,归类为软件锁类
|
||||
3. 安装下载类:安装下载咨询、组件(插件)选择、环境配置、安装包下载、政策文件(规范文件)下载等
|
||||
4. 问题排查类:软件安装下载失败、报错,系统兼容性问题等
|
||||
|
||||
【固定话术类包括以下一类】:
|
||||
1. 规费咨询:所有涉及规费咨询等问题(规费、社保费、公积金费等)
|
||||
|
||||
【其他】:
|
||||
1. 其他"""
|
||||
1. 其他
|
||||
|
||||
分类优先级:
|
||||
固定话术类 > 软件问题 、 业务问题 、 安装下载注册 > 其他
|
||||
"""
|
||||
|
||||
classification_prompt="""
|
||||
用户正在使用电力造价软件或想询问电力造价领域相关知识,你需要根据用户的输入内容集合历史对话(如果存在),将其归类为以下垂直领域之一:
|
||||
@@ -228,10 +237,21 @@ step_back_prompt = """
|
||||
|
||||
## 示例
|
||||
原始问题: "配网D3软件2023版本如何在Windows 11系统上导入单位工程量清单?"
|
||||
后退问题: ["配网D3软件如何导入工程量清单?", "如何导入单位工程量清单?"]
|
||||
后退问题:
|
||||
{{
|
||||
"original_query": "配网D3软件2023版本如何在Windows 11系统上导入单位工程量清单?",
|
||||
"can_use_back_prompt": True,
|
||||
"step_back_query": ["配网D3软件如何导入工程量清单?", "如何导入单位工程量清单?"]
|
||||
}}
|
||||
|
||||
原始问题: "技改T1软件中的某个设备更换后,如何在系统中更新对应的定额?"
|
||||
后退问题: ["技改T1软件中如何更新设备对应的定额?", "如何更新设备对应的定额?"]
|
||||
后退问题:
|
||||
{{
|
||||
"original_query": "技改T1软件中的某个设备更换后,如何在系统中更新对应的定额?",
|
||||
"can_use_back_prompt": True,
|
||||
"step_back_query": ["技改T1软件中如何更新设备对应的定额?", "如何更新设备对应的定额?"]
|
||||
}}
|
||||
|
||||
"""
|
||||
|
||||
follow_up_questions_prompt = """
|
||||
|
||||
Reference in New Issue
Block a user