删除多余代码
This commit is contained in:
@@ -11,6 +11,7 @@ import logging
|
|||||||
import os
|
import os
|
||||||
import asyncio
|
import asyncio
|
||||||
from langchain.output_parsers import PydanticOutputParser
|
from langchain.output_parsers import PydanticOutputParser
|
||||||
|
from langchain_core.output_parsers import JsonOutputParser
|
||||||
import json
|
import json
|
||||||
from typing import List, Tuple, Dict, Any, Optional
|
from typing import List, Tuple, Dict, Any, Optional
|
||||||
import re
|
import re
|
||||||
@@ -23,11 +24,6 @@ from .PromptTemplates import (classification_prompt, query_rewrite_prompt_pro,
|
|||||||
slot_filling_prompt, step_back_prompt,
|
slot_filling_prompt, step_back_prompt,
|
||||||
follow_up_questions_prompt, hyde_prompt, multi_questions_prompt)
|
follow_up_questions_prompt, hyde_prompt, multi_questions_prompt)
|
||||||
|
|
||||||
from .Multi_PromptTemplates import (
|
|
||||||
intent_and_slot_prompt, output_example,
|
|
||||||
generate_slot_mapping_doc
|
|
||||||
)
|
|
||||||
|
|
||||||
from .DataModels import (
|
from .DataModels import (
|
||||||
Classification, QueryRewrite, Term, TermList,
|
Classification, QueryRewrite, Term, TermList,
|
||||||
SoftwareFunctionSlots, SoftwareTroubleShootingSlots, ProfessionalConsultingSlots,
|
SoftwareFunctionSlots, SoftwareTroubleShootingSlots, ProfessionalConsultingSlots,
|
||||||
@@ -197,7 +193,7 @@ class AsyncIntentRecognizer:
|
|||||||
|
|
||||||
return filtered_tokens
|
return filtered_tokens
|
||||||
|
|
||||||
async def _extract_keywords_with_llm_async(self, query: str, use_jieba: bool = False) -> List[Term]:
|
async def _extract_keywords_async(self, query: str, use_jieba: bool = False) -> List[Term]:
|
||||||
"""
|
"""
|
||||||
异步使用LLM从用户查询中提取专业关键词
|
异步使用LLM从用户查询中提取专业关键词
|
||||||
|
|
||||||
@@ -289,7 +285,7 @@ class AsyncIntentRecognizer:
|
|||||||
# 步骤1: 使用LLM提取查询中的关键词
|
# 步骤1: 使用LLM提取查询中的关键词
|
||||||
try:
|
try:
|
||||||
llm_start_time = time.time()
|
llm_start_time = time.time()
|
||||||
extracted_terms = await self._extract_keywords_with_llm_async(query, use_jieba)
|
extracted_terms = await self._extract_keywords_async(query, use_jieba)
|
||||||
for term in extracted_terms:
|
for term in extracted_terms:
|
||||||
query_keys.append(term.name)
|
query_keys.append(term.name)
|
||||||
llm_end_time = time.time()
|
llm_end_time = time.time()
|
||||||
@@ -328,7 +324,8 @@ class AsyncIntentRecognizer:
|
|||||||
|
|
||||||
# 合并所有结果
|
# 合并所有结果
|
||||||
for result in keyword_results:
|
for result in keyword_results:
|
||||||
matched_terms.extend(result)
|
if len(result) > 0:
|
||||||
|
matched_terms.extend(result)
|
||||||
|
|
||||||
vector_end_time = time.time()
|
vector_end_time = time.time()
|
||||||
vector_time = vector_end_time - vector_start_time
|
vector_time = vector_end_time - vector_start_time
|
||||||
@@ -383,26 +380,6 @@ class AsyncIntentRecognizer:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise RuntimeError(f"解析问题改写结果时出错: {e}") from e
|
raise RuntimeError(f"解析问题改写结果时出错: {e}") from e
|
||||||
|
|
||||||
def _judge_define_suffix(self, input_str: str) -> Tuple[bool, List[str]]:
|
|
||||||
"""
|
|
||||||
判断输入字符串是否包含定义的后缀,并返回所有匹配到的后缀名列表
|
|
||||||
|
|
||||||
Args:
|
|
||||||
input_str: 输入字符串
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Tuple[bool, List[str]]: (是否包含定义的后缀, 匹配到的后缀名列表)
|
|
||||||
"""
|
|
||||||
|
|
||||||
# 构建正则表达式模式,匹配大小写不敏感且前面可能带有.
|
|
||||||
pattern = r'(?:\.?)(' + '|'.join(re.escape(field.get('name')) for field in self._suffix_keywords) + r')'
|
|
||||||
|
|
||||||
# 使用 re.IGNORECASE 标志来忽略大小写,findall找到所有匹配
|
|
||||||
matches = re.finditer(pattern, input_str, re.IGNORECASE)
|
|
||||||
matched_suffixes = [match.group(1) for match in matches]
|
|
||||||
|
|
||||||
return bool(matched_suffixes), matched_suffixes
|
|
||||||
|
|
||||||
def _process_lock_related_query(self, query: str) -> str:
|
def _process_lock_related_query(self, query: str) -> str:
|
||||||
"""
|
"""
|
||||||
特殊处理锁相关咨询
|
特殊处理锁相关咨询
|
||||||
@@ -786,7 +763,8 @@ class AsyncIntentRecognizer:
|
|||||||
# 解析JSON响应
|
# 解析JSON响应
|
||||||
try:
|
try:
|
||||||
wiki_names = []
|
wiki_names = []
|
||||||
json_response = json.loads(response.content)
|
json_parser = JsonOutputParser()
|
||||||
|
json_response = json_parser.parse(response.content)
|
||||||
for match in json_response:
|
for match in json_response:
|
||||||
wiki_names.append(match["content"])
|
wiki_names.append(match["content"])
|
||||||
logging.debug(f"软件文档匹配耗时: {end_time - start_time:.2f}秒")
|
logging.debug(f"软件文档匹配耗时: {end_time - start_time:.2f}秒")
|
||||||
@@ -838,97 +816,4 @@ class AsyncIntentRecognizer:
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
# 如果解析失败,返回原始查询作为唯一子问题
|
# 如果解析失败,返回原始查询作为唯一子问题
|
||||||
logging.error(f"异步多角度问题生成失败: {e}", exc_info=True)
|
logging.error(f"异步多角度问题生成失败: {e}", exc_info=True)
|
||||||
return MultiQuestions(original_query=query, sub_questions=[query])
|
return MultiQuestions(original_query=query, sub_questions=[query])
|
||||||
|
|
||||||
async def _process_intent_and_slot_async(self, user_input: str, conversation_context: str = "",
                                         chat_history: Optional[List[Dict[str, str]]] = None,
                                         previous_slots: Optional[Dict[str, Any]] = None) -> Dict[str, Any]:
    """
    Run intent classification and slot filling with one combined LLM prompt.

    The LLM answer is parsed into an ``IntentAndSlotResult``. If the parsed
    classification has no slot model (``self._get_slot_model`` returns
    ``None``) the input is re-classified and the slots re-filled through the
    dedicated methods; if the slot object's type name differs from the
    expected model only the slots are re-filled.

    Args:
        user_input: Current user input.
        conversation_context: Background information about the conversation.
        chat_history: Previous turns, e.g.
            ``[{"role": "user", "content": "..."}, {"role": "assistant", "content": "..."}]``.
            ``None`` is treated as an empty list.
        previous_slots: Slot values gathered so far. ``None`` is treated as
            an empty dict.

    Returns:
        A dict with the keys ``"classification"`` (``model_dump()`` of the
        classification) and ``"slot_filling"``. On the normal path
        ``"slot_filling"`` is a dict with ``is_complete``, ``missing_slots``
        and ``filled_data``; on the fallback paths it is whatever
        ``self._fill_slots_async`` returns.

    Raises:
        RuntimeError: If parsing the LLM answer or any fallback step fails
            (the original exception is chained). Errors raised by the LLM
            call itself propagate unchanged.
    """
    if chat_history is None:
        chat_history = []
    if previous_slots is None:
        previous_slots = {}

    # Documentation of the classification -> slot model mapping for the prompt.
    slot_mapping_doc = generate_slot_mapping_doc()

    # Build the prompt.
    # NOTE(review): `classification_info` is not defined in this method; it is
    # presumably a module-level name imported elsewhere in the file - confirm.
    parser = PydanticOutputParser(pydantic_object=IntentAndSlotResult)
    formatted_prompt = intent_and_slot_prompt.format(
        conversation_context=conversation_context,
        chat_history=json.dumps(chat_history, ensure_ascii=False),
        previous_slots=json.dumps(previous_slots, ensure_ascii=False),
        user_input=user_input,
        slot_mapping_doc=slot_mapping_doc,
        output_format=parser.get_format_instructions(),
        classification_info=classification_info
    )

    # Call the LLM and record how long it took. The timing is logged here so
    # that it is reported on the fallback paths as well.
    llm_start_time = time.time()
    response = await self._llm.invoke_async(formatted_prompt + output_example, False)
    llm_time = time.time() - llm_start_time
    logging.info(f"异步意图识别+槽位LLM调用耗时: {llm_time:.2f}秒")

    try:
        # Remove any <think>...</think> section first, then trim, so no
        # stray whitespace is left in front of the payload. The response
        # object itself is left untouched.
        clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL).strip()
        result_json = parser.parse(clean_output)
        classification = result_json.classification
        slot_filling = result_json.slots
        expected_slot_model = self._get_slot_model(classification)

        if expected_slot_model is None:
            # Rare: the LLM returned a classification with no slot model
            # (e.g. vertical and sub classification mixed up). Start over
            # with the dedicated classification and slot-filling calls.
            classification = await self._classify_intent_async(user_input, conversation_context, chat_history, previous_slots)
            fill_slots = await self._fill_slots_async(user_input, classification, conversation_context, chat_history, previous_slots)
            logging.warning("异步重新分类与槽点填充")
            return {
                "classification": classification.model_dump(),
                "slot_filling": fill_slots
            }

        if expected_slot_model.__name__ != type(slot_filling).__name__:
            # Slots do not belong to the classification: re-fill slots only.
            slot_filling = await self._fill_slots_async(user_input, classification, conversation_context, chat_history, previous_slots)
            logging.warning("异步重新填充槽点")
            return {
                "classification": classification.model_dump(),
                "slot_filling": slot_filling
            }

        # Only evaluate required slots once the slot model is known to match
        # the classification; the fallback paths never used this result.
        is_complete, missing_slots = slot_filling.check_required_slots()

        return {
            "classification": classification.model_dump(),
            "slot_filling": {
                "is_complete": is_complete,
                "missing_slots": missing_slots,
                "filled_data": slot_filling.model_dump()
            }
        }

    except Exception as e:
        raise RuntimeError(f"异步process_intent_and_slot error:{e}") from e
|
|
||||||
@@ -1,174 +0,0 @@
|
|||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
"""
|
|
||||||
File: Multi_PromptTemplates.py
|
|
||||||
Author: oyyz
|
|
||||||
Date: 2025-06-13
|
|
||||||
Description: 多轮对话下意图分类、改写核心提示词
|
|
||||||
"""
|
|
||||||
|
|
||||||
intent_and_slot_prompt = """
|
|
||||||
# 你是一个专业的电力造价领域智能助手,负责对用户输入进行意图分类识别和关键信息槽位填充。
|
|
||||||
|
|
||||||
{classification_info}
|
|
||||||
|
|
||||||
{slot_mapping_doc}
|
|
||||||
|
|
||||||
## 【软件名称规范】
|
|
||||||
支持的软件名称及其别名:
|
|
||||||
- **配网工程计价通D3软件**:别名包括配网D3、D3软件、配网工程软件等
|
|
||||||
- **新型储能电站建设计价通C1软件**:别名包括储能C1、C1软件、储能电站软件、储能软件等
|
|
||||||
- **西藏电力工程计价通Z1软件**:别名包括西藏Z1、Z1软件、西藏电力软件等
|
|
||||||
- **技改检修工程计价通T1软件**:别名包括技改T1、T1软件、技改检修软件等
|
|
||||||
- **技改检修清单计价通T1软件**:别名包括技改清单T1、T1清单软件、技改检修清单软件等
|
|
||||||
- **主网电力建设计价通软件**:别名包括主网软件、电力建设软件、主网建设软件、博微电力建设计价通等
|
|
||||||
不在上述软件之列的,使用用户输入中的软件名称
|
|
||||||
|
|
||||||
## 【任务要求】
|
|
||||||
|
|
||||||
1. **会话理解**:综合考虑会话背景、历史对话和之前的槽位信息来理解当前用户输入
|
|
||||||
2. **意图分类**:准确识别用户输入属于哪个垂直领域和子分类
|
|
||||||
3. **槽位填充**:从当前用户问题中提取关键信息,并结合历史槽位信息进行补充完善
|
|
||||||
4. **信息融合**:
|
|
||||||
- 优先使用当前用户输入中的明确信息
|
|
||||||
- 当前输入缺失但历史槽位存在的信息,可适当继承
|
|
||||||
- 历史对话中的上下文信息有助于理解当前输入的真实意图
|
|
||||||
5. **槽位处理**:
|
|
||||||
- 对于必填槽位,必须尽力从当前输入和历史信息中提取
|
|
||||||
- 对于选填槽位,如果能从当前输入或历史信息中提取则填写
|
|
||||||
- 如果当前输入与历史信息存在冲突,以当前输入为准
|
|
||||||
6. **输出格式**:只输出符合格式的JSON数据,不要有任何额外的解释
|
|
||||||
|
|
||||||
## 【会话背景信息】
|
|
||||||
{conversation_context}
|
|
||||||
|
|
||||||
## 【历史对话记录】
|
|
||||||
{chat_history}
|
|
||||||
|
|
||||||
## 【历史槽位信息】
|
|
||||||
{previous_slots}
|
|
||||||
|
|
||||||
## 【当前用户输入】
|
|
||||||
{user_input}
|
|
||||||
|
|
||||||
## 【输出格式】
|
|
||||||
{output_format}
|
|
||||||
"""
|
|
||||||
|
|
||||||
output_example="""
|
|
||||||
## 【综合分析示例】
|
|
||||||
|
|
||||||
**示例1:利用历史对话理解当前输入**
|
|
||||||
会话背景: 用户正在咨询软件使用问题
|
|
||||||
历史对话:
|
|
||||||
- 用户: "我在使用配网D3软件"
|
|
||||||
- 助手: "好的,请问您遇到什么问题?"
|
|
||||||
历史槽位:{"software_name": "配网工程计价通D3软件"}
|
|
||||||
当前用户输入: "新建工程按钮找不到"
|
|
||||||
|
|
||||||
输出:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"classification": {
|
|
||||||
"vertical_classification": "软件问题",
|
|
||||||
"sub_classification": "软件功能"
|
|
||||||
},
|
|
||||||
"slot_filling": {
|
|
||||||
"software_name": "配网工程计价通D3软件",
|
|
||||||
"function_name": "新建工程",
|
|
||||||
"operation": "查找新建工程按钮位置",
|
|
||||||
"software_version": null,
|
|
||||||
"operation_steps": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**示例2:继承和更新槽位信息**
|
|
||||||
会话背景: 用户遇到软件报错,正在进行故障排查
|
|
||||||
历史对话:
|
|
||||||
- 用户: "西藏Z1软件报错了"
|
|
||||||
- 助手: "请详细描述一下报错信息"
|
|
||||||
历史槽位: {"software_name": "西藏电力工程计价通Z1软件"}
|
|
||||||
当前用户输入: "提示找不到许可证,是在新建工程的时候"
|
|
||||||
|
|
||||||
输出:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"classification": {
|
|
||||||
"vertical_classification": "软件问题",
|
|
||||||
"sub_classification": "故障排查"
|
|
||||||
},
|
|
||||||
"slot_filling": {
|
|
||||||
"software_name": "西藏电力工程计价通Z1软件",
|
|
||||||
"function_name": "新建工程",
|
|
||||||
"error_message": "提示找不到许可证",
|
|
||||||
"software_version": null,
|
|
||||||
"os_version": null,
|
|
||||||
"reproduction_steps": "新建工程时出现错误"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
**示例3:信息冲突处理**
|
|
||||||
会话背景: 用户在多个软件间切换咨询
|
|
||||||
历史对话:
|
|
||||||
- 用户: "配网D3的费用计算"
|
|
||||||
- 助手: "好的,关于配网D3的费用计算..."
|
|
||||||
历史槽位: {"software_name": "配网工程计价通D3软件"}
|
|
||||||
当前用户输入: "不对,我说的是技改T1软件的材料费怎么算"
|
|
||||||
|
|
||||||
输出:
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"classification": {
|
|
||||||
"vertical_classification": "业务问题",
|
|
||||||
"sub_classification": "数据问题"
|
|
||||||
},
|
|
||||||
"slot_filling": {
|
|
||||||
"expense_type": "材料费",
|
|
||||||
"operation_purpose": "了解费用计算方法",
|
|
||||||
"software_name": "技改检修工程计价通T1软件",
|
|
||||||
"project_type": null
|
|
||||||
}
|
|
||||||
}
|
|
||||||
```
|
|
||||||
"""
|
|
||||||
|
|
||||||
def generate_slot_mapping_doc(mapping: "dict[str, dict[str, str]] | None" = None) -> str:
    """
    Render the classification -> slot model mapping as prompt documentation.

    The output starts with a heading, lists every vertical classification
    followed by one ``- sub_class -> slot_model`` line per sub-classification
    (in mapping order), and ends with a fixed "notes" section of three rules.

    Args:
        mapping: Optional ``{vertical: {sub_class: slot_model_name}}``
            mapping to render. When ``None`` (the default) the built-in
            mapping below is used, which reproduces the original output.

    Returns:
        str: The formatted document, lines joined with ``"\\n"``.
    """
    if mapping is None:
        mapping = {
            "软件问题": {
                "软件功能": "SoftwareFunctionSlots",
                "故障排查": "SoftwareTroubleShootingSlots"
            },
            "业务问题": {
                "专业咨询": "ProfessionalConsultingSlots",
                "数据问题": "DataProblemSlots"
            },
            "安装下载注册": {
                "后缀名咨询": "FileExtensionConsultingSlots",
                "软件锁类": "SoftwareLockSlots",
                "安装下载类": "InstallationDownloadSlots",
                "问题排查类": "ProblemDiagnosisSlots"
            },
            "其他": {
                "其他": "OtherSlots"
            }
        }

    doc = ["## 【分类与槽位模型对应关系】"]
    for vertical, sub_classes in mapping.items():
        # The leading "\n" yields a blank line before each vertical section.
        doc.append(f"\n{vertical}:")
        doc.extend(
            f"- {sub_class} -> {slot_model}"
            for sub_class, slot_model in sub_classes.items()
        )

    # Fixed rules appended after the mapping.
    doc.append("\n## 【注意事项】")
    doc.append("1. 分类与槽位模型必须严格对应。严格遵守,不得违背")
    doc.append("2. 每个分类只能使用其对应的槽位模型")
    doc.append("3. 不允许混用不同分类的槽位模型")

    return "\n".join(doc)
|
|
||||||
Reference in New Issue
Block a user