1、删除不再使用的.cursorrules文件
2、更新poetry.lock以反映Poetry版本的变化,添加jieba依赖
3、重构意图识别逻辑以支持多轮对话,优化槽位填充和意图分类功能,增强代码可读性和维护性。
This commit is contained in:
@@ -8,6 +8,7 @@ Description: 意图识别和问题改写示例
|
||||
|
||||
import os
|
||||
from dotenv import load_dotenv
|
||||
from regex import F
|
||||
from rag2_0.intent_recognition import IntentRecognizer
|
||||
import pandas as pd
|
||||
import logging
|
||||
@@ -16,6 +17,7 @@ import concurrent.futures
|
||||
from tqdm import tqdm
|
||||
import time
|
||||
import sys
|
||||
from typing import List, Dict
|
||||
# 加载环境变量
|
||||
load_dotenv()
|
||||
|
||||
@@ -42,7 +44,7 @@ def load_questions_from_excel(file_path=None):
|
||||
logging.error(f"读取Excel文件时出错: {e}")
|
||||
return []
|
||||
|
||||
def process_query(recognizer, query):
|
||||
def process_query(recognizer: IntentRecognizer, query: str, conversation_context: str = "", chat_history: List[Dict[str, str]] = None, previous_slots: Dict[str, str] = None):
|
||||
"""
|
||||
处理单个查询,支持重试机制,并包含槽位填充
|
||||
|
||||
@@ -59,8 +61,8 @@ def process_query(recognizer, query):
|
||||
while retry_count <= max_retries:
|
||||
try:
|
||||
# 使用新的process_query_with_slots方法处理查询
|
||||
result = recognizer.process_query_with_slots(query)
|
||||
|
||||
# result = recognizer.process_query_with_slots(query)
|
||||
result = recognizer.process_query(query, conversation_context=conversation_context, chat_history=chat_history, previous_slots=previous_slots)
|
||||
# 提取分类信息
|
||||
classification = result["classification"]
|
||||
|
||||
@@ -101,7 +103,6 @@ def process_query(recognizer, query):
|
||||
"检索的关键词": keywords_str,
|
||||
"槽位填充": slot_filling_str
|
||||
}
|
||||
|
||||
except Exception as e:
|
||||
retry_count += 1
|
||||
|
||||
@@ -174,10 +175,30 @@ def save_results_to_excel(results, output_file, is_final=False):
|
||||
logging.info(f"已保存{len(valid_results)}条结果至: {temp_output_file}")
|
||||
|
||||
# 示例查询
|
||||
examples_query = """"锁标签号:811621005858, 注册单位:惠州电力勘察设计院有限公司,软件名称:广东迁改导则2022, 注册号:BW278-83834-58155-58339.迁改导则是要另外下载安装软件吗?"
|
||||
|
||||
"""
|
||||
|
||||
examples_query = """那西藏软件呢"""
|
||||
conversation_context=""
|
||||
chat_history=[
|
||||
{
|
||||
"user": "怎么新建工程啊"
|
||||
},
|
||||
{
|
||||
"assistant": "您好!为了更准确地帮助您解决问题,请提供以下信息:\n\n- **软件名称**:请确认您使用的软件名称,可选值如下:\n - 配网工程计价通D3软件\n - 新型储能电站建设计价通C1软件\n - 西藏电力工程计价通Z1软件\n - 技改检修工程计价通T1软件\n - 技改检修清单计价通T1软件\n - 主网电力建设计价通软件\n\n请补充以上信息,以便我们能够更好地指导您如何新建工程。"
|
||||
},
|
||||
{
|
||||
"user": "配网工程计价通D3软件"
|
||||
},
|
||||
{
|
||||
"assistant": """好的,针对您使用**配网工程计价通D3软件**新建工程的问题,以下是具体的操作步骤:\n\n## 新建工程\n\n### 功能入口\n【主页】界面——"新建"按钮\n\n### 操作步骤\n1. 在"新建窗口"选择对应工程模板。\n2. 设置工程名称。\n3. 确定后根据新建向导完善"电压等级"、"地区类型"等参数。\n4. 点击"确定"即可完成新建工程。\n\n\n\n希望这些步骤能帮助您顺利完成新建工程。如果还有其他问题,欢迎随时提问!\n"""
|
||||
}
|
||||
]
|
||||
previous_slots={
|
||||
"software_name": "配网工程计价通D3软件",
|
||||
"function_name": "新建工程",
|
||||
"operation": "如何新建工程",
|
||||
"project_type": None,
|
||||
"software_version": None,
|
||||
"operation_steps": None
|
||||
}
|
||||
def main():
|
||||
"""
|
||||
意图识别和问题改写示例
|
||||
@@ -193,18 +214,19 @@ def main():
|
||||
|
||||
# 读取提问数据
|
||||
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
data_file = os.path.join(current_dir, "..", "..", "data", "excel", "历史提问数据(dislike)_提问明确.xlsx")
|
||||
data_file = os.path.join(current_dir, "..", "..", "data", "excel", "历史提问数据(like)_提问明确.xlsx")
|
||||
output_file = os.path.join(current_dir, "..", "..", "data", "excel", "测试提问数据_槽位填充结果.xlsx")
|
||||
|
||||
# 检测是否为调试模式,调试模式下使用examples_query,否则从Excel读取
|
||||
is_debug = hasattr(sys, 'gettrace') and sys.gettrace() is not None
|
||||
# is_debug = False
|
||||
if is_debug:
|
||||
examples = examples_query.strip().split("\n")
|
||||
else:
|
||||
examples = load_questions_from_excel(data_file)
|
||||
|
||||
if not is_debug:
|
||||
max_workers = 20 # 减少并发数以避免API限制
|
||||
max_workers = 40 # 减少并发数以避免API限制
|
||||
logging.info(f"共有 {len(examples)} 个问题需要处理,使用 {max_workers} 个并发线程")
|
||||
|
||||
# 创建一个与输入顺序相同的结果列表
|
||||
@@ -229,9 +251,9 @@ def main():
|
||||
|
||||
completed += 1
|
||||
# 每处理batch_size条数据保存一次
|
||||
if completed % batch_size == 0:
|
||||
logging.info(f"已完成 {completed}/{len(examples)} 条,保存中间结果...")
|
||||
save_results_to_excel(results, output_file, is_final=False)
|
||||
# if completed % batch_size == 0:
|
||||
# logging.info(f"已完成 {completed}/{len(examples)} 条,保存中间结果...")
|
||||
# save_results_to_excel(results, output_file, is_final=False)
|
||||
|
||||
# 处理完所有数据后,保存最终结果
|
||||
save_results_to_excel(results, output_file, is_final=True)
|
||||
@@ -240,7 +262,7 @@ def main():
|
||||
for idx, query in enumerate(examples):
|
||||
if query.strip() == "":
|
||||
continue
|
||||
process_query(recognizer, query)
|
||||
process_query(recognizer, query, conversation_context, chat_history, previous_slots)
|
||||
|
||||
def setup_logging():
|
||||
# 配置日志输出到控制台
|
||||
|
||||
Reference in New Issue
Block a user