LLM输出的内容先通过正则清理多余内容

This commit is contained in:
2025-08-28 14:29:19 +08:00
parent 660b5f6561
commit abc868225a
5 changed files with 95 additions and 119 deletions
+15 -43
View File
@@ -204,41 +204,7 @@ class QueryRewriteProcessor:
enable_query_expansion=True,
use_jieba=True,
cur_soft_name=current_softname))
# 提取分类信息
classification = result["classification"]
original_query = result["rewrite"]["rewrite"]
query_list = result["query_expand"]["all"]
# 将字典转换为Classification对象
classification_obj = Classification(**classification)
# 根据enable_retrieval参数决定是否进行文档检索
retrieved_doc = None
retrieved_doc_titles=[]
if retrieved_doc:
retrieved_doc_titles=[doc["title"].split("/")[-1] for doc in retrieved_doc]
# 提取槽位填充信息
slot_filling = result.get("slot_filling", {})
slot_filling_str = ""
if slot_filling and "filled_data" in slot_filling:
# 格式化槽位填充结果
slot_filling_str = json.dumps({
"is_complete": slot_filling.get("is_complete", False),
"missing_slots": slot_filling.get("missing_slots", {}),
"filled_data": slot_filling.get("filled_data", {})
}, ensure_ascii=False, indent=2)
# 处理成功,返回结果
return {
"问题": query,
"问题分类": f"{classification['vertical_classification']} - {classification['sub_classification']}",
"问题改写": result["rewrite"]["rewrite"],
"槽位信息": slot_filling_str,
"检索的文档": "\n".join(retrieved_doc_titles),
"检索的内容": json.dumps(retrieved_doc, ensure_ascii=False, indent=2) if retrieved_doc else "",
}
return result
except Exception as e:
logging.error(f"处理问题 '{query}' 时出错: ",exc_info=True)
retry_count += 1
@@ -424,19 +390,25 @@ def main():
logging.info(f"所有处理完成,最终结果已保存至: {output_file}")
else:
logging.info(f"文档检索功能状态: 已启用")
for idx, query in enumerate(examples):
if query.strip() == "":
continue
query="怎么调整报表顺序"
nCount=0
while True:
query="请问怎么在南网版概算工程软件版里面查施工费?"
conversation_context={
"current_softname": "储能计价通C1软件"
"current_softname": "电力建设计价通软件"
}
# 在调试模式下使用完整的参数
print(json.dumps(processor.process_query(
result = processor.process_query(
query,
conversation_context=conversation_context,
enable_retrieval=True
), ensure_ascii=False, indent=2))
)
# 在调试模式下使用完整的参数
# print(json.dumps(processor.process_query(
# query,
# conversation_context=conversation_context,
# enable_retrieval=True
# ), ensure_ascii=False, indent=2))
nCount+=1
print("测试数量: ", nCount)
def setup_logging():
# 配置日志输出到控制台