优化 DifyCompareTest 和 WorkorderToDify 模块：调整日志记录格式，修复 API 密钥获取方式，增强工单处理流程，添加元数据管理功能，改进并发上传逻辑，更新文档处理方式。

This commit is contained in:
2025-07-28 08:34:17 +08:00
parent 780f423200
commit ba42107999
3 changed files with 356 additions and 64 deletions
+24 -17
View File
@@ -26,7 +26,7 @@ if not os.path.exists(log_dir):
os.makedirs(log_dir)
# 生成带时间戳的日志文件名
log_file = os.path.join(log_dir, f'dify_compare_{datetime.now().strftime("%Y%m%d_%H%M%S")}.log')
log_file = os.path.join(log_dir, f'dify_compare_{datetime.now().strftime("%Y%m%d")}.log')
import logging
@@ -43,7 +43,7 @@ logging.basicConfig(
class DifyCompareTest:
def __init__(self):
# 词条与工单同时检索
self.both_wiki_worker_client = ChatClient(api_key="app-CPoOMaGDsLRPAe9TW7Xjhszy", base_url=os.getenv("DIFY_BSAE_URL"))
self.both_wiki_worker_client = ChatClient(api_key=os.getenv("DIFY_APP_KEY"), base_url=os.getenv("DIFY_BSAE_URL"))
self.llm = OpenAiLLM(base_url=os.getenv("OPENAI_API_BASE"), model=os.getenv("MODEL_NAME"))
def llm_judge_answer(self, old_answer: str, now_answer: str):
@@ -100,22 +100,31 @@ class DifyCompareTest:
answer = result.get('answer', "")
if len(answer) == 0:
raise Exception(f"回答为空: {result}")
judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
return answer, judge_result
if old_answer:
judge_result = self.llm_judge_answer(old_answer=old_answer, now_answer=answer)
else:
judge_result=""
# 只取回答的前半部分
answer = answer.split("----------------------------------------")[0].strip()
message_id = result.get('message_id', "")
return answer, judge_result, message_id
except Exception as e:
retry_count += 1
if retry_count >= max_retries:
logging.error(f"词条与工单同时检索调用失败 (尝试 {max_retries} 次后): {e}")
return '', ''
return '', '', ''
else:
import time
time.sleep(1) # 等待1秒后重试
time.sleep(10) # 等待10秒后重试
def process_single_row(self, index, row):
"""处理单行数据的方法"""
try:
query = row["提问"]
old_answer = row["回答"]
if "回答" in row:
old_answer = row["回答"]
else:
old_answer = ""
current_software = row["当前软件"]
inputs = {
@@ -124,7 +133,7 @@ class DifyCompareTest:
}
# 调用词条与工单同时检索工作流
answer, judge_result = self.process_workflow(
answer, judge_result, message_id = self.process_workflow(
self.both_wiki_worker_client,
inputs,
query,
@@ -133,17 +142,17 @@ class DifyCompareTest:
# 构建结果
result_row = row.copy()
result_row["词条与工单同时回答"] = answer
result_row["词条与工单同时回答对比"] = judge_result
result_row["message_id"] = message_id
result_row["回答"] = answer
# result_row["词条与工单同时回答对比"] = judge_result
logging.info(f"成功处理第 {index + 1} 行数据")
return index, result_row
except Exception as e:
logging.error(f"处理第 {index + 1} 行数据时出错: {e}")
result_row = row.copy()
result_row["词条与工单同时回答"] = ''
result_row["词条与工单同时回答对比"] = ''
result_row["回答"] = ''
result_row["message_id"] = ''
return index, result_row
@@ -166,7 +175,7 @@ class DifyCompareTest:
logging.info(f"成功读取Excel文件: {excel_path}, 共 {len(df)} 行数据")
# 验证必要的列是否存在
required_columns = ["提问", "回答", "当前软件"]
required_columns = ["提问", "当前软件"]
missing_columns = [col for col in required_columns if col not in df.columns]
if missing_columns:
logging.error(f"Excel文件缺少必要的列: {missing_columns}")
@@ -199,8 +208,6 @@ class DifyCompareTest:
logging.error(f"线程执行失败 (行{original_index + 1}): {e}")
# 添加失败的行
result_row = df.iloc[original_index].copy()
result_row["词条与工单同时回答"] = '线程执行失败'
result_row["词条与工单同时回答对比"] = '线程执行失败'
results[original_index] = result_row
pbar.update(1)
@@ -223,7 +230,7 @@ if __name__ == "__main__":
# 处理第一个文件
excel_files = [
# ("data/excel/5月.xlsx", "data/excel/5月问答对比.xlsx"),
("data/excel/其他月.xlsx", "data/excel/其他月问答对比.xlsx")
("data/excel/第四轮问题-Part2.xlsx", "data/excel/第四轮问题-Part2-问答测试.xlsx")
]
for excel_path, save_path in excel_files: