diff --git a/backend/test1/evaluator.py b/backend/test1/evaluator.py index c53084d..fcefc4c 100644 --- a/backend/test1/evaluator.py +++ b/backend/test1/evaluator.py @@ -14,14 +14,20 @@ from llama_index.core import ( SimpleDirectoryReader, Response, ) -from llama_index.core.evaluation import FaithfulnessEvaluator, DatasetGenerator +from llama_index.core.evaluation import ( + FaithfulnessEvaluator, + DatasetGenerator, + CorrectnessEvaluator, + SemanticSimilarityEvaluator,) init_settings() init_observability() -evaluator_qwen = FaithfulnessEvaluator() +faith_evaluator_qwen = FaithfulnessEvaluator() # faithfulness evaluation +corr_evaluator_qwen = CorrectnessEvaluator() # correctness (accuracy) evaluation +Seman_evaluator_qwen = SemanticSimilarityEvaluator()# embedding similarity evaluation documents = SimpleDirectoryReader("D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test").load_data() @@ -52,12 +58,12 @@ async def evaluate_query_engine_async(query_engine, questions): c = [query_engine.aquery(q) for q in questions] gathering_future = asyncio.gather(*c) results = await gathering_future - print(results) + #print(results) total_correct = 0 for r in results: eval_result = ( - 1 if evaluator_qwen.evaluate_response(response=r).passing else 0 + 1 if faith_evaluator_qwen.evaluate_response(response=r).passing else 0 ) total_correct += eval_result