新增了参数评估脚本和评分脚本

This commit is contained in:
chentianrui
2024-08-29 11:38:45 +08:00
parent 3ee1ba529f
commit aff1793c4e
+10 -4
View File
@@ -14,14 +14,20 @@ from llama_index.core import (
SimpleDirectoryReader, SimpleDirectoryReader,
Response, Response,
) )
from llama_index.core.evaluation import FaithfulnessEvaluator, DatasetGenerator from llama_index.core.evaluation import (
FaithfulnessEvaluator,
DatasetGenerator,
CorrectnessEvaluator,
SemanticSimilarityEvaluator,)
init_settings() init_settings()
init_observability() init_observability()
evaluator_qwen = FaithfulnessEvaluator() faith_evaluator_qwen = FaithfulnessEvaluator() #诚实度评测
corr_evaluator_qwen = CorrectnessEvaluator() #准确率评测
Seman_evaluator_qwen = SemanticSimilarityEvaluator()#嵌入相似度评估
documents = SimpleDirectoryReader("D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test").load_data() documents = SimpleDirectoryReader("D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test").load_data()
@@ -52,12 +58,12 @@ async def evaluate_query_engine_async(query_engine, questions):
c = [query_engine.aquery(q) for q in questions] c = [query_engine.aquery(q) for q in questions]
gathering_future = asyncio.gather(*c) gathering_future = asyncio.gather(*c)
results = await gathering_future results = await gathering_future
print(results) #print(results)
total_correct = 0 total_correct = 0
for r in results: for r in results:
eval_result = ( eval_result = (
1 if evaluator_qwen.evaluate_response(response=r).passing else 0 1 if faith_evaluator_qwen.evaluate_response(response=r).passing else 0
) )
total_correct += eval_result total_correct += eval_result