增加了评估脚本
This commit is contained in:
@@ -0,0 +1,75 @@
|
||||
# --- Bootstrap ---------------------------------------------------------------
# Statement order is kept exactly as in the original script: the asyncio patch
# and the .env load both run before the llama_index imports below.
from app.observability import init_observability
from app.settings import init_settings
from dotenv import load_dotenv

import nest_asyncio

# Patch asyncio so that an already-running event loop can be re-entered.
nest_asyncio.apply()

# Load key/value pairs from a local .env file into the process environment.
load_dotenv()

from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Response, SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.evaluation import (
    DatasetGenerator,
    FaithfulnessEvaluator,
)

# Project-local setup hooks from app.settings / app.observability.
# NOTE(review): presumably these configure the default LLM/embedding models
# and tracing -- confirm in those modules.
init_settings()
init_observability()
|
||||
|
||||
import os

# Faithfulness judge, constructed with no explicit arguments.
# NOTE(review): presumably it picks up the LLM configured by init_settings()
# -- confirm.
evaluator_qwen = FaithfulnessEvaluator()

# Directory holding the evaluation corpus. The original machine-specific path
# is kept as the default so existing runs behave the same; set EVAL_DATA_DIR
# (for example in the .env file read by load_dotenv()) to run on another machine.
data_dir = (
    os.environ.get("EVAL_DATA_DIR")
    or "D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test"
)
documents = SimpleDirectoryReader(data_dir).load_data()

# Chunk the documents with a sentence splitter (chunk_size=512) before indexing.
splitter = SentenceSplitter(chunk_size=512)

vector_index = VectorStoreIndex.from_documents(
    documents,
    transformations=[splitter],
)
|
||||
|
||||
|
||||
# # Run the evaluation (single-query example, currently disabled)
|
||||
# query_engine = vector_index.as_query_engine()
|
||||
# response_vector = query_engine.query("工程监理费的金额是多少?")
|
||||
# eval_result = evaluator_qwen.evaluate_response(response=response_vector)
|
||||
|
||||
# print(response_vector)
|
||||
# print(eval_result)
|
||||
|
||||
|
||||
# Derive an evaluation question set from the loaded documents and echo it so
# the generated questions can be inspected in the console output.
_question_count = 5
question_generator = DatasetGenerator.from_documents(documents)
eval_questions = question_generator.generate_questions_from_nodes(
    _question_count
)
print(eval_questions)
|
||||
|
||||
import asyncio
|
||||
|
||||
async def evaluate_query_engine_async(query_engine, questions, evaluator=None):
    """Query the engine with every question and count the passing answers.

    All questions are submitted concurrently through ``query_engine.aquery``;
    each response is then judged, one after another, by the evaluator.

    Args:
        query_engine: Object exposing an awaitable ``aquery(question)``.
        questions: Iterable of questions to send to the engine.
        evaluator: Object exposing an awaitable
            ``aevaluate_response(response=...)`` whose result has a
            ``passing`` attribute. Defaults to the module-level
            ``evaluator_qwen``.

    Returns:
        A tuple ``(passing_count, response_count)``.
    """
    if evaluator is None:
        evaluator = evaluator_qwen

    pending = [query_engine.aquery(question) for question in questions]
    results = await asyncio.gather(*pending)
    print(results)

    total_correct = 0
    for response in results:
        # Await the evaluator's async API: the blocking ``evaluate_response``
        # would have to re-enter the event loop from inside this coroutine.
        verdict = await evaluator.aevaluate_response(response=response)
        if verdict.passing:
            total_correct += 1

    return total_correct, len(results)
|
||||
|
||||
def evaluate_query_engine(query_engine, questions):
    """Synchronous wrapper around ``evaluate_query_engine_async``.

    Args:
        query_engine: Query engine forwarded to the async implementation.
        questions: Questions forwarded to the async implementation.

    Returns:
        The ``(correct, total)`` tuple produced by
        ``evaluate_query_engine_async``.
    """
    try:
        loop = asyncio.get_event_loop()
    except RuntimeError:
        # No current event loop in this thread (non-main thread, or a Python
        # version where get_event_loop() no longer creates one implicitly):
        # create one and install it so later calls can reuse it.
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)

    # NOTE(review): if the loop is already running, run_until_complete() only
    # works because this script calls nest_asyncio.apply() at start-up.
    return loop.run_until_complete(
        evaluate_query_engine_async(query_engine, questions)
    )
|
||||
|
||||
# Run the evaluation through the synchronous evaluate_query_engine helper,
# using at most the first five generated questions, and report the score.
vector_query_engine = vector_index.as_query_engine()
correct, total = evaluate_query_engine(
    vector_query_engine,
    eval_questions[:5],
)

print(f"score: {correct}/{total}")
|
||||
Reference in New Issue
Block a user