"""Score a vector-index query engine for faithfulness on generated questions.

The script loads documents from ``DATA_DIR``, builds a ``VectorStoreIndex``
over them, asks ``DatasetGenerator`` for evaluation questions, runs the first
five questions through the index's query engine, and prints how many of the
responses ``FaithfulnessEvaluator`` marks as passing.
"""

import asyncio

from app.observability import init_observability
from app.settings import init_settings
from dotenv import load_dotenv
import nest_asyncio

# These two calls stay ahead of the llama_index imports, as in the original
# script.
# NOTE(review): presumably so the event-loop patch and the .env values are in
# place before llama_index is imported -- confirm before reordering.
nest_asyncio.apply()
load_dotenv()

from llama_index.core.node_parser import SentenceSplitter  # noqa: E402
from llama_index.core import (  # noqa: E402
    VectorStoreIndex,
    SimpleDirectoryReader,
    Response,
)
from llama_index.core.evaluation import (  # noqa: E402
    FaithfulnessEvaluator,
    DatasetGenerator,
)

# Directory holding the documents that are indexed and used to generate the
# evaluation questions.
DATA_DIR = "D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test"

init_settings()
init_observability()

# Created after init_settings(); NOTE(review): presumably it relies on the
# model configured there -- confirm against app.settings.
evaluator_qwen = FaithfulnessEvaluator()

documents = SimpleDirectoryReader(DATA_DIR).load_data()

splitter = SentenceSplitter(chunk_size=512)
vector_index = VectorStoreIndex.from_documents(
    documents,
    transformations=[splitter],
)

# Single-query evaluation example, kept for manual debugging.
# (The query below asks for the amount of the engineering supervision fee.)
# query_engine = vector_index.as_query_engine()
# response_vector = query_engine.query("工程监理费的金额是多少?")
# eval_result = evaluator_qwen.evaluate_response(response=response_vector)
# print(response_vector)
# print(eval_result)

question_generator = DatasetGenerator.from_documents(documents)
eval_questions = question_generator.generate_questions_from_nodes(5)
print(eval_questions)


async def evaluate_query_engine_async(query_engine, questions):
    """Query the engine with every question and count the passing responses.

    All questions are sent concurrently through ``query_engine.aquery``; each
    response is then judged, one at a time and in question order, by the
    module-level ``evaluator_qwen``.

    Args:
        query_engine: Object exposing an awaitable ``aquery(question)``.
        questions: Iterable of questions to ask.

    Returns:
        A ``(passed, total)`` tuple: the number of responses whose evaluation
        result is truthy on ``passing``, and the number of responses.
    """
    responses = await asyncio.gather(
        *(query_engine.aquery(question) for question in questions)
    )
    print(responses)

    passed = 0
    for response in responses:
        # Await the evaluator's async entry point instead of calling the
        # blocking ``evaluate_response`` from inside a running event loop.
        result = await evaluator_qwen.aevaluate_response(response=response)
        if result.passing:
            passed += 1
    return passed, len(responses)


def evaluate_query_engine(query_engine, questions):
    """Synchronous wrapper around ``evaluate_query_engine_async``.

    Args:
        query_engine: Object exposing an awaitable ``aquery(question)``.
        questions: Iterable of questions to ask.

    Returns:
        The ``(passed, total)`` tuple produced by the async implementation.
    """
    # asyncio.run replaces the deprecated get_event_loop()/run_until_complete
    # pairing, which fails on recent Python versions when no loop is current.
    correct, total = asyncio.run(
        evaluate_query_engine_async(query_engine, questions)
    )
    return correct, total


# Score the vector query engine on the first five generated questions.
vector_query_engine = vector_index.as_query_engine()
correct, total = evaluate_query_engine(vector_query_engine, eval_questions[:5])

print(f"score: {correct}/{total}")