Add new files and update existing files
This commit is contained in:
+59
-33
@@ -1,56 +1,82 @@
|
||||
import os
|
||||
import random
|
||||
from sqlalchemy import create_engine, MetaData, Table, select, func
|
||||
import json
|
||||
from sqlalchemy import create_engine, MetaData, Table, func
|
||||
from sqlalchemy.orm import sessionmaker
|
||||
from dotenv import load_dotenv
|
||||
|
||||
load_dotenv()
|
||||
|
||||
def generate_questions_for_table(table_name, file_path, num_questions=50):
    """Generate question/answer pairs for one table and save them as JSON.

    For each question a random row is read from ``table_name``; the value of
    the configured "name" column is substituted into the question template
    and the value of the configured "value" column into the answer template.
    The resulting list of ``{"question": ..., "answer": ...}`` dicts is
    written to ``file_path`` as UTF-8 JSON.

    Args:
        table_name: Name of the database table to sample from. It must be
            one of the keys of the built-in ``tables_info`` configuration.
        file_path: Destination path of the JSON file (overwritten).
        num_questions: Number of random rows to sample. Rows are sampled
            independently, so the same row may appear more than once.

    Returns:
        None. If ``table_name`` has no configuration, a message is printed
        and nothing is written.
    """
    # Per table: (index of the name column, value column name,
    #             question template, answer template).
    tables_info = {
        "ProjectProperties": (0, "Value", "{name_value}的属性值是多少?", "属性值是{answer_value}"),
        "OtherFee": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
        "FeeCollectionTable": (1, "Rate", "{name_value}的费率是多少?", "费率是{answer_value}"),
        "ProjectDivision": (5, "Sum_Price", "{name_value}的合价是多少?", "合价是{answer_value}"),
        "ProjectDivisions_CostPreview": (4, "Direct_Cost", "{name_value}的直接费是多少?", "直接费是{answer_value}"),
        "TotalCalculateTable": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
        "ProjectQuantities": (6, "Encoding", "{name_value}的编码是多少?", "编码是{answer_value}"),
    }

    # Validate the table name before touching the database, so unknown
    # tables cost neither a connection nor a schema reflection.
    if table_name not in tables_info:
        print(f"未找到表 {table_name} 的配置信息")
        return

    name_index, value_column, question_template, answer_template = tables_info[table_name]

    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    session = None
    questions_and_answers = []
    try:
        # Reflect only the requested table instead of the whole database.
        table = Table(table_name, MetaData(), autoload_with=engine)

        # Resolve the name column from its position in the table.
        name_column = table.columns.keys()[name_index]

        session = sessionmaker(bind=engine)()

        for _ in range(num_questions):
            # Fetch one random row and read its name and value columns.
            row = session.query(table).order_by(func.random()).first()
            if row is None:
                # The table is empty: there is nothing to sample from.
                break

            name_value = getattr(row, name_column)
            answer_value = getattr(row, value_column)

            questions_and_answers.append({
                "question": question_template.format(name_value=name_value),
                "answer": answer_template.format(answer_value=answer_value),
            })
    finally:
        # Always release the session and the engine's connection pool.
        if session is not None:
            session.close()
        engine.dispose()

    # Write the questions and answers to the file as JSON.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(questions_and_answers, file, ensure_ascii=False, indent=4)

    print(f"已生成表 {table_name} 的问题到文件: {file_path}")
|
||||
|
||||
def main():
    """Generate one ``<table>.json`` file next to this script per database table.

    Reflects the schema of the database named by the ``SQL_DATABASE_URL``
    environment variable and calls ``generate_questions_for_table`` once for
    every reflected table.
    """
    db_engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    db_metadata = MetaData()
    db_metadata.reflect(bind=db_engine)

    # Output files are placed in the directory that contains this script.
    output_dir = os.path.dirname(os.path.abspath(__file__))

    # One JSON file per table: <script directory>/<table name>.json
    for name in db_metadata.tables:
        target = os.path.join(output_dir, f"{name}.json")
        generate_questions_for_table(name, target)
|
||||
|
||||
if __name__ == "__main__":
    # Script entry point: generate the per-table JSON files.
    main()
|
||||
|
||||
Reference in New Issue
Block a user