Add new files and update existing files

This commit is contained in:
chentianrui
2024-08-16 11:17:27 +08:00
parent 3082ac5f3d
commit ae7e21768b
14 changed files with 1581 additions and 91 deletions
+59 -33
View File
@@ -1,56 +1,82 @@
import os
import random
from sqlalchemy import create_engine, MetaData, Table, select, func
import json
from sqlalchemy import create_engine, MetaData, Table, func
from sqlalchemy.orm import sessionmaker
from dotenv import load_dotenv
load_dotenv()
# Per-table generation settings, keyed by table name. Each value is
# (positional index of the "name" column, name of the value column,
#  question template, answer template).
_TABLES_INFO = {
    "ProjectProperties": (0, "Value", "{name_value}的属性值是多少?", "属性值是{answer_value}"),
    "OtherFee": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
    "FeeCollectionTable": (1, "Rate", "{name_value}的费率是多少?", "费率是{answer_value}"),
    "ProjectDivision": (5, "Sum_Price", "{name_value}的合价是多少?", "合价是{answer_value}"),
    "ProjectDivisions_CostPreview": (4, "Direct_Cost", "{name_value}的直接费是多少?", "直接费是{answer_value}"),
    "TotalCalculateTable": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
    "ProjectQuantities": (6, "Encoding", "{name_value}的编码是多少?", "编码是{answer_value}"),
}


def generate_questions_for_table(table_name, file_path, num_questions=50):
    """Generate question/answer pairs for one table and write them as JSON.

    Each pair is built from one randomly selected row: the question is
    formatted from the row's name column and the answer from its value
    column, using the templates configured for ``table_name``. Every pair is
    an independent draw, so the same row may appear more than once.

    The database URL is read from the ``SQL_DATABASE_URL`` environment
    variable.

    Args:
        table_name: Name of the table to sample. Tables without an entry in
            the configuration are reported on stdout and skipped.
        file_path: Destination of the JSON file (a list of
            ``{"question": ..., "answer": ...}`` objects, UTF-8 encoded).
        num_questions: Number of pairs to generate.

    Returns:
        None. Nothing is written when the table is not configured or when it
        contains no rows.
    """
    # Look the configuration up first so that unconfigured tables never cost
    # a database connection or a reflection round-trip.
    table_info = _TABLES_INFO.get(table_name)
    if table_info is None:
        print(f"未找到表 {table_name} 的配置信息")
        return
    name_index, value_column, question_template, answer_template = table_info

    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    session = None
    try:
        # Reflect only the requested table instead of the whole database.
        table = Table(table_name, MetaData(), autoload_with=engine)
        # The name column is configured by position, not by name.
        name_column = table.columns.keys()[name_index]

        session = sessionmaker(bind=engine)()
        questions_and_answers = []
        for _ in range(num_questions):
            # NOTE(review): func.random() renders a random() SQL call; some
            # backends spell this differently (e.g. RAND()) - confirm against
            # the target database.
            row = session.query(table).order_by(func.random()).first()
            if row is None:
                # An empty table yields no row; bail out instead of failing
                # with AttributeError on getattr(None, ...).
                print(f"表 {table_name} 中没有数据,未生成问题")
                return

            name_value = getattr(row, name_column)
            answer_value = getattr(row, value_column)
            questions_and_answers.append({
                "question": question_template.format(name_value=name_value),
                "answer": answer_template.format(answer_value=answer_value),
            })
    finally:
        # Always release the session and the engine's connection pool, even
        # when reflection or a query raises.
        if session is not None:
            session.close()
        engine.dispose()

    # ensure_ascii=False keeps the Chinese text readable in the output file.
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(questions_and_answers, file, ensure_ascii=False, indent=4)

    print(f"已生成表 {table_name} 的问题到文件: {file_path}")
def main():
    """Write one ``<table name>.json`` file per reflected database table.

    Connects to the database named by the ``SQL_DATABASE_URL`` environment
    variable, reflects its tables, and calls
    ``generate_questions_for_table`` for each one. Output files are placed
    in the directory that contains this script.
    """
    db_engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    reflected = MetaData()
    reflected.reflect(bind=db_engine)

    # Output goes next to the script, independent of the working directory.
    output_dir = os.path.dirname(os.path.abspath(__file__))

    for table_name in reflected.tables:
        target_path = os.path.join(output_dir, f"{table_name}.json")
        generate_questions_for_table(table_name, target_path)
# Run the generator only when this file is executed as a script.
if __name__ == "__main__":
    main()