From ae7e21768b47d20f38d62e28d0437db7fe67137a Mon Sep 17 00:00:00 2001 From: chentianrui Date: Fri, 16 Aug 2024 11:17:27 +0800 Subject: [PATCH] Add new files and update existing files --- backend/pyproject.toml | 9 +- backend/test1/FeeCollectionTable.json | 202 ++++++++++++++++++ backend/test1/OtherFee.json | 202 ++++++++++++++++++ backend/test1/ProjectDivision.json | 202 ++++++++++++++++++ .../test1/ProjectDivisions_CostPreview.json | 202 ++++++++++++++++++ backend/test1/ProjectProperties.json | 202 ++++++++++++++++++ backend/test1/ProjectQuantities.json | 202 ++++++++++++++++++ backend/test1/TotalCalculateTable.json | 202 ++++++++++++++++++ backend/test1/incorrect_answers_log.json | 28 +++ backend/test1/parameters_results.json | 0 backend/test1/query_results.json | 12 ++ backend/test1/query_test.py | 57 +++-- backend/test1/question.py | 92 +++++--- backend/test1/test_parameters.py | 60 +++--- 14 files changed, 1581 insertions(+), 91 deletions(-) create mode 100644 backend/test1/FeeCollectionTable.json create mode 100644 backend/test1/OtherFee.json create mode 100644 backend/test1/ProjectDivision.json create mode 100644 backend/test1/ProjectDivisions_CostPreview.json create mode 100644 backend/test1/ProjectProperties.json create mode 100644 backend/test1/ProjectQuantities.json create mode 100644 backend/test1/TotalCalculateTable.json create mode 100644 backend/test1/incorrect_answers_log.json create mode 100644 backend/test1/parameters_results.json create mode 100644 backend/test1/query_results.json diff --git a/backend/pyproject.toml b/backend/pyproject.toml index f244939..b0399fe 100644 --- a/backend/pyproject.toml +++ b/backend/pyproject.toml @@ -11,24 +11,25 @@ generate = "app.engine.generate:generate_datasource" [tool.poetry.dependencies] python = "^3.11,<3.12" -fastapi = "^0.112.0" +fastapi = "^0.110.3" python-dotenv = "^1.0.0" aiostream = "^0.6.2" llama-index = "0.10.63" cachetools = "^5.3.3" protobuf = "4.25.4" + #arize-phoenix = "^4.12.0" openinference-instrumentation-llama-index="2.2.3" llama-index-callbacks-arize-phoenix = "^0.1.4" llama-index-llms-dashscope = "^0.1.2" llama-index-embeddings-dashscope = "^0.1.4" llama-index-postprocessor-dashscope-rerank-custom = "0.1.0" -#xinference = "^0.14.1" -xinference.client = "^0.14.1" +xinference = "^0.14.1" +xinference-client = "^0.14.1" llama-index-llms-xinference = "^0.1.2" qdrant-client="^1.10.1" llama-index-vector-stores-qdrant = "^0.2.14" -chroma="^0.5.5" +chroma="^0.2.0" llama-index-vector-stores-chroma = "^0.1.10" llama-index-readers-json = "^0.1.5" diff --git a/backend/test1/FeeCollectionTable.json b/backend/test1/FeeCollectionTable.json new file mode 100644 index 0000000..e20c2ed --- /dev/null +++ b/backend/test1/FeeCollectionTable.json @@ -0,0 +1,202 @@ +[ + { + "question": "人工费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "临时设施费的费率是多少?", + "answer": "费率是6.3500000000" + }, + { + "question": "乙供装置性材料费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "直接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "甲供装置性材料费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "直接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "夜间施工增加费的费率是多少?", + "answer": "费率是0E-10" + }, + { + "question": "装置性材料费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "冬雨季施工增加费的费率是多少?", + "answer": "费率是3.5700000000" + }, + { + "question": "材料费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "机械价差的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "规费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "直接工程费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "安全文明施工费的费率是多少?", + "answer": "费率是3.5500000000" + }, + { + "question": "企业管理费的费率是多少?", + "answer": "费率是35.7600000000" + }, + { + "question": "税金的费率是多少?", + "answer": "费率是9.0000000000" + }, + { + "question": "直接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "安全文明施工费的费率是多少?", + "answer": "费率是3.5500000000" + }, + { + "question": "合计的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "税金的费率是多少?", + "answer": "费率是9.0000000000" + }, + { + "question": "安全文明施工费的费率是多少?", + "answer": "费率是3.5500000000" + }, + { + "question": "直接工程费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "税金的费率是多少?", + "answer": "费率是9.0000000000" + }, + { + "question": "社会保险费的费率是多少?", + "answer": "费率是15.0000000000" + }, + { + "question": "间接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "合计的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "临时设施费的费率是多少?", + "answer": "费率是0E-10" + }, + { + "question": "利润的费率是多少?", + "answer": "费率是5.2400000000" + }, + { + "question": "税金的费率是多少?", + "answer": "费率是9.0000000000" + }, + { + "question": "社会保险费的费率是多少?", + "answer": "费率是15.0000000000" + }, + { + "question": "直接工程费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "乙供设备不含税价的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "企业管理费的费率是多少?", + "answer": "费率是17.1300000000" + }, + { + "question": "企业管理费的费率是多少?", + "answer": "费率是35.7600000000" + }, + { + "question": "夜间施工增加费的费率是多少?", + "answer": "费率是0E-10" + }, + { + "question": "直接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "夜间施工增加费的费率是多少?", + "answer": "费率是0E-10" + }, + { + "question": "甲供设备含税价的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "施工机械使用费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "安全文明施工费的费率是多少?", + "answer": "费率是3.5500000000" + }, + { + "question": "定额直接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "主材费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "直接费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "施工企业配合调试费的费率是多少?", + "answer": "费率是0E-10" + }, + { + "question": "施工机械使用费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "临时设施费的费率是多少?", + "answer": "费率是6.3500000000" + }, + { + "question": "施工工具用具使用费的费率是多少?", + "answer": "费率是3.8200000000" + }, + { + "question": "措施费的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "材料价差的费率是多少?", + "answer": "费率是100.0000000000" + }, + { + "question": "措施费的费率是多少?", + "answer": "费率是100.0000000000" + } +] \ No newline at end of file diff --git a/backend/test1/OtherFee.json b/backend/test1/OtherFee.json new file mode 100644 index 0000000..b6e03d2 --- /dev/null +++ b/backend/test1/OtherFee.json @@ -0,0 +1,202 @@ +[ + { + "question": "前期工作管理费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "特种设备安全监测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "工程监理费的金额是多少?", + "answer": "金额是131009.9200000000" + }, + { + "question": "水土保持方案编审费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "生产准备费的金额是多少?", + "answer": "金额是472373669.4635599852" + }, + { + "question": "电力工程技术经济标准编制费的金额是多少?", + "answer": "金额是84352440.9756360054" + }, + { + "question": "项目建设技术服务费的金额是多少?", + "answer": "金额是16855957065.4302005768" + }, + { + "question": "工程保险费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "其他的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "施工图文件评审费的金额是多少?", + "answer": "金额是24940.0000000000" + }, + { + "question": "节能评估费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "桩基检测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "项目前期工作费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "其他的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "项目法人管理费的金额是多少?", + "answer": "金额是986923559.4149370193" + }, + { + "question": "专业爆破服务费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "节能评估费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "用地预审费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "设备材料监造费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "环境监测及环境保护验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "环境监测及环境保护验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "设备材料监造费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "勘察费的金额是多少?", + "answer": "金额是12122154260.0000000000" + }, + { + "question": "项目法人管理费的金额是多少?", + "answer": "金额是986923559.4149370193" + }, + { + "question": "社会稳定风险评估费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "勘察费的金额是多少?", + "answer": "金额是12122154260.0000000000" + }, + { + "question": "环境影响评价费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "水土保持方案编审费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "使用林地可行性研究费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "环境监测及环境保护验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "桩基检测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "设计费的金额是多少?", + "answer": "金额是4042055949.4299998283" + }, + { + "question": "环境监测及环境保护验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "建设场地征用及清理费的金额是多少?", + "answer": "金额是16831284.2287110016" + }, + { + "question": "施工图文件评审费的金额是多少?", + "answer": "金额是24940.0000000000" + }, + { + "question": "项目后评价费的金额是多少?", + "answer": "金额是421762204.8781780005" + }, + { + "question": "水土保持方案编审费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "勘察设计费的金额是多少?", + "answer": "金额是16164210209.4300003052" + }, + { + "question": "前期工作管理费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "节能评估费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "初步设计文件评审费的金额是多少?", + "answer": "金额是18560.0000000000" + }, + { + "question": "特种设备安全监测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "初步设计文件评审费的金额是多少?", + "answer": "金额是18560.0000000000" + }, + { + "question": "桩基检测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "矿产压覆评估费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "设计费的金额是多少?", + "answer": "金额是4042055949.4299998283" + }, + { + "question": "水土保持方案编审费用的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "电力工程技术经济标准编制费的金额是多少?", + "answer": "金额是84352440.9756360054" + }, + { + "question": "桩基检测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "矿产压覆评估费用的金额是多少?", + "answer": "金额是0E-10" + } +] \ No newline at end of file diff --git a/backend/test1/ProjectDivision.json b/backend/test1/ProjectDivision.json new file mode 100644 index 0000000..d583af5 --- /dev/null +++ b/backend/test1/ProjectDivision.json @@ -0,0 +1,202 @@ +[ + { + "question": "新增项目名称的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "预制基础的合价是多少?", + "answer": "合价是40567.2639480000" + }, + { + "question": "绝缘子串及金具安装的合价是多少?", + "answer": "合价是2897171.9878110001" + }, + { + "question": "杆塔工程材料工地运输的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "基础防护的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "新增项目名称的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "(1)拆除后能利用的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "地基处理的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "灌注桩基础的合价是多少?", + "answer": "合价是43466660.0544390008" + }, + { + "question": "(1)拆除后能利用的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "悬垂绝缘子串及金具安装的合价是多少?", + "answer": "合价是1251465.0340440001" + }, + { + "question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "附件安装工程的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "导地线跨越架设的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "辅助工程的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "新增项目名称的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "绝缘子串及金具安装的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "护坡、挡土墙及排洪沟砌筑的合价是多少?", + "answer": "合价是709931.9013930000" + }, + { + "question": "锚杆基础的合价是多少?", + "answer": "合价是15344967.9002950005" + }, + { + "question": "建筑工程的合价是多少?", + "answer": "合价是25411.2790780000" + }, + { + "question": "辅助工程的合价是多少?", + "answer": "合价是1046253.4135240000" + }, + { + "question": "导地线跨越架设的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "电缆工程的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "输、送电线路试运的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "基础土石方工程的合价是多少?", + "answer": "合价是32872843180.7429008484" + }, + { + "question": "基础永久性围堰的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "基础永久性围堰的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "混凝土及钢筋混凝土结构的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "输、送电线路试运的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "混合结构的合价是多少?", + "answer": "合价是16967.5193850000" + }, + { + "question": "杆塔组立的合价是多少?", + "answer": "合价是2253906.0859830002" + }, + { + "question": "附件安装工程的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "接地工程材料工地运输的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "新增项目名称的合价是多少?", + "answer": "合价是27148.0310160000" + }, + { + "question": "导地线架设的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "护坡、挡土墙及排洪沟的合价是多少?", + "answer": "合价是709931.9013930000" + }, + { + "question": "(1)拆除后能利用的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "基础永久性围堰砌筑的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "(2)拆除后不能利用的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "安装工程的合价是多少?", + "answer": "合价是65324.9496330000" + }, + { + "question": "尖峰、施工基面土石方工程的合价是多少?", + "answer": "合价是325205.4178770000" + }, + { + "question": "架线工程的合价是多少?", + "answer": "合价是4844399648.0778598785" + }, + { + "question": "杆塔组立的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "架线工程材料工地运输的合价是多少?", + "answer": "合价是2088570123.2409000397" + }, + { + "question": "导地线架设的合价是多少?", + "answer": "合价是0E-10" + }, + { + "question": "耐张绝缘子串及金具安装的合价是多少?", + "answer": "合价是1645706.9537680000" + }, + { + "question": "架线工程材料工地运输的合价是多少?", + "answer": "合价是2088570123.2409000397" + }, + { + "question": "其他基础的合价是多少?", + "answer": "合价是3839666.7656879998" + }, + { + "question": "架线工程材料工地运输的合价是多少?", + "answer": "合价是0E-10" + } +] \ No newline at end of file diff --git a/backend/test1/ProjectDivisions_CostPreview.json b/backend/test1/ProjectDivisions_CostPreview.json new file mode 100644 index 0000000..4f17f26 --- /dev/null +++ b/backend/test1/ProjectDivisions_CostPreview.json @@ -0,0 +1,202 @@ +[ + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是440877984.9458540082" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是1086586.9018659999" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是51486.7898090000" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是3321.8139230000" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是78005.0340730000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是3535892767.0972299576" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是24045.2334060000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是336253.7482950000" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是142270.1346780000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是61049.8665780000" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是933061.7795919999" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是182949.5997350000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(余物清理)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是21220645.1637400016" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是933061.7795919999" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是2501470269.7231497765" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是51486.7898090000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是55265.9111100000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是442897633.6273120046" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是1057484.3306960000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是442897633.6273120046" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是21220645.1637400016" + }, + { + "question": "线路取费表(余物清理)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是336253.7482950000" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "的直接费是多少?", + "answer": "直接费是61049.8665780000" + }, + { + "question": "线路取费表(余物清理)(1)的直接费是多少?", + "answer": "直接费是61049.8665780000" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是24045.2334060000" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(余物清理)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是659466.5955000001" + }, + { + "question": "线路取费表(拆除)的直接费是多少?", + "answer": "直接费是0E-10" + }, + { + "question": "线路取费表的直接费是多少?", + "answer": "直接费是2501470269.7231497765" + } +] \ No newline at end of file diff --git a/backend/test1/ProjectProperties.json b/backend/test1/ProjectProperties.json new file mode 100644 index 0000000..90afa22 --- /dev/null +++ b/backend/test1/ProjectProperties.json @@ -0,0 +1,202 @@ +[ + { + "question": "降阻剂_数量的属性值是多少?", + "answer": "属性值是f" + }, + { + "question": "导线2_单位单价的属性值是多少?", + "answer": "属性值是9" + }, + { + "question": "导线_单公里用量的属性值是多少?", + "answer": "属性值是36" + }, + { + "question": "线路参数_导地线防震措施的属性值是多少?", + "answer": "属性值是457" + }, + { + "question": "合成绝缘子_数量的属性值是多少?", + "answer": "属性值是5" + }, + { + "question": "基础垫层的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "其中:基础护壁用量的属性值是多少?", + "answer": "属性值是74394.212" + }, + { + "question": "铺石加混凝土的属性值是多少?", + "answer": "属性值是0.0" + }, + { + "question": "导线用量(西北)的属性值是多少?", + "answer": "属性值是-795976.0855" + }, + { + "question": "导线单公里用量(西北)的属性值是多少?", + "answer": "属性值是-159195.2171" + }, + { + "question": "灰土垫层单公里用量(西北)的属性值是多少?", + "answer": "属性值是8.0" + }, + { + "question": "地线瓷绝缘子单公里用量(西北)的属性值是多少?", + "answer": "属性值是738.253" + }, + { + "question": "地形条件_高山的属性值是多少?", + "answer": "属性值是7" + }, + { + "question": "流砂坑比例的属性值是多少?", + "answer": "属性值是0.001" + }, + { + "question": "碎石_数量的属性值是多少?", + "answer": "属性值是12" + }, + { + "question": "线路参数_导地线防震措施的属性值是多少?", + "answer": "属性值是457" + }, + { + "question": "灰土垫层的属性值是多少?", + "answer": "属性值是40.0" + }, + { + "question": "交叉跨越_弱电线路的属性值是多少?", + "answer": "属性值是45" + }, + { + "question": "地线1_根数的属性值是多少?", + "answer": "属性值是12" + }, + { + "question": "土质比例_岩石(人凿)的属性值是多少?", + "answer": "属性值是49" + }, + { + "question": "耐张混凝土杆基数的属性值是多少?", + "answer": "属性值是26.0" + }, + { + "question": "设计单位的属性值是多少?", + "answer": "属性值是3" + }, + { + "question": "接地钢的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "间隔棒_单公里用量的属性值是多少?", + "answer": "属性值是r" + }, + { + "question": "导线其中:跳线和导线弧垂单公里用量(西北)的属性值是多少?", + "answer": "属性值是159203.0171" + }, + { + "question": "桩基础的属性值是多少?", + "answer": "属性值是310.0" + }, + { + "question": "降阻剂的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "可抵扣增值税(万元)的属性值是多少?", + "answer": "属性值是2005241.808822" + }, + { + "question": "主要技术经济指标2的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "合成绝缘子_数量的属性值是多少?", + "answer": "属性值是5" + }, + { + "question": "土质比例_水坑的属性值是多少?", + "answer": "属性值是47" + }, + { + "question": "基础_插入式的属性值是多少?", + "answer": "属性值是3" + }, + { + "question": "耐张角钢塔比例的属性值是多少?", + "answer": "属性值是250%" + }, + { + "question": "地线的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "回路数的属性值是多少?", + "answer": "属性值是三回" + }, + { + "question": "导线其中:跳线和导线弧垂用量的属性值是多少?", + "answer": "属性值是796015.0855" + }, + { + "question": "OPGW用量(西北)的属性值是多少?", + "answer": "属性值是2904.737" + }, + { + "question": "现浇混凝土_单公里用量的属性值是多少?", + "answer": "属性值是22" + }, + { + "question": "架线工程费用(万元)(含价差)的属性值是多少?", + "answer": "属性值是3203726.0" + }, + { + "question": "耐张钢管塔比例的属性值是多少?", + "answer": "属性值是300%" + }, + { + "question": "单公里土石方量_基面的属性值是多少?", + "answer": "属性值是8*8" + }, + { + "question": "地线2的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "降阻剂的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "土质比例的属性值是多少?", + "answer": "属性值是" + }, + { + "question": "地线1_单位单价的属性值是多少?", + "answer": "属性值是113" + }, + { + "question": "绝缘子串型式_悬垂串的属性值是多少?", + "answer": "属性值是48" + }, + { + "question": "基坑土石方量(西北)的属性值是多少?", + "answer": "属性值是405403506.156" + }, + { + "question": "基坑坚土的属性值是多少?", + "answer": "属性值是25585167.713" + }, + { + "question": "基坑普通土的属性值是多少?", + "answer": "属性值是313873965.334" + }, + { + "question": "瓷绝缘子单公里用量(西北)的属性值是多少?", + "answer": "属性值是201.0" + } +] \ No newline at end of file diff --git a/backend/test1/ProjectQuantities.json b/backend/test1/ProjectQuantities.json new file mode 100644 index 0000000..5349024 --- /dev/null +++ b/backend/test1/ProjectQuantities.json @@ -0,0 +1,202 @@ +[ + { + "question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 水坑 坑深2.0m以内的编码是多少?", + "answer": "编码是YX2-72" + }, + { + "question": "钢筋加工及制作的编码是多少?", + "answer": "编码是YX3-43" + }, + { + "question": "船舶运输 线材 每件重400kg以内 运输的编码是多少?", + "answer": "编码是YX1-132" + }, + { + "question": "船舶运输 钢管塔材 运输的编码是多少?", + "answer": "编码是YX1-152" + }, + { + "question": "碎石的编码是多少?", + "answer": "编码是C10020103" + }, + { + "question": "混凝土(保护帽)的编码是多少?", + "answer": "编码是ZH1001" + }, + { + "question": "船舶运输 金具、绝缘子、零星钢材 运输的编码是多少?", + "answer": "编码是YX1-144" + }, + { + "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?", + "answer": "编码是YX1-1" + }, + { + "question": "船舶运输 线材 每件重1000kg以内 运输的编码是多少?", + "answer": "编码是YX1-136" + }, + { + "question": "混凝土搅拌及浇制 每基基础联系梁混凝土量20m³以内的编码是多少?", + "answer": "编码是YX3-69" + }, + { + "question": "索道运输 循环式 塔材 荷载1t以内 装卸的编码是多少?", + "answer": "编码是YX1-185" + }, + { + "question": "人力运输 混凝土预制品 每件重100kg以内的编码是多少?", + "answer": "编码是YX1-6" + }, + { + "question": "船舶运输 混凝土杆 每件重1500kg以上 运输的编码是多少?", + "answer": "编码是YX1-118" + }, + { + "question": "碎石的编码是多少?", + "answer": "编码是C10020103" + }, + { + "question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 泥水 坑深8.0m以上的编码是多少?", + "answer": "编码是YX2-55" + }, + { + "question": "机械施工土方 场地平整的编码是多少?", + "answer": "编码是GT1-1" + }, + { + "question": "汽车运输 混凝土预制品 每件重100kg以内 装卸的编码是多少?", + "answer": "编码是YX1-69" + }, + { + "question": "汽车运输 其他建筑安装材料 运输的编码是多少?", + "answer": "编码是YX1-108" + }, + { + "question": "钻孔灌注桩基础 混凝土搅拌及浇制 孔深10m以内的编码是多少?", + "answer": "编码是YX3-171" + }, + { + "question": "线路复测及分坑 直线双杆及拉线塔的编码是多少?", + "answer": "编码是YX2-3" + }, + { + "question": "氧化锌避雷器安装 35kV的编码是多少?", + "answer": "编码是YX7-32" + }, + { + "question": "混凝土(保护帽)的编码是多少?", + "answer": "编码是ZH1002" + }, + { + "question": "汽车运输 其他建筑安装材料 装卸的编码是多少?", + "answer": "编码是YX1-107" + }, + { + "question": "船舶运输 混凝土杆 每件重500kg以内 装卸的编码是多少?", + "answer": "编码是YX1-109" + }, + { + "question": "混凝土(保护帽)的编码是多少?", + "answer": "编码是ZH1001" + }, + { + "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?", + "answer": "编码是YX1-1" + }, + { + "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?", + "answer": "编码是YX1-1" + }, + { + "question": "普通硅酸盐水泥的编码是多少?", + "answer": "编码是C09010102" + }, + { + "question": "拖拉机运输 钢管塔材 运输的编码是多少?", + "answer": "编码是YX1-44" + }, + { + "question": "尖峰及施工基面挖方(或爆破) 普通土的编码是多少?", + "answer": "编码是YX2-226" + }, + { + "question": "汽车运输 角钢塔材 装卸的编码是多少?", + "answer": "编码是YX1-103" + }, + { + "question": "接地槽挖方(或爆破)及回填 普通土的编码是多少?", + "answer": "编码是YX2-213" + }, + { + "question": "水的编码是多少?", + "answer": "编码是C21010101" + }, + { + "question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV 针式单联串(悬垂串)的编码是多少?", + "answer": "编码是YX6-21" + }, + { + "question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV I型双联串(悬垂串)的编码是多少?", + "answer": "编码是YX6-22" + }, + { + "question": "钻孔灌注桩基础 机械推钻成孔 砂砾石 孔深20m以内 孔径1.0m以内的编码是多少?", + "answer": "编码是YX3-117" + }, + { + "question": "线路复测及分坑 直线自立塔的编码是多少?", + "answer": "编码是YX2-6" + }, + { + "question": "钻孔灌注桩基础 凿桩头 桩径0.8m以上的编码是多少?", + "answer": "编码是YX3-180" + }, + { + "question": "线路复测及分坑 耐张(转角)单杆的编码是多少?", + "answer": "编码是YX2-2" + }, + { + "question": "中砂的编码是多少?", + "answer": "编码是C10010101" + }, + { + "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?", + "answer": "编码是YX1-1" + }, + { + "question": "带电跨越电力线 被跨线电压等级 35kV的编码是多少?", + "answer": "编码是YX5-186" + }, + { + "question": "人工挖土方 普土 深2m以内的编码是多少?", + "answer": "编码是YT1-1" + }, + { + "question": "混凝土杆的编码是多少?", + "answer": "编码是" + }, + { + "question": "接地模块安装的编码是多少?", + "answer": "编码是YX3-213" + }, + { + "question": "拖拉机运输 线材 每件重400kg以内 运输的编码是多少?", + "answer": "编码是YX1-34" + }, + { + "question": "拖拉机运输 其他建筑安装材料 装卸的编码是多少?", + "answer": "编码是YX1-45" + }, + { + "question": "普通硅酸盐水泥的编码是多少?", + "answer": "编码是C09010102" + }, + { + "question": "船舶运输 线材 每件重4000kg以内 装卸的编码是多少?", + "answer": "编码是YX1-139" + }, + { + "question": "水的编码是多少?", + "answer": "编码是C21010101" + } +] \ No newline at end of file diff --git a/backend/test1/TotalCalculateTable.json b/backend/test1/TotalCalculateTable.json new file mode 100644 index 0000000..fce3d01 --- /dev/null +++ b/backend/test1/TotalCalculateTable.json @@ -0,0 +1,202 @@ +[ + { + "question": "架空输电线路本体工程的金额是多少?", + "answer": "金额是55105688268.5176010132" + }, + { + "question": "价差预备费的金额是多少?", + "answer": "金额是22731130869.6655998230" + }, + { + "question": "工程静态投资的金额是多少?", + "answer": "金额是715035853336.3909912109" + }, + { + "question": "工程动态投资的金额是多少?", + "answer": "金额是776282009093.5660400391" + }, + { + "question": "其中:工程建设检测费的金额是多少?", + "answer": "金额是185575370.1463980079" + }, + { + "question": "工程静态投资的金额是多少?", + "answer": "金额是715035853336.3909912109" + }, + { + "question": "建设期贷款利息的金额是多少?", + "answer": "金额是38515024887.5095977783" + }, + { + "question": "特殊项目的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "动态费用的金额是多少?", + "answer": "金额是61246155757.1752014160" + }, + { + "question": "动态费用的金额是多少?", + "answer": "金额是61246155757.1752014160" + }, + { + "question": "小计的金额是多少?", + "answer": "金额是458257942570.3129882812" + }, + { + "question": "其他费用的金额是多少?", + "answer": "金额是210942912572.8689880371" + }, + { + "question": "基本预备费的金额是多少?", + "answer": "金额是14020310849.7332000732" + }, + { + "question": "其中:水土保持监测及验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "其中:工程建设检测费的金额是多少?", + "answer": "金额是185575370.1463980079" + }, + { + "question": "其中:特种设备安全监测费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "工程静态投资的金额是多少?", + "answer": "金额是715035853336.3909912109" + }, + { + "question": "其中:水土保持监测及验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "架空输电线路本体工程的金额是多少?", + "answer": "金额是55105688268.5176010132" + }, + { + "question": "基本预备费的金额是多少?", + "answer": "金额是14020310849.7332000732" + }, + { + "question": "其中:水土保持监测及验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "小计的金额是多少?", + "answer": "金额是458257942570.3129882812" + }, + { + "question": "编制基准期价差的金额是多少?", + "answer": "金额是29246752707.1180000305" + }, + { + "question": "其中:水土保持监测及验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "小计的金额是多少?", + "answer": "金额是458257942570.3129882812" + }, + { + "question": "其他费用的金额是多少?", + "answer": "金额是210942912572.8689880371" + }, + { + "question": "特殊项目的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "编制基准期价差的金额是多少?", + "answer": "金额是29246752707.1180000305" + }, + { + "question": "特殊项目的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "小计的金额是多少?", + "answer": "金额是458257942570.3129882812" + }, + { + "question": "工程动态投资的金额是多少?", + "answer": "金额是776282009093.5660400391" + }, + { + "question": "其中:建设场地征用及清理费的金额是多少?", + "answer": "金额是16831284.2287110016" + }, + { + "question": "其中:可抵扣增值税额的金额是多少?", + "answer": "金额是20069645492.2888984680" + }, + { + "question": "小计的金额是多少?", + "answer": "金额是458257942570.3129882812" + }, + { + "question": "动态费用的金额是多少?", + "answer": "金额是61246155757.1752014160" + }, + { + "question": "建设期贷款利息的金额是多少?", + "answer": "金额是38515024887.5095977783" + }, + { + "question": "工程静态投资的金额是多少?", + "answer": "金额是715035853336.3909912109" + }, + { + "question": "其中:建设场地征用及清理费的金额是多少?", + "answer": "金额是16831284.2287110016" + }, + { + "question": "建设期贷款利息的金额是多少?", + "answer": "金额是38515024887.5095977783" + }, + { + "question": "工程动态投资的金额是多少?", + "answer": "金额是776282009093.5660400391" + }, + { + "question": "架空输电线路本体工程的金额是多少?", + "answer": "金额是55105688268.5176010132" + }, + { + "question": "其中:工程建设检测费的金额是多少?", + "answer": "金额是185575370.1463980079" + }, + { + "question": "其中:水土保持监测及验收费的金额是多少?", + "answer": "金额是0E-10" + }, + { + "question": "工程动态投资的金额是多少?", + "answer": "金额是776282009093.5660400391" + }, + { + "question": "其中:可抵扣增值税额的金额是多少?", + "answer": "金额是20069645492.2888984680" + }, + { + "question": "价差预备费的金额是多少?", + "answer": "金额是22731130869.6655998230" + }, + { + "question": "一般线路本体工程的金额是多少?", + "answer": "金额是55105688268.5176010132" + }, + { + "question": "其中:工程建设检测费的金额是多少?", + "answer": "金额是185575370.1463980079" + }, + { + "question": "基本预备费的金额是多少?", + "answer": "金额是14020310849.7332000732" + }, + { + "question": "设备购置费的金额是多少?", + "answer": "金额是2567934636.3574500084" + } +] \ No newline at end of file diff --git a/backend/test1/incorrect_answers_log.json b/backend/test1/incorrect_answers_log.json new file mode 100644 index 0000000..2d3b5ea --- /dev/null +++ b/backend/test1/incorrect_answers_log.json @@ -0,0 +1,28 @@ +[]错误问题: 税金的费率是多少? +正确答案: 9.0000000000 +查询结果: 9 + +错误问题: 冬雨季施工增加费的费率是多少? +正确答案: 3.5700000000 +查询结果: 未找到有效数字 + +错误问题: 住房公积金的费率是多少? +正确答案: 15.0000000000 +查询结果: 15 + +错误问题: 税金的费率是多少? +正确答案: 9.0000000000 +查询结果: 9 + +错误问题: 冬雨季施工增加费的费率是多少? +正确答案: 3.5700000000 +查询结果: 3 + +错误问题: 税金的费率是多少? +正确答案: 9.0000000000 +查询结果: 9 + +错误问题: 冬雨季施工增加费的费率是多少? +正确答案: 3.5700000000 +查询结果: 未找到有效数字 + diff --git a/backend/test1/parameters_results.json b/backend/test1/parameters_results.json new file mode 100644 index 0000000..e69de29 diff --git a/backend/test1/query_results.json b/backend/test1/query_results.json new file mode 100644 index 0000000..b2f3824 --- /dev/null +++ b/backend/test1/query_results.json @@ -0,0 +1,12 @@ +TOP_K: 5 +LLM_TEMPERATURE: 0.1 +similarity_top_k: 5.0 + +问题: 税金的费率是多少? +查询结果: SQL查询结果: 税金的费率是9.0%。请注意,查询结果中有多条重复的记录,但费率都是相同的,为9.0%。 +正确答案: 9.0000000000 + +问题: 冬雨季施工增加费的费率是多少? +查询结果: SQL查询结果: 对于"冬雨季施工增加费"的费率,当前数据库中没有找到具体信息。这可能是因为费率会根据不同的项目、地区或时间有所变化。建议您查阅最新的项目文件或与项目负责人联系以获取准确的费率信息。 +正确答案: 3.5700000000 + diff --git a/backend/test1/query_test.py b/backend/test1/query_test.py index 60533b8..81dbdac 100644 --- a/backend/test1/query_test.py +++ b/backend/test1/query_test.py @@ -1,12 +1,12 @@ import re import os import sys +import json from ctypes import cast from llama_index.core import VectorStoreIndex, SQLDatabase from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex -from llama_index.readers.database import DatabaseReader from sqlalchemy import create_engine from app.api.routers.chat import generate_filters @@ -23,15 +23,16 @@ load_dotenv() def read_questions_and_answers(file_path): questions_and_answers = [] with open(file_path, 'r', encoding='utf-8') as file: - for line in file: - if "question" in line and "answer" in line: - question_part = line.split(":")[1].strip() # 提取 question - answer_part = re.search(r"answer:.*?(\d+)", line) # 使用正则提取 answer 中的数字 - if answer_part: - answer_value = answer_part.group(1) - questions_and_answers.append((question_part, answer_value)) + data = json.load(file) # 读取 JSON 数据 + for entry in data: + question = entry.get("question", "").strip() # 获取 question + answer_match = re.search(r"(\d+\.?\d*)", entry.get("answer", "")) # 使用正则提取 answer 中的数字部分 + if question and answer_match: + answer_value = answer_match.group(1) # 获取匹配的数字 + questions_and_answers.append((question, answer_value)) return questions_and_answers + def save_results_to_file(question, result, correct_answer, file_path): with open(file_path, 'a', encoding='utf-8') as file: file.write(f"问题: {question}\n") @@ -44,20 +45,27 @@ def log_incorrect_answers(question, correct_answer, result, log_file_path): file.write(f"正确答案: {correct_answer}\n") file.write(f"查询结果: {result}\n\n") -def main(): - # 从命令行读取questions_file_path和查询类型 - if len(sys.argv) < 3: - print("请提供questions.txt文件的路径和查询类型(vector 或 sql)") - sys.exit(1) - questions_file_path = sys.argv[1] - query_type = sys.argv[2].lower() - +def main(questions_file, query_type): # 获取脚本所在的目录 script_dir = os.path.dirname(os.path.abspath(__file__)) - # 设置结果文件和日志文件的路径 - results_file_path = os.path.join(script_dir, "query_results.txt") - log_file_path = os.path.join(script_dir, "incorrect_answers_log.txt") + # 将文件扩展名更改为 .json + questions_file_path = os.path.join(script_dir, questions_file) + results_file_path = os.path.join(script_dir, "query_results.json") + log_file_path = os.path.join(script_dir, "incorrect_answers_log.json") + + # 如果 .json 文件不存在,则生成一个空的 JSON 文件 + if not os.path.exists(questions_file_path): + with open(questions_file_path, 'w', encoding='utf-8') as file: + json.dump([], file) # 写入空数组 + + if not os.path.exists(results_file_path): + with open(results_file_path, 'w', encoding='utf-8') as file: + json.dump([], file) # 写入空数组 + + if not os.path.exists(log_file_path): + with open(log_file_path, 'w', encoding='utf-8') as file: + json.dump([], file) # 写入空数组 # 更新环境变量 os.environ['TOP_K'] = str(5) # 向量的TOP_K值 @@ -133,7 +141,14 @@ def main(): log_incorrect_answers(question, correct_answer, "未找到有效数字", log_file_path) if __name__ == "__main__": + + if len(sys.argv) < 3: + print("请提供questions.json文件名和查询类型(vector 或 sql)") + sys.exit(1) + questions_file = sys.argv[1] + query_type = sys.argv[2].lower() + from phoenix.trace import using_project - with using_project("ly_zjapp_test") as obj: - main() + with using_project(questions_file) as obj: + main(questions_file, query_type) diff --git a/backend/test1/question.py b/backend/test1/question.py index 962621b..4cf1c22 100644 --- a/backend/test1/question.py +++ b/backend/test1/question.py @@ -1,56 +1,82 @@ import os -import random -from sqlalchemy import create_engine, MetaData, Table, select, func +import json +from sqlalchemy import create_engine, MetaData, Table, func from sqlalchemy.orm import sessionmaker from dotenv import load_dotenv load_dotenv() -def generate_questions(file_path, num_questions_per_table=10): +def generate_questions_for_table(table_name, file_path, num_questions=50): engine = create_engine(os.getenv("SQL_DATABASE_URL", "")) metadata = MetaData() metadata.reflect(bind=engine) - # 定义表名及其对应的列索引和问题模板 + # 定义每张表的列索引、值列和问题模板 tables_info = { - "ProjectProperties": (0, "Attribute_Value", "{name_value}的属性值是多少?"), - "OtherFee": (0, "Amount", "{name_value}的金额是多少?"), - "FeeCollectionTable": (0, "Rate", "{name_value}的费率是多少?"), - "ProjectDivision": (0, "Total_Price", "{name_value}的合价是多少?"), - "ProjectDivisions_CostPreview": (0, "Direct_Fee", "{name_value}的直接费是多少?"), - "TotalCalculateTable": (0, "Amount", "{name_value}的金额是多少?"), - "ProjectQuantities": (0, "Code", "{name_value}的编码是多少?") + "ProjectProperties": (0, "Value", "{name_value}的属性值是多少?", "属性值是{answer_value}"), + "OtherFee": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"), + "FeeCollectionTable": (1, "Rate", "{name_value}的费率是多少?", "费率是{answer_value}"), + "ProjectDivision": (5, "Sum_Price", "{name_value}的合价是多少?", "合价是{answer_value}"), + "ProjectDivisions_CostPreview": (4, "Direct_Cost", "{name_value}的直接费是多少?", "直接费是{answer_value}"), + "TotalCalculateTable": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"), + "ProjectQuantities": (6, "Encoding", "{name_value}的编码是多少?", "编码是{answer_value}") } - questions = [] + if table_name not in tables_info: + print(f"未找到表 {table_name} 的配置信息") + return - for table_name, (name_index, value_column, question_template) in tables_info.items(): - # 加载这张表 - table = Table(table_name, metadata, autoload_with=engine) + # 获取表信息 + name_index, value_column, question_template, answer_template = tables_info[table_name] - # 创建会话 - Session = sessionmaker(bind=engine) - session = Session() + # 加载表 + table = Table(table_name, metadata, autoload_with=engine) - # 获取列名 - name_column = table.columns.keys()[name_index] + # 创建会话 + Session = sessionmaker(bind=engine) + session = Session() - # 对于每个表生成num_questions_per_table个问题 - for _ in range(num_questions_per_table): - # 查询表中的随机一行,并获取名称列的值 - row = session.query(table).order_by(func.random()).first() - name_value = getattr(row, name_column) + # 获取列名 + name_column = table.columns.keys()[name_index] - # 构造问题 - question = question_template.format(name_value=name_value) - questions.append(question) + questions_and_answers = [] - # 写入文件 + # 生成指定数量的问题 + for _ in range(num_questions): + # 查询表中的随机一行,并获取名称列和值列的值 + row = session.query(table).order_by(func.random()).first() + name_value = getattr(row, name_column) + answer_value = getattr(row, value_column) + + # 构造问题和答案 + question = question_template.format(name_value=name_value) + answer = answer_template.format(answer_value=answer_value) + + # 添加到列表中 + questions_and_answers.append({ + "question": question, + "answer": answer + }) + + # 将问题和答案以 JSON 格式写入文件 with open(file_path, 'w', encoding='utf-8') as file: - for question in questions: - file.write(question + '\n') + json.dump(questions_and_answers, file, ensure_ascii=False, indent=4) + print(f"已生成表 {table_name} 的问题到文件: {file_path}") + +def main(): + engine = create_engine(os.getenv("SQL_DATABASE_URL", "")) + metadata = MetaData() + metadata.reflect(bind=engine) + + # 获取脚本所在目录 + script_dir = os.path.dirname(os.path.abspath(__file__)) + + # 遍历每张表并生成对应的 JSON 文件 + for table_name in metadata.tables.keys(): + # 文件路径为:脚本目录 + 表名 + .json + file_path = os.path.join(script_dir, f"{table_name}.json") + generate_questions_for_table(table_name, file_path) if __name__ == "__main__": - questions_file_path = "/home/bw/ctr/zjdataai-app/backend/test1/questions.txt" - generate_questions(questions_file_path) \ No newline at end of file + main() diff --git a/backend/test1/test_parameters.py b/backend/test1/test_parameters.py index fda5677..479c2f1 100644 --- a/backend/test1/test_parameters.py +++ b/backend/test1/test_parameters.py @@ -1,16 +1,14 @@ import os -from ctypes import cast +import json import sys from llama_index.core import VectorStoreIndex, SQLDatabase from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex -from llama_index.readers.database import DatabaseReader from sqlalchemy import create_engine from app.api.routers.chat import generate_filters from app.engine import get_index, makeDescriptionByEngine -from app.engine.loaders.db import CustomDatabaseReader from app.engine.vectordb import get_vector_store from app.observability import init_observability from app.settings import init_settings @@ -19,25 +17,21 @@ from dotenv import load_dotenv load_dotenv() def read_questions(file_path): - questions = [] with open(file_path, 'r', encoding='utf-8') as file: - for line in file: - if "question" in line: - question_part = line.split(":")[1].strip() # 提取 "question" 后的内容 - questions.append(question_part) + data = json.load(file) + questions = [item["question"] for item in data] return questions def save_results_to_file(question, result, file_path): + result_data = { + "question": question, + "result": result + } with open(file_path, 'a', encoding='utf-8') as file: - file.write(f"问题: {question}\n") - file.write(f"结果: {result}\n\n") + json.dump(result_data, file, ensure_ascii=False) + file.write('\n') -def main(): - # 从命令行读取questions_file_path - if len(sys.argv) < 2: - print("请提供questions.txt文件的路径") - sys.exit(1) - questions_file_path = sys.argv[1] +def main(questions_file): # 更新环境变量 os.environ['TOP_K'] = str(5) # 向量的TOP_K值 os.environ['LLM_TEMPERATURE'] = str(0.1) # 温度值 @@ -66,36 +60,36 @@ def main(): sql_query_engine = SQLTableRetrieverQueryEngine(sql_database, sql_obj_index.as_retriever(similarity_top_k=similarity_top_k)) - questions = read_questions(questions_file_path) - script_dir = os.path.dirname(os.path.abspath(__file__)) + questions_file_path = os.path.join(script_dir, questions_file) + results_file_path = os.path.join(script_dir, "parameters_results.json") - results_file_path = os.path.join(script_dir, "query_results.txt") + questions = read_questions(questions_file_path) # 如果文件为空,则写入参数值 - if os.path.getsize(results_file_path) == 0: + if not os.path.isfile(results_file_path): with open(results_file_path, 'w', encoding='utf-8') as file: - file.write(f"TOP_K: {top_k}\n") - file.write(f"LLM_TEMPERATURE: {temperature}\n") - file.write(f"similarity_top_k: {similarity_top_k}\n\n") + json.dump({ + "TOP_K": top_k, + "LLM_TEMPERATURE": temperature, + "similarity_top_k": similarity_top_k + }, file, ensure_ascii=False) + file.write('\n') # 循环执行查询 for i, question in enumerate(questions): print(f"Executing query {i+1}: {question}") - # query_engine = index.as_query_engine( - # similarity_top_k=top_k, filters=filters - # ) - # query_result = query_engine.query(question) - - # print(f"向量查询结果: {query_result}\n") - # save_results_to_file(question, f"向量查询结果: {query_result}", results_file_path) - sql_query_result = sql_query_engine.query(question) print(f"SQL查询结果: {sql_query_result}\n") save_results_to_file(question, f"SQL查询结果: {sql_query_result}", results_file_path) if __name__ == "__main__": + if len(sys.argv) < 2: + print("请提供questions.json文件的路径") + sys.exit(1) + questions_file = sys.argv[1] + from phoenix.trace import using_project - with using_project("ly_zjapp_test") as obj: - main() + with using_project(questions_file) as obj: + main(questions_file)