Compare commits
41 Commits
dev
..
6f5548ee61
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f5548ee61 | |||
| 331595cc57 | |||
| cb34fde995 | |||
| 123693c9a0 | |||
| a897f0c6de | |||
| adce2a3809 | |||
| 7875e2cbcc | |||
| 95fbc820b9 | |||
| 54f19a20fc | |||
| bc124c5513 | |||
| 1c773924db | |||
| 60b0f11ca2 | |||
| 21fdc16259 | |||
| 64ba7efcdc | |||
| 9a09e9f79f | |||
| 9ac53011e0 | |||
| f171282a0c | |||
| 626ff1e632 | |||
| 5189d4368f | |||
| 5f182075aa | |||
| b1ef410638 | |||
| 7b040ae248 | |||
| 786c4d05f6 | |||
| a8db51e844 | |||
| 545fbc732b | |||
| 56cb36dfc9 | |||
| a6c5988408 | |||
| c4cf09a28f | |||
| 75fde3598b | |||
| aba6475c5a | |||
| ae19725d72 | |||
| 97a486e631 | |||
| 728ee06c5a | |||
| a4dd385368 | |||
| 24c808d66d | |||
| ced3199550 | |||
| c4088fe963 | |||
| e7628809ad | |||
| 73565b26e4 | |||
| e9ccd7db35 | |||
| 4020b603b1 |
+54
-29
@@ -1,42 +1,62 @@
|
||||
JIEBA_DATA=./nltk_data
|
||||
NLTK_DATA=./nltk_data
|
||||
SQLITE_DATABASE_URL=sqlite:///./source.db
|
||||
DATA_SOURCE_CACHE=./restapi
|
||||
|
||||
# The Llama Cloud API key.
|
||||
# LLAMA_CLOUD_API_KEY=
|
||||
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
|
||||
SQLITE_DATABASE_URL=sqlite:///./source.db
|
||||
|
||||
DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
|
||||
# The provider for the AI models to use.
|
||||
MODEL_PROVIDER=dashscope
|
||||
# The name of LLM model to use.
|
||||
MODEL=qwen-max
|
||||
# The number of similar embeddings to return when retrieving documents.
|
||||
TOP_K=10
|
||||
#--------------------------
|
||||
# 是否启用混合检索
|
||||
HYBRID_ENABLED = true
|
||||
# 混合检索阈值
|
||||
HYBRID_ALPHA = 0.6
|
||||
# 是否启用检索重排功能
|
||||
ENABLE_RERANK=true
|
||||
# Name of the embedding model to use.
|
||||
EMBEDDING_MODEL=text-embedding-v2
|
||||
RERANK_ENABLED=true
|
||||
|
||||
#---------- rerank- Xinference ----------------
|
||||
#RERANK_PROVIDER=xinference
|
||||
#RERANK_MODEL=bge-reranker-v2-m3
|
||||
#RERANK_BASE_URL=http://10.1.16.39:9995
|
||||
#RERANK_TOP_N=5
|
||||
#RERANK_THRESHOLD=0.3
|
||||
|
||||
|
||||
#---------- rerank- ollama ----------------
|
||||
RERANK_PROVIDER=ollama
|
||||
RERANK_MODEL= /models/bge-reranker-base
|
||||
RERANK_TOP_N=5
|
||||
RERANK_THRESHOLD=0.3
|
||||
|
||||
#---------- model - Xinference ----------------
|
||||
#MODEL_PROVIDER=xinference
|
||||
#OPENAI_API_KEY=xinference
|
||||
#BASE_URL=http://172.20.0.145:9995
|
||||
#MODEL=Qwen2-72B-Instruct-GPTQ-Int8
|
||||
## Temperature for sampling from the model.
|
||||
#LLM_TEMPERATURE=0.1
|
||||
|
||||
#---------- model - dashscope ----------------
|
||||
MODEL_PROVIDER=dashscope
|
||||
DASHSCOPE_API_KEY=sk-221d2d202e104618a56002ce2e7dc0d0
|
||||
MODEL=qwen2-math-72b-instruct
|
||||
|
||||
|
||||
|
||||
#---------- embedding - Xinference ----------------
|
||||
#EMBEDDING_PROVIDER=xinference
|
||||
#EMBEDDING_MODEL=bge-m3
|
||||
#EMBEDDING_BASE_URL=http://10.1.16.39:9995
|
||||
#EMBEDDING_DIM=1024
|
||||
|
||||
#---------- embedding - dashscope ----------------
|
||||
EMBEDDING_PROVIDER=dashscope
|
||||
EMBEDDING_MODEL=text-embedding-v1
|
||||
|
||||
# Dimension of the embedding model to use.
|
||||
EMBEDDING_DIM=1024
|
||||
|
||||
# The questions to help users get started (multi-line).
|
||||
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
|
||||
|
||||
# The OpenAI API key to use.
|
||||
# OPENAI_API_KEY=
|
||||
|
||||
# Temperature for sampling from the model.
|
||||
# LLM_TEMPERATURE=
|
||||
|
||||
# Maximum number of tokens to generate.
|
||||
# LLM_MAX_TOKENS=
|
||||
|
||||
# The number of similar embeddings to return when retrieving documents.
|
||||
TOP_K=5
|
||||
|
||||
# The time in milliseconds to wait for the stream to return a response.
|
||||
STREAM_TIMEOUT=60000
|
||||
|
||||
@@ -58,7 +78,6 @@ VECTOR_STORE_PATH=./storage_vector
|
||||
BM_RETRIEVER_PATH =./storage_bm
|
||||
|
||||
|
||||
|
||||
PHOENIX_API_KEY=123456
|
||||
PHOENIX_URL=http://localhost:6006/v1/traces
|
||||
PHOENIX_PROJECT_NAME=ly_zjapp
|
||||
@@ -85,4 +104,10 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath
|
||||
- You can install any pip package (if it exists) by running a cell with pip install.
|
||||
"
|
||||
|
||||
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
||||
PRJTOJSON_URL = 'http://10.1.6.60:8092'
|
||||
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
||||
|
||||
CHAT_UPLOAD_FILECACHE = "./output/uploaded"
|
||||
|
||||
JIEBA_DATA=./nltk_data
|
||||
NLTK_DATA=./nltk_data
|
||||
+18
-14
@@ -19,27 +19,28 @@ HYBRID_ALPHA = 0.6
|
||||
#--------------------------
|
||||
# 是否启用检索重排功能
|
||||
RERANK_ENABLED=true
|
||||
# Rerank model
|
||||
|
||||
#---------- rerank- Xinference ----------------
|
||||
RERANK_PROVIDER=xinference
|
||||
RERANK_MODEL=bge-reranker-v2-m3
|
||||
RERANK_BASE_URL=http://10.1.16.39:9995
|
||||
RERANK_TOP_N=5
|
||||
RERANK_THRESHOLD=0.3
|
||||
#---------- Xinference ----------------
|
||||
# The provider for the AI models to use.
|
||||
MODEL_PROVIDER=xinference
|
||||
# The OpenAI API key to use.
|
||||
OPENAI_API_KEY=xinference
|
||||
|
||||
#---------- model - Xinference ----------------
|
||||
MODEL_PROVIDER=xinference # The provider for the AI models to use.
|
||||
OPENAI_API_KEY=xinference # The OpenAI API key to use.
|
||||
BASE_URL=http://10.1.0.142:9995
|
||||
MODEL=Qwen2-72B-Instruct-GPTQ-Int8
|
||||
# Temperature for sampling from the model.
|
||||
LLM_TEMPERATURE=0.1
|
||||
# Maximum number of tokens to generate.
|
||||
#LLM_MAX_TOKENS=
|
||||
# Name of the embedding model to use.
|
||||
LLM_TEMPERATURE=0.1 # Temperature for sampling from the model.
|
||||
#LLM_MAX_TOKENS= # Maximum number of tokens to generate.
|
||||
|
||||
|
||||
#---------- embedding - Xinference ----------------
|
||||
EMBEDDING_PROVIDER=xinference
|
||||
EMBEDDING_MODEL=bge-m3
|
||||
EMBEDDING_BASE_URL=http://10.1.16.39:9995
|
||||
# Dimension of the embedding model to use.
|
||||
EMBEDDING_DIM=1024
|
||||
EMBEDDING_DIM=1024 # Dimension of the embedding model to use.
|
||||
|
||||
##---------- OpenAI ----------------
|
||||
## The provider for the AI models to use.
|
||||
@@ -116,4 +117,7 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath
|
||||
- You can install any pip package (if it exists) by running a cell with pip install.
|
||||
"
|
||||
|
||||
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
||||
|
||||
PRJTOJSON_URL = 'http://10.1.6.60:8092'
|
||||
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
||||
CHAT_UPLOAD_FILECACHE = "./output/uploaded"
|
||||
+195
-69
@@ -1,37 +1,64 @@
|
||||
|
||||
import asyncio
|
||||
import json
|
||||
import logging
|
||||
import time
|
||||
from typing import Dict, List, Any, Optional, AsyncGenerator
|
||||
from collections import deque
|
||||
|
||||
from aiostream import stream
|
||||
from fastapi import APIRouter, Request
|
||||
from fastapi import APIRouter, Request,HTTPException
|
||||
from fastapi.responses import StreamingResponse
|
||||
from llama_index.core import BaseCallbackHandler
|
||||
from llama_index.core.base.llms.types import ChatMessage
|
||||
from llama_index.core.callbacks import CBEventType
|
||||
from llama_index.core.chat_engine.types import StreamingAgentChatResponse
|
||||
from llama_index.core.tools import ToolOutput
|
||||
from llama_index.core.schema import NodeWithScore
|
||||
from pydantic import BaseModel
|
||||
from app.api.routers.request.base import userMng, conversations,message,parameter,feedback
|
||||
from app.api.routers.request.base import userMng, conversations,message,ProjectInfo,feedback
|
||||
from app.api.routers.request.baseConfig import *
|
||||
from app.api.routers.request.models import ChatRequestData,ChatFileUploadRequest
|
||||
from app.engine import get_chat_engine
|
||||
import uuid
|
||||
from app.api.routers.services.fileServices import PrjFileLoadService,ChatFileService
|
||||
from app.api.routers.services.suggestion import NextQuestionSuggestion
|
||||
import time
|
||||
from llama_index.core.settings import Settings
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
api_router = r = APIRouter()
|
||||
|
||||
v1_router = v = APIRouter()
|
||||
|
||||
|
||||
gEvent_handler = None
|
||||
|
||||
|
||||
CH_Event_map={
|
||||
'CHUNKING':'文本切片',
|
||||
'NODE_PARSING':'节点解析',
|
||||
'EMBEDDING':'生成向量',
|
||||
'LLM':'知识问答',
|
||||
'QUERY':'查询',
|
||||
'RETRIEVE':'检索',
|
||||
'SYNTHESIZE':'答案合成',
|
||||
'TREE':'总结',
|
||||
'SUB_QUESTION':'问题分解',
|
||||
'TEMPLATING':'生成提示词模板',
|
||||
'FUNCTION_CALL':'函数调用',
|
||||
'RERANKING':'节点重排',
|
||||
'EXCEPTION':'执行异常',
|
||||
'AGENT_STEP':'单步执行'
|
||||
}
|
||||
|
||||
|
||||
class ChatCallbackEvent(BaseModel):
|
||||
event_type: ChatEventType
|
||||
payload: Optional[Dict[str, Any]] = None
|
||||
|
||||
def get_common_param(self)-> dict:
|
||||
return {
|
||||
'event': self.event_type.name,
|
||||
'event': self.event_type.value,
|
||||
'conversation_id':self.payload.get("conversation_id"),
|
||||
'message_id': self.payload.get("message_id"),
|
||||
'created_at': int(time.time()),
|
||||
@@ -47,7 +74,7 @@ class ChatCallbackEvent(BaseModel):
|
||||
"workflow_id": self.payload.get('workflow_id'),
|
||||
"sequence_number": 1709,
|
||||
"inputs": {
|
||||
"sys.query": self.payload.get('query'),
|
||||
"sys.query": f"开始查询 {self.payload.get('query')}",
|
||||
"sys.files": [],
|
||||
"sys.conversation_id": self.payload.get('conversation_id'),
|
||||
"sys.user_id": self.payload.get('use_id')
|
||||
@@ -92,7 +119,7 @@ class ChatCallbackEvent(BaseModel):
|
||||
"id": self.payload.get('nodeid'),
|
||||
"node_id": self.payload.get('nodeid'),
|
||||
"node_type": "http-request",
|
||||
"title": self.payload.get('title'),
|
||||
"title": CH_Event_map[self.payload.get('title')],
|
||||
"index": self.payload.get('index'),
|
||||
"predecessor_node_id": self.payload.get('predecessor_node_id'),
|
||||
"inputs": '',
|
||||
@@ -110,7 +137,7 @@ class ChatCallbackEvent(BaseModel):
|
||||
"id": self.payload.get('nodeid'),
|
||||
"node_id": self.payload.get('nodeid'),
|
||||
"node_type": "http-request",
|
||||
"title": self.payload.get('title'),
|
||||
"title": CH_Event_map[self.payload.get('title')],
|
||||
"index": self.payload.get('index'),
|
||||
"predecessor_node_id": self.payload.get('predecessor_node_id'),
|
||||
"inputs": '',
|
||||
@@ -137,15 +164,54 @@ class ChatCallbackEvent(BaseModel):
|
||||
|
||||
def get_MessageEnd_param(self) -> dict:
|
||||
params = self.get_common_param()
|
||||
nodeInfos = []
|
||||
source_nodes = self.payload.get('source_node')
|
||||
if source_nodes is not None:
|
||||
for i in range(len(source_nodes)):
|
||||
source_node:NodeWithScore = source_nodes[i]
|
||||
metadata:dict = source_node.node.metadata
|
||||
nodeInfo = {
|
||||
"position": i,
|
||||
"dataset_id": metadata.get("pipeline_id"),
|
||||
"dataset_name": metadata.get("file_name"),
|
||||
"document_id": source_node.node_id,
|
||||
"document_name": metadata.get("file_name"),
|
||||
"data_source_type": "upload_file",
|
||||
"segment_id": source_node.node_id,
|
||||
"retriever_from": "workflow",
|
||||
"score": source_node.score,
|
||||
"hit_count": 1,
|
||||
"word_count": 632,
|
||||
"segment_position": i,
|
||||
"index_node_hash": "",
|
||||
"content": source_node.text
|
||||
}
|
||||
nodeInfos.append(nodeInfo)
|
||||
params.update({
|
||||
'id':self.payload.get('message_id'),
|
||||
'metadata':self.payload.get('metadata')
|
||||
'metadata':{
|
||||
"retriever_resources":nodeInfos,
|
||||
"usage":{
|
||||
"prompt_tokens": 4972,
|
||||
"prompt_unit_price": "0.0",
|
||||
"prompt_price_unit": "0.0",
|
||||
"prompt_price": "0.0",
|
||||
"completion_tokens": 332,
|
||||
"completion_unit_price": "0.0",
|
||||
"completion_price_unit": "0.0",
|
||||
"completion_price": "0.0",
|
||||
"total_tokens": 5304,
|
||||
"total_price": "0.0",
|
||||
"currency": "USD",
|
||||
"latency": 4.897703120019287
|
||||
}
|
||||
}
|
||||
})
|
||||
return params
|
||||
|
||||
def to_response(self)-> dict|None:
|
||||
try:
|
||||
match self.event_type:
|
||||
match self.event_type.value:
|
||||
case "workflow_started":
|
||||
return self.get_WorkflowStart_param()
|
||||
case "workflow_finished":
|
||||
@@ -168,7 +234,7 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
_aqueue: asyncio.Queue
|
||||
is_done: bool = False
|
||||
|
||||
def __init__(self,**params):
|
||||
def __init__(self):
|
||||
"""Initialize the base callback handler."""
|
||||
ignored_events = [
|
||||
# CBEventType.CHUNKING,
|
||||
@@ -179,23 +245,19 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
]
|
||||
super().__init__(ignored_events, ignored_events)
|
||||
self._aqueue = asyncio.Queue()
|
||||
self._response:str = ''
|
||||
self._params:Dict[str,Any] = params
|
||||
self._nodeStack:deque = deque()
|
||||
self._response: StreamingAgentChatResponse = None
|
||||
self._ids:Dict[str,Any] = {}
|
||||
self._chatData:ChatRequestData = None
|
||||
self._nodeStack:List[str] = []
|
||||
self._firstEventID:str = None
|
||||
|
||||
def setInitParams(self,ids:dict,data:ChatRequestData):
|
||||
self._ids = ids
|
||||
self._chatData = data
|
||||
self._firstEventID = None
|
||||
|
||||
#添加工作流开始事件
|
||||
data:ChatRequestData = self._params['data']
|
||||
args:Dict[str,Any] = self._params['ids']
|
||||
args.update(
|
||||
{
|
||||
'use_id': data.user,
|
||||
'query': data.query,
|
||||
'conversation_id': data.conversation_id
|
||||
}
|
||||
)
|
||||
wf_event = ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_START,payload = args)
|
||||
if wf_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(wf_event)
|
||||
def setResponse(self,response: StreamingAgentChatResponse):
|
||||
self._response = response
|
||||
|
||||
def on_event_start(
|
||||
self,
|
||||
@@ -204,11 +266,15 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
event_id: str = "",
|
||||
**kwargs: Any,
|
||||
) -> str:
|
||||
if self._firstEventID is None:
|
||||
self._firstEventID = event_id
|
||||
self.start()
|
||||
|
||||
logger.info("event_start:{} type:{} payload:{}\n".format(event_id, event_type, payload))
|
||||
|
||||
self._nodeStack.append(event_id)
|
||||
nindex = self._nodeStack.count() - 1
|
||||
args:Dict[str,Any] = self._params['ids']
|
||||
nindex = len(self._nodeStack) - 1
|
||||
args:Dict[str,Any] = self._ids
|
||||
args.update(
|
||||
{
|
||||
'nodeid':event_id,
|
||||
@@ -221,7 +287,6 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
if nd_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(nd_event)
|
||||
|
||||
|
||||
def on_event_end(
|
||||
self,
|
||||
event_type: CBEventType,
|
||||
@@ -232,10 +297,10 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
logger.info("event_end:{} type:{} payload:{}\n".format(event_id, event_type, payload))
|
||||
|
||||
#self.response = payload.get("response","")
|
||||
args:Dict[str,Any] = self._params['ids']
|
||||
args:Dict[str,Any] = self._ids
|
||||
nodeID = self._nodeStack[-1]
|
||||
if nodeID == event_id:
|
||||
nindex = self._nodeStack.count() - 1
|
||||
nindex = len(self._nodeStack) - 1
|
||||
args.update(
|
||||
{
|
||||
'nodeid':event_id,
|
||||
@@ -249,6 +314,8 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
self._aqueue.put_nowait(nd_event)
|
||||
self._nodeStack.pop()
|
||||
|
||||
if self._firstEventID is not None and self._firstEventID == event_id:
|
||||
self.finished()
|
||||
|
||||
def start_trace(self, trace_id: Optional[str] = None) -> None:
|
||||
"""No-op."""
|
||||
@@ -260,24 +327,7 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
trace_map: Optional[Dict[str, List[str]]] = None,
|
||||
) -> None:
|
||||
"""No-op."""
|
||||
logger.info("trace_end:{} trace_map:{}\n".format(trace_id, trace_map))
|
||||
data:ChatRequestData = self._params['data']
|
||||
args:Dict[str,Any] = self._params['ids']
|
||||
args.update(
|
||||
{
|
||||
'response':self._response,
|
||||
'conversation_id': data.conversation_id
|
||||
}
|
||||
)
|
||||
wf_event = ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_FINISHED,payload = args)
|
||||
if wf_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(wf_event)
|
||||
|
||||
|
||||
args:Dict[str,Any] = self._params['ids']
|
||||
msgEnt_event = ChatCallbackEvent(event_type = ChatEventType.MESSAGE_END,payload = args)
|
||||
if msgEnt_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(msgEnt_event)
|
||||
logger.info("trace_end:{} trace_map:{}\n".format(trace_id, trace_map))
|
||||
|
||||
async def async_event_gen(self) -> AsyncGenerator[ChatCallbackEvent, None]:
|
||||
while not self._aqueue.empty() or not self.is_done:
|
||||
@@ -286,6 +336,51 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
||||
except asyncio.TimeoutError:
|
||||
pass
|
||||
|
||||
def makeWorkflow_startEvent(self)->ChatCallbackEvent:
|
||||
args:Dict[str,Any] = self._ids
|
||||
args.update(
|
||||
{
|
||||
'use_id': self._chatData.user,
|
||||
'query': self._chatData.query,
|
||||
'conversation_id': self._chatData.conversation_id
|
||||
}
|
||||
)
|
||||
return ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_START,payload = args)
|
||||
|
||||
def makeWorkflow_finishedEvent(self)->ChatCallbackEvent:
|
||||
args:Dict[str,Any] = self._ids
|
||||
args.update(
|
||||
{
|
||||
'response': '',
|
||||
'conversation_id': self._chatData.conversation_id
|
||||
}
|
||||
)
|
||||
return ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_FINISHED,payload = args)
|
||||
|
||||
def makeMessage_EndEvent(self)->ChatCallbackEvent:
|
||||
args:Dict[str,Any] = self._ids
|
||||
if self._response is not None:
|
||||
args.update({
|
||||
'source_node': self._response.source_nodes
|
||||
})
|
||||
msgEnt_event = ChatCallbackEvent(event_type = ChatEventType.MESSAGE_END,payload = args)
|
||||
return msgEnt_event
|
||||
|
||||
def start(self):
|
||||
#添加工作流开始事件
|
||||
wf_event = self.makeWorkflow_startEvent()
|
||||
if wf_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(wf_event)
|
||||
|
||||
def finished(self):
|
||||
wf_event = self.makeWorkflow_finishedEvent()
|
||||
if wf_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(wf_event)
|
||||
|
||||
msgEnt_event = self.makeMessage_EndEvent()
|
||||
if msgEnt_event.to_response() is not None:
|
||||
self._aqueue.put_nowait(msgEnt_event)
|
||||
|
||||
class IDManager:
|
||||
def createID(self):
|
||||
return {
|
||||
@@ -353,6 +448,7 @@ class ChatStreamResponse(StreamingResponse):
|
||||
|
||||
# the text_generator is the leading stream, once it's finished, also finish the event stream
|
||||
event_handler.is_done = True
|
||||
event_handler.setResponse(response)
|
||||
|
||||
# Yield the events from the event handler
|
||||
async def _event_generator():
|
||||
@@ -374,33 +470,36 @@ class ChatStreamResponse(StreamingResponse):
|
||||
break
|
||||
|
||||
@v.post("/chat-messages")
|
||||
async def post_conversations(request: Request, data: ChatRequestData):
|
||||
async def post_chatmessages(request: Request, data: ChatRequestData):
|
||||
global gEvent_handler
|
||||
userMng.findNoExistCreate(data.user)
|
||||
data.conversation_id = data.conversation_id if data.conversation_id else str(uuid.uuid4())
|
||||
|
||||
conversaObj = conversations()
|
||||
conversationinfo = conversaObj.get(data.conversation_id)
|
||||
if conversationinfo is None:
|
||||
conversationinfo = conversaObj.add(data.conversation_id, data.user, "新建会话")
|
||||
conversationinfo = conversaObj.add(data.conversation_id, data.user, "新建会话",inputs= data.inputs)
|
||||
|
||||
# 生成聊天参数
|
||||
last_message_content = ChatMessage.from_str(data.query)
|
||||
filters = None
|
||||
params = data.inputs or {}
|
||||
|
||||
# 获取聊天引擎对象
|
||||
chat_engine = get_chat_engine(filters=filters, params=params)
|
||||
|
||||
# 启动聊天事件监听
|
||||
ids = IDManager().createID()
|
||||
event_handler = ChatEventCallbackHandler(ids = ids,data = data)
|
||||
chat_engine.callback_manager.handlers.append(event_handler) # type: ignore
|
||||
if gEvent_handler is None:
|
||||
gEvent_handler = ChatEventCallbackHandler()
|
||||
Settings.llm.callback_manager.handlers.append(gEvent_handler)
|
||||
|
||||
if gEvent_handler is not None:
|
||||
gEvent_handler.setInitParams(ids = ids,data = data)
|
||||
|
||||
# 获取聊天引擎对象
|
||||
chat_engine = get_chat_engine(filters=filters, params=params)
|
||||
# 执行异步聊天
|
||||
response = await chat_engine.astream_chat(data.query)
|
||||
|
||||
# 返回异步消息回应
|
||||
return ChatStreamResponse(request, event_handler, response, data,ids)
|
||||
return ChatStreamResponse(request, gEvent_handler, response, data,ids)
|
||||
|
||||
@v.get("/messages")
|
||||
async def query_messages(user:str, conversation_id:str):
|
||||
@@ -467,24 +566,51 @@ async def query_conversations(user:str, first_id:str = None, limit:str = None, p
|
||||
|
||||
@v.get("/parameters")
|
||||
async def query_parameters(user:str):
|
||||
params = parameter().get(user)
|
||||
if len(params) == 0:
|
||||
params = BaseConfig().ParamterCfg()
|
||||
return params
|
||||
prjObj = ProjectInfo()
|
||||
return BaseConfig().ParamterCfg(projectInfo = prjObj.projectNames())
|
||||
|
||||
@v.post("/messages/{message_id}/feedbacks")
|
||||
async def post_feedbacks(request: Request,message_id:str,params:Dict[str,Any]):
|
||||
if params['rating'] =='null':
|
||||
if params['rating'] is None:
|
||||
feedback().delete(message_id)
|
||||
else:
|
||||
condition = {'id':message_id}
|
||||
results = message().query(**condition)
|
||||
results = message().query(message_id)
|
||||
if len(results) > 0:
|
||||
result = results[0]
|
||||
feedback().add(message_id=message_id,query=result['query'],
|
||||
answer=result['answer'],rating=params['rating'])
|
||||
|
||||
@r.post("")
|
||||
def upload_file(request: ChatFileUploadRequest) -> List[str]:
|
||||
pass
|
||||
@v.post("/files/upload")
def upload_file(request: ChatFileUploadRequest):
    """Store and index a file uploaded in chat, then describe it to the client.

    Args:
        request: Upload request; its ``base64`` field is handed to
            ``ChatFileService.process_file``.

    Returns:
        A dict with the stored file's ``id``, ``name``, ``size``,
        ``extension`` and ``mime_type``, plus ``created_by`` and
        ``created_at``.

    Raises:
        HTTPException: 500 when the file cannot be processed.
    """
    # Keep the try body down to the call that can actually fail.
    try:
        logger.info("Processing file")
        file_info = ChatFileService.process_file(request.base64)
    except Exception as e:
        logger.error(f"Error processing file: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Error processing file") from e

    return {
        'id': file_info.get('id'),
        # The service records the stored file under 'file_name'; reading only
        # 'name' always produced null, so fall back to 'file_name'.
        'name': file_info.get('name') or file_info.get('file_name'),
        'size': file_info.get('size'),
        'extension': file_info.get('extension'),
        'mime_type': file_info.get('mime_type'),
        # NOTE(review): a fresh random UUID is reported as the creator on
        # every call — confirm whether a real user id should be used.
        'created_by': str(uuid.uuid4()),
        'created_at': int(time.time()),
    }
|
||||
|
||||
@v.post("/project")
def upload_file(request: ChatFileUploadRequest):
    """Convert and index an uploaded project file.

    NOTE(review): this handler shares the name ``upload_file`` with the
    ``/files/upload`` handler; both routes are registered by their
    decorators, but the module-level name ends up bound to this function.

    Args:
        request: Upload request; its ``base64`` field is handed to
            ``PrjFileLoadService.process_file``.

    Returns:
        The value returned by ``PrjFileLoadService.process_file``.

    Raises:
        HTTPException: 500 when processing raises or yields no result.
    """
    try:
        logger.info("Processing file")
        project_flag = PrjFileLoadService.process_file(request.base64)
    except Exception as e:
        logger.error(f"Error processing file: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Error processing file") from e

    # The service signals a failed conversion by returning None; report that
    # as an error instead of answering 200 with a null body.
    if project_flag is None:
        logger.error("Error processing file: project conversion returned no result")
        raise HTTPException(status_code=500, detail="Error processing file")
    return project_flag
|
||||
|
||||
@v.post("/messages/{message_id}/suggested")
async def post_suggested(request: Request,message_id:str,user:str):
    """Return suggested follow-up questions for the given message.

    ``request`` and ``user`` are accepted but not used by the handler body.
    """
    suggestions = await NextQuestionSuggestion.suggest_next_questions(message_id)
    return dict(result="success", data=suggestions)
|
||||
@@ -2,7 +2,7 @@ from datetime import datetime
|
||||
import uuid
|
||||
from app.api.routers.request.baseConfig import BaseConfig
|
||||
from app.api.routers.request.dbOrm import DBManager
|
||||
|
||||
from typing import List
|
||||
dbManage = DBManager()
|
||||
|
||||
class conversations:
|
||||
@@ -24,12 +24,13 @@ class conversations:
|
||||
return records[0]
|
||||
return None
|
||||
|
||||
def add(self,id:str, user_id:str, name:str):
|
||||
def add(self,id:str, user_id:str, name:str,inputs:dict):
|
||||
template = BaseConfig().ConversationCfg()
|
||||
template['id'] = id
|
||||
template['user_id'] = user_id
|
||||
template['name'] = name
|
||||
template['created_at'] = 1724399038
|
||||
template['inputs'] = inputs
|
||||
dbManage.addRecord(self._tableName,template)
|
||||
|
||||
def delete(self,id:str):
|
||||
@@ -122,8 +123,9 @@ class message:
|
||||
def delete(self,user_id:str):
|
||||
dbManage.delete(self._tableName,user_id = user_id)
|
||||
|
||||
def query(self,**condition):
|
||||
def query(self,id:str):
|
||||
results = []
|
||||
condition = {'id':id}
|
||||
records = dbManage.query(self._tableName,**condition)
|
||||
for record in records:
|
||||
results.append(record.dict())
|
||||
@@ -152,4 +154,36 @@ class feedback:
|
||||
records = dbManage.query(self._tableName,**cond)
|
||||
if len(records) > 0:
|
||||
return records[0].dict()
|
||||
return None
|
||||
return None
|
||||
|
||||
class ProjectInfo:
    """Data-access helper for the ``projectInfos`` table.

    Each record holds a project name (``prjectName``) and a project flag
    (``prjFlag``).
    """

    def __init__(self) -> None:
        self._tableName = 'projectInfos'
        # Make sure the table exists before any query touches it.
        dbManage.createTable(self._tableName)

    def add(self, name: str, flag: str):
        """Insert a record for ``flag`` unless one already exists."""
        if dbManage.query(self._tableName, prjFlag=flag):
            return
        dbManage.addRecord(
            self._tableName,
            {
                'prjectName': name,
                'prjFlag': flag,
            },
        )

    def projectNames(self) -> List[str]:
        """Return the names of all stored projects.

        Records whose name is empty or missing are skipped, so the result
        contains only non-empty strings.
        """
        names = (record.dict().get('prjectName')
                 for record in dbManage.query(self._tableName))
        return [name for name in names if name]

    def prjFalg(self, name: str):
        """Return the flag of the first project called ``name``, or ``''``."""
        for record in dbManage.query(self._tableName):
            data: dict = record.dict()
            if data.get('prjectName') == name:
                # Never hand back None; callers get '' when no flag is stored.
                return data.get('prjFlag') or ''
        return ''
|
||||
@@ -3,9 +3,10 @@ import os
|
||||
from enum import Enum
|
||||
|
||||
class BaseConfig(BaseModel):
|
||||
projectInfo:str = os.getenv("PROJECT_TITLE","您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!")
|
||||
projectInfo:str = os.getenv("PROJECT_TITLE","会话提示消息")
|
||||
|
||||
def ParamterCfg(self):
|
||||
def ParamterCfg(self,**args):
|
||||
prjItems = args.get('projectInfo')
|
||||
questions = os.getenv("CONVERSATION_STARTERS", "dev")
|
||||
return{
|
||||
"opening_statement": self.projectInfo,
|
||||
@@ -30,7 +31,18 @@ class BaseConfig(BaseModel):
|
||||
"more_like_this": {
|
||||
"enabled": False
|
||||
},
|
||||
"user_input_form": [],
|
||||
"user_input_form": [
|
||||
{
|
||||
"select": {
|
||||
"variable": "projectname",
|
||||
"label": "\u5de5\u7a0b\u540d\u79f0",
|
||||
"type": "select",
|
||||
"max_length": 48,
|
||||
"required": True,
|
||||
"options": prjItems
|
||||
}
|
||||
}
|
||||
],
|
||||
"sensitive_word_avoidance": {
|
||||
"enabled": False
|
||||
},
|
||||
@@ -44,8 +56,19 @@ class BaseConfig(BaseModel):
|
||||
}
|
||||
},
|
||||
"system_parameters": {
|
||||
"image_file_size_limit": "10"
|
||||
}
|
||||
"image_file_size_limit": "10",
|
||||
"language": "",
|
||||
"voice": "",
|
||||
},
|
||||
"retriever_resource": {
|
||||
"enabled": True
|
||||
},
|
||||
"annotation_reply": {
|
||||
"enabled": False
|
||||
},
|
||||
"more_like_this": {
|
||||
"enabled": False
|
||||
},
|
||||
}
|
||||
|
||||
def ConversationCfg(self):
|
||||
|
||||
@@ -55,6 +55,13 @@ class FeedBackOrm(Base):
|
||||
answer = Column(String)
|
||||
rating = Column(String)
|
||||
|
||||
class ProjectInfoOrm(Base):
|
||||
__tablename__ = "projectInfos"
|
||||
|
||||
prjFlag = Column(String,primary_key=True)
|
||||
prjectName = Column(String)
|
||||
|
||||
|
||||
#数据结构
|
||||
class ConversationModel(BaseModel):
|
||||
id: str
|
||||
@@ -121,6 +128,17 @@ class FeedBackModel(BaseModel):
|
||||
def orm(cls):
|
||||
return FeedBackOrm
|
||||
|
||||
class ProjectInfoModel(BaseModel):
|
||||
prjectName:str
|
||||
prjFlag:str
|
||||
|
||||
class Config:
|
||||
from_attributes=True
|
||||
|
||||
@classmethod
|
||||
def orm(cls):
|
||||
return ProjectInfoOrm
|
||||
|
||||
class DBManager:
|
||||
def __init__(self) -> None:
|
||||
DATABASE_URL = os.getenv("SQLITE_DATABASE_URL")
|
||||
@@ -160,7 +178,8 @@ class DBManager:
|
||||
return
|
||||
records = session.query(ormCls).filter_by(**filter).all()
|
||||
if records is not None:
|
||||
session.delete(records)
|
||||
for record in records:
|
||||
session.delete(record)
|
||||
session.commit()
|
||||
|
||||
def update(self,tableName:str,data:Dict[str,Any],**filter):
|
||||
|
||||
@@ -0,0 +1,134 @@
|
||||
import base64,os,mimetypes,requests,tempfile
|
||||
from typing import List,Dict,Any
|
||||
from uuid import uuid4
|
||||
from app.settings import init_settings
|
||||
from app.engine.loaders import get_document_Types, get_documents,getFileCacahePath
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.engine.generate import get_doc_store,run_pipeline,persist_storage
|
||||
from llama_index.core.schema import Document
|
||||
from pathlib import Path
|
||||
from llama_index.core.readers.file.base import (
|
||||
_try_loading_included_file_formats as get_file_loaders_map,
|
||||
)
|
||||
from llama_index.readers.file import FlatReader
|
||||
from llama_index.core.ingestion import IngestionPipeline
|
||||
from llama_index.core import VectorStoreIndex
|
||||
from app.engine.index import get_index
|
||||
|
||||
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||
|
||||
class PrjFileLoadService:
    """Convert an uploaded project file and index the resulting documents."""

    @staticmethod
    def store_and_parse_file(file_data):
        """Convert a project file via the remote service and unpack the result.

        ``file_data`` is posted to ``<PRJTOJSON_URL>/prj_convert_clt2json``;
        the text of that reply is posted to ``<PRJTOJSON_URL>/file_download``;
        the returned bytes are saved as a temporary zip archive and extracted
        below ``<file cache path>/Projects/<uuid>``.

        Args:
            file_data: Payload sent as the ``file`` part of the upload.

        Returns:
            ``'Projects_<uuid>'`` on success. ``None`` when ``PRJTOJSON_URL``
            is not set, when either request fails or returns nothing, or when
            saving/extracting the archive fails.
        """
        import logging
        import zipfile

        log = logging.getLogger(__name__)

        base_url = os.getenv('PRJTOJSON_URL')
        if not base_url:
            # Previously this surfaced as a TypeError from None + str.
            log.error("PRJTOJSON_URL is not configured")
            return None

        # NOTE(review): neither request sets a timeout, so a stalled
        # conversion service blocks this call indefinitely — confirm an
        # acceptable limit and pass timeout=... here.
        converted = requests.post(
            url=base_url + '/prj_convert_clt2json',
            files={'file': file_data},
        )
        if not converted.ok or not converted.text:
            return None

        downloaded = requests.post(
            url=base_url + '/file_download',
            data=converted.text,
        )
        # ``content`` is bytes, so test for emptiness rather than == ''.
        if not downloaded.ok or not downloaded.content:
            return None

        # Build the path with os.path.join so it also works off Windows.
        archive_path = os.path.join(tempfile.gettempdir(), f"{uuid4().hex}.zip")
        try:
            with open(archive_path, 'wb') as archive:
                archive.write(downloaded.content)

            prjID = str(uuid4())
            target_dir = getFileCacahePath() + f'/Projects/{prjID}'
            os.makedirs(target_dir)
            with zipfile.ZipFile(archive_path, 'r') as bundle:
                for entry in bundle.infolist():
                    # zipfile decodes names as cp437 unless the UTF-8 flag
                    # (bit 11) is set. The original code treats such names as
                    # GBK, so undo the cp437 decoding only for unflagged
                    # entries; flagged names are already correct.
                    if not entry.flag_bits & 0x800:
                        try:
                            entry.filename = entry.filename.encode('cp437').decode('gbk')
                        except (UnicodeEncodeError, UnicodeDecodeError):
                            # Keep the name zipfile produced.
                            pass
                    bundle.extract(entry, target_dir)
            return f'Projects_{prjID}'
        except Exception:
            # Best-effort contract: callers expect None on failure, but the
            # cause must not vanish silently.
            log.exception("Failed to store or extract the converted project archive")
            return None
        finally:
            # Remove the temporary archive on success and on failure alike.
            if os.path.exists(archive_path):
                os.remove(archive_path)

    @staticmethod
    def process_file(base64_content: str) -> "str | None":
        """Convert a project file, then build and persist its vectors.

        Args:
            base64_content: Upload payload, passed unchanged to
                ``store_and_parse_file``.

        Returns:
            The project flag produced by ``store_and_parse_file``, or ``None``
            when the conversion failed.
        """
        prjFlag = PrjFileLoadService.store_and_parse_file(base64_content)
        if prjFlag is None:
            return None

        # Generate the vectors and persist them locally.
        documents = get_documents(prjFlag)
        for doc in documents:
            doc.metadata["private"] = "false"
        docstore = get_doc_store(prjFlag)
        vector_store = get_vector_store(prjFlag)
        _ = run_pipeline(docstore, vector_store, documents)
        persist_storage(docstore, vector_store)
        return prjFlag
|
||||
|
||||
class ChatFileService:
    """Store a file uploaded in chat, parse it and add it to the index."""

    # Directory in which uploaded chat files are written.
    PRIVATE_STORE_PATH = os.getenv('CHAT_UPLOAD_FILECACHE','output/uploaded')
    # Metadata of the upload currently being processed.
    # NOTE(review): this dict is class-level and therefore shared by every
    # caller; concurrent uploads can still interleave their writes.
    resluts: Dict[str, Any] = {}

    @staticmethod
    def process_file(base64_content: str) -> dict:
        """Decode, store, parse and index one uploaded file.

        Args:
            base64_content: Data URL of the form
                ``data:<mime type>;base64,<payload>``.

        Returns:
            A new dict with ``mime_type``, ``extension``, ``id``,
            ``file_name``, ``name`` and ``size`` of the stored file.

        Raises:
            ValueError: If the data URL is malformed or the file type has no
                registered reader.
        """
        # Drop anything left over from an earlier upload so stale keys cannot
        # leak into this result.
        ChatFileService.resluts.clear()

        file_data, extension = ChatFileService.preprocess_base64_file(base64_content)
        documents = ChatFileService.store_and_parse_file(file_data, extension)

        current_index = get_index()
        # Run the ingestion pipeline once; the earlier code ran it twice and
        # discarded the first result.
        nodes = IngestionPipeline().run(documents=documents)
        if current_index is None:
            current_index = VectorStoreIndex(nodes=nodes)
        else:
            current_index.insert_nodes(nodes=nodes)
        current_index.storage_context.persist(
            persist_dir=os.environ.get("STORAGE_DIR", "storage")
        )

        # Return a snapshot so later uploads cannot mutate this caller's dict.
        return dict(ChatFileService.resluts)

    @staticmethod
    def preprocess_base64_file(base64_content: str) -> tuple:
        """Split a data URL into decoded bytes and a guessed file extension.

        Records ``mime_type`` and ``extension`` in ``resluts``.

        Returns:
            ``(file bytes, extension)``; the extension is ``None`` when the
            MIME type is unknown to ``mimetypes``.

        Raises:
            ValueError: If the input is not ``data:<mime type>[;...],<payload>``.
        """
        header, comma, data = base64_content.partition(",")
        mime_part = header.split(";")[0]
        if not comma or ":" not in mime_part:
            raise ValueError(
                "Expected a data URL of the form 'data:<mime type>;base64,<data>'"
            )
        mime_type = mime_part.split(":", 1)[1]
        extension = mimetypes.guess_extension(mime_type)
        ChatFileService.resluts['mime_type'] = mime_type
        ChatFileService.resluts['extension'] = extension
        return base64.b64decode(data), extension

    @staticmethod
    def store_and_parse_file(file_data, extension) -> "List[Document]":
        """Write the file under a generated name and load it as documents.

        Records ``id``, ``file_name``, ``name`` and ``size`` in ``resluts``
        and tags every document with ``file_name`` and ``private = "true"``.

        Raises:
            ValueError: If no reader is registered for ``extension``.
        """
        # Check the extension first so an unsupported upload leaves no file.
        reader_cls = ChatFileService.default_file_loaders_map().get(extension)
        if reader_cls is None:
            raise ValueError(f"File extension {extension} is not supported")

        os.makedirs(ChatFileService.PRIVATE_STORE_PATH, exist_ok=True)
        fileID = uuid4().hex
        file_name = f"{fileID}{extension}"
        file_path = Path(os.path.join(ChatFileService.PRIVATE_STORE_PATH, file_name))
        ChatFileService.resluts['id'] = fileID
        ChatFileService.resluts['file_name'] = file_name
        # The upload endpoint reads 'name'; expose the stored name there too.
        ChatFileService.resluts['name'] = file_name

        with open(file_path, "wb") as f:
            f.write(file_data)
        ChatFileService.resluts['size'] = os.path.getsize(file_path)

        documents = reader_cls().load_data(file_path)
        for doc in documents:
            doc.metadata["file_name"] = file_name
            doc.metadata["private"] = "true"
        return documents

    @staticmethod
    def default_file_loaders_map():
        """Return the built-in reader map with ``.txt`` mapped to ``FlatReader``."""
        default_loaders = get_file_loaders_map()
        default_loaders[".txt"] = FlatReader
        return default_loaders
|
||||
@@ -0,0 +1,43 @@
|
||||
from typing import List
|
||||
|
||||
from app.api.routers.request.base import message
|
||||
from llama_index.core.prompts import PromptTemplate
|
||||
from llama_index.core.settings import Settings
|
||||
from pydantic import BaseModel
|
||||
|
||||
# Prompt used to ask the LLM for follow-up questions. Both {conversation} and
# {number_of_questions} are template variables filled in at call time.
NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
    "你是一个乐于助人的助手!你的任务是对用户可能会问的下一个问题给出建议。 "
    "\n这是对话历史记录"
    "\n---------------------\n{conversation}\n---------------------"
    "考虑到对话历史记录,仅限于现在知识库已有内容, 请给我 {number_of_questions} 个你接下来可能会问题的问题!"
)
# Default number of follow-up questions to request.
N_QUESTION_TO_GENERATE = 3


class NextQuestions(BaseModel):
    """A list of questions that user might ask next"""

    questions: List[str]


class NextQuestionSuggestion:
    """Suggest follow-up questions based on a stored chat message."""

    @staticmethod
    async def suggest_next_questions(
        message_id: str,
        number_of_questions: int = N_QUESTION_TO_GENERATE,
    ) -> List[str]:
        """Ask the LLM for questions the user might ask next.

        The stored message identified by ``message_id`` is looked up and its
        ``query`` / ``answer`` pair is used as the conversation history.

        Args:
            message_id: Identifier passed to ``message().query``.
            number_of_questions: How many questions to request from the LLM.

        Returns:
            The suggested questions, or an empty list when no stored message
            is found for ``message_id``.
        """
        results = message().query(message_id)
        if not results:
            return []

        last_user_message = results[0]['query']
        last_assistant_message = results[0]['answer']
        conversation: str = f"{last_user_message}\n{last_assistant_message}"
        output: NextQuestions = await Settings.llm.astructured_predict(
            NextQuestions,
            prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
            conversation=conversation,
            # The keyword must match the template variable name, otherwise
            # the requested count never reaches the prompt.
            number_of_questions=number_of_questions,
        )
        return output.questions
|
||||
@@ -6,16 +6,25 @@ from llama_index.core.tools.query_engine import QueryEngineTool
|
||||
|
||||
from app.engine.engine import create_query_engine, create_summary_query_engine
|
||||
from app.engine.index import get_index
|
||||
from app.engine.prompt import ReActChatFormatter_messages, tree_summary_query_engine_tool_messages, \
|
||||
query_engine_tool_messages, summary_query_tool_messages
|
||||
#from app.engine.loaders.db import makeDescriptionByEngine
|
||||
from app.engine.tools import ToolFactory
|
||||
from app.api.routers.request.base import ProjectInfo
|
||||
from llama_index.core.response_synthesizers import ResponseMode
|
||||
|
||||
def getPrjFalg(params:dict=None)->str:
    """Resolve the project flag for the ``projectname`` entry of ``params``.

    Args:
        params: Request parameters; only the ``projectname`` key is read.

    Returns:
        An empty string when ``params`` is ``None``; otherwise the value
        returned by ``ProjectInfo().prjFalg`` for the project name.
    """
    if params is None:
        return ''
    project_name = params.get('projectname')
    return ProjectInfo().prjFalg(project_name)
|
||||
|
||||
|
||||
def get_chat_engine(filters=None, params=None):
|
||||
def get_chat_engine(filters=None, params:dict=None):
|
||||
system_prompt = os.getenv("SYSTEM_PROMPT")
|
||||
top_k = int(os.getenv("TOP_K", "3"))
|
||||
use_reranker = os.getenv("RERANK_ENABLED")
|
||||
tools = []
|
||||
|
||||
# 创建SQL查询工具
|
||||
# sql_query_engine = create_summary_query_engine(index)
|
||||
# sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
|
||||
@@ -25,31 +34,31 @@ def get_chat_engine(filters=None, params=None):
|
||||
#tools.append(sql_query_tool)
|
||||
|
||||
# Add query tool if index exists
|
||||
index = get_index()
|
||||
index = get_index(getPrjFalg(params))
|
||||
if index is not None:
|
||||
|
||||
|
||||
summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
|
||||
summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
|
||||
description="适用于任何需要进行全面总结、概括的要求。",
|
||||
description=summary_query_tool_messages,
|
||||
)
|
||||
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "COMPACT")
|
||||
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
|
||||
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。",
|
||||
)
|
||||
|
||||
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "TREE_SUMMARIZE")
|
||||
query_engine_tool_1 = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool_1",
|
||||
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后,且在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用“zj_query_tool_1”工具。",
|
||||
)
|
||||
|
||||
tools.append(summary_query_tool)
|
||||
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = ResponseMode.TREE_SUMMARIZE)
|
||||
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
|
||||
description=query_engine_tool_messages)
|
||||
|
||||
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = ResponseMode.TREE_SUMMARIZE)
|
||||
query_engine_tool_1 = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool_1",
|
||||
description=tree_summary_query_engine_tool_messages)
|
||||
|
||||
tools.append(query_engine_tool)
|
||||
tools.append(query_engine_tool_1)
|
||||
tools.append(summary_query_tool)
|
||||
|
||||
# Add additional tools
|
||||
tools += ToolFactory.from_env()
|
||||
|
||||
prefix_messages = ("""您的设计旨在帮助完成各种任务,从回答问题到提供其他类型分析的摘要。\n\n##工具\n\n你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n这可能需要将任务分解为子任务,并使用不同的工具来完成每个子任务。\n\n你可以访问以下工具:\n{tool_desc}\n\n\n##输出格式\n\n请用与问题相同的语言回答,并使用以下格式:\n\n \nThought: 用户当前的语言是:(user's language)。我需要使用工具来帮助我回答问题。\nAction: 如果使用工具,则为工具名称(one of {tool_names})。\nAction Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}})\n \n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n如果使用此格式,用户将以下面的格式进行回应:\n\n \nObservation: 工具响应\n \n\n你应该继续重复上述格式,直到你有足够的信息来回答问题而无需使用更多工具。此时,你必须使用以下两种格式之一进行回答:\n\n \nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\nAnswer: [你的答案(与用户问题相同的语言)]\n \n\n \nThought: 我无法使用提供的工具回答问题。\nAnswer: [你的答案(与用户问题相同的语言)]\n \n\n##如果从工具中得到的回应是Empty Response,那么只需要回答“我不知道”,不需要额外回答别的内容。## 当前对话\n\n以下是当前对话,由人类和助手的消息交替组成。\n""")
|
||||
react_chat_formatter = ReActChatFormatter.from_defaults(prefix_messages)
|
||||
react_chat_formatter = ReActChatFormatter.from_defaults(ReActChatFormatter_messages)
|
||||
agentrunner = AgentRunner.from_llm(
|
||||
llm=Settings.llm,
|
||||
tools=tools,
|
||||
@@ -58,6 +67,7 @@ def get_chat_engine(filters=None, params=None):
|
||||
verbose=True,
|
||||
)
|
||||
return agentrunner
|
||||
|
||||
# create the function calling worker for reasoning
|
||||
# worker = FunctionCallingAgentWorker.from_tools(
|
||||
# tools, verbose=True
|
||||
|
||||
@@ -7,10 +7,28 @@ from llama_index.core.query_engine import RetrieverQueryEngine
|
||||
from llama_index.core.response_synthesizers import ResponseMode
|
||||
from llama_index.readers.database import DatabaseReader
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from util.register import *
|
||||
from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
|
||||
from app.engine.retriever.HybridRetriever import HybridRetriever
|
||||
from app.settings import get_node_postprocessors
|
||||
from app.engine.response.treeSummResponse import CustomTreeResponse
|
||||
from llama_index.core.settings import Settings
|
||||
|
||||
ModelPlateCategory = '模型平台'
|
||||
|
||||
def get_node_postprocessors():
    """Return the rerank post-processor selected through the environment.

    Reranking is considered disabled when ``RERANK_ENABLED`` is unset or
    equals ``false`` (case-insensitive, surrounding whitespace ignored).
    Otherwise the class registered for ``RERANK_PROVIDER`` under the model
    platform category is instantiated and its ``rerank()`` result returned.

    Returns:
        An empty list when reranking is disabled, else the value returned by
        the provider's ``rerank()``.

    Raises:
        ValueError: If no class is registered for ``RERANK_PROVIDER``.
    """
    rerank_enabled = os.getenv("RERANK_ENABLED")
    # Check for None *before* normalising: os.getenv returns None when the
    # variable is not set and None has no string methods.
    if rerank_enabled is None or rerank_enabled.strip().title() == 'False':
        return []

    Rerank_provider = os.getenv("RERANK_PROVIDER")
    modelPaltCls = ClsRegister.get(ModelPlateCategory,Rerank_provider)
    if modelPaltCls is None:
        raise ValueError(f"Invalid rerank provider: {Rerank_provider}")
    return modelPaltCls().rerank()
|
||||
|
||||
def makeDescriptionByEngine(sql_database:SQLDatabase):
|
||||
reader = DatabaseReader(sql_database)
|
||||
@@ -49,6 +67,14 @@ def get_Retriever(index,**kwargs):
|
||||
return retriever
|
||||
|
||||
|
||||
def get_synthesizer():
    """Create a ``CustomTreeResponse`` synthesizer.

    The synthesizer uses the globally configured LLM and the module's
    ``summary_template``, with async execution on and streaming off.
    """
    synthesizer_options = {
        "llm": Settings.llm,
        "summary_template": summary_template,
        "use_async": True,
        "streaming": False,
    }
    return CustomTreeResponse(**synthesizer_options)
|
||||
|
||||
sql_database = None
|
||||
sql_obj_index = None
|
||||
|
||||
@@ -81,7 +107,7 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None
|
||||
summary_query_engine = summary_index.as_query_engine(
|
||||
response_mode=ResponseMode.TREE_SUMMARIZE,
|
||||
use_async=True,
|
||||
streaming=True,
|
||||
streaming=False,
|
||||
)
|
||||
return summary_query_engine
|
||||
|
||||
@@ -102,8 +128,8 @@ def create_query_engine(index, top_k=3, use_reranker=False, filters=None, respon
|
||||
simple_template = simple_template,
|
||||
node_postprocessors=postprocess,
|
||||
use_async=True,
|
||||
streaming=True,
|
||||
ResponseMode = response_mode
|
||||
streaming=False,
|
||||
response_mode = response_mode
|
||||
)
|
||||
|
||||
return query_engine
|
||||
@@ -5,12 +5,12 @@ load_dotenv()
|
||||
import logging
|
||||
import os
|
||||
|
||||
from app.engine.loaders import get_documents
|
||||
from app.engine.loaders import get_document_Types, get_documents
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.settings import init_settings
|
||||
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
||||
from llama_index.core.ingestion import IngestionPipeline
|
||||
from llama_index.core.node_parser import SentenceSplitter
|
||||
from llama_index.core.node_parser import SentenceSplitter,MarkdownNodeParser
|
||||
from llama_index.core.settings import Settings
|
||||
from llama_index.core.storage import StorageContext
|
||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||
@@ -21,12 +21,13 @@ logger = logging.getLogger()
|
||||
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||
|
||||
|
||||
def get_doc_store():
|
||||
def get_doc_store(docType:str):
|
||||
|
||||
# If the storage directory is there, load the document store from it.
|
||||
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
|
||||
if os.path.exists(STORAGE_DIR):
|
||||
return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
|
||||
storeDir = os.path.join(STORAGE_DIR,docType)
|
||||
if os.path.exists(storeDir):
|
||||
return SimpleDocumentStore.from_persist_dir(storeDir)
|
||||
else:
|
||||
return SimpleDocumentStore()
|
||||
|
||||
@@ -34,10 +35,11 @@ def get_doc_store():
|
||||
def run_pipeline(docstore, vector_store, documents):
|
||||
pipeline = IngestionPipeline(
|
||||
transformations=[
|
||||
SentenceSplitter(
|
||||
chunk_size=Settings.chunk_size,
|
||||
chunk_overlap=Settings.chunk_overlap,
|
||||
),
|
||||
#SentenceSplitter(
|
||||
#chunk_size=Settings.chunk_size,
|
||||
#chunk_overlap=Settings.chunk_overlap,
|
||||
#),
|
||||
#MarkdownNodeParser(),
|
||||
Settings.embed_model,
|
||||
],
|
||||
docstore=docstore,
|
||||
@@ -61,8 +63,9 @@ def persist_storage(docstore, vector_store):
|
||||
|
||||
def persist_BMRetriever(vector_store):
|
||||
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
||||
top_k = int(os.getenv("TOP_K", "3"))
|
||||
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes=vector_store.get_nodes([]))
|
||||
nodes = vector_store.get_nodes([])
|
||||
top_k = min(int(os.getenv("TOP_K", "3")),len(nodes))
|
||||
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes = nodes)
|
||||
bmRetriver.persist(STORAGE_DIR)
|
||||
|
||||
|
||||
@@ -71,19 +74,21 @@ def generate_datasource():
|
||||
logger.info("Generate index for the provided data")
|
||||
|
||||
# Get the stores and documents or create new ones
|
||||
documents = get_documents()
|
||||
# Set private=false to mark the document as public (required for filtering)
|
||||
for doc in documents:
|
||||
doc.metadata["private"] = "false"
|
||||
docstore = get_doc_store()
|
||||
vector_store = get_vector_store()
|
||||
docTypes = get_document_Types()
|
||||
for docType in docTypes:
|
||||
documents = get_documents(docType)
|
||||
# Set private=false to mark the document as public (required for filtering)
|
||||
for doc in documents:
|
||||
doc.metadata["private"] = "false"
|
||||
docstore = get_doc_store(docType)
|
||||
vector_store = get_vector_store(docType)
|
||||
|
||||
# Run the ingestion pipeline
|
||||
_ = run_pipeline(docstore, vector_store, documents)
|
||||
# Run the ingestion pipeline
|
||||
_ = run_pipeline(docstore, vector_store, documents)
|
||||
|
||||
# Build the index and persist storage
|
||||
persist_storage(docstore, vector_store)
|
||||
persist_BMRetriever(vector_store)
|
||||
# Build the index and persist storage
|
||||
persist_storage(docstore, vector_store)
|
||||
persist_BMRetriever(vector_store)
|
||||
|
||||
logger.info("Finished generating the index")
|
||||
|
||||
|
||||
@@ -1,22 +1,15 @@
|
||||
import logging
|
||||
from llama_index.core.indices import VectorStoreIndex
|
||||
from app.engine.vectordb import get_vector_store
|
||||
|
||||
|
||||
from app.engine.loaders import get_document_Types
|
||||
from typing import Dict,Any
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
index = None
|
||||
|
||||
def get_index(params=None):
|
||||
global index
|
||||
if index is None:
|
||||
logger.info("Connecting vector store...")
|
||||
|
||||
store = get_vector_store()
|
||||
# Load the index from the vector store
|
||||
# If you are using a vector store that doesn't store text,
|
||||
# you must load the index from both the vector store and the document store
|
||||
index = VectorStoreIndex.from_vector_store(store)
|
||||
logger.info("Finished load index from vector store.")
|
||||
|
||||
def get_index(prjFlag:str):
|
||||
if prjFlag is None or prjFlag == '':
|
||||
raise ValueError('无效的工程标识')
|
||||
logger.info("Connecting vector store...")
|
||||
store = get_vector_store(prjFlag)
|
||||
index = VectorStoreIndex.from_vector_store(store)
|
||||
logger.info("Finished load index from vector store.")
|
||||
return index
|
||||
|
||||
@@ -3,17 +3,86 @@ import yaml
|
||||
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
|
||||
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
||||
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
||||
from app.engine.loaders.file import getProjectName
|
||||
import os
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
def load_configs():
|
||||
with open("config/loaders.yaml",encoding='UTF-8') as f:
|
||||
with open("config/loaders.yaml",encoding='utf-8') as f:
|
||||
configs = yaml.safe_load(f)
|
||||
return configs
|
||||
|
||||
def path_difference(path1:str, path2:str):
    """Return the part of ``path2`` that follows its common prefix with ``path1``.

    Both paths are made absolute, split into components and compared from the
    left. The components of ``path2`` after the shared prefix are joined with
    ``'_'``.

    Args:
        path1: Base path.
        path2: Path to express relative to the shared prefix.

    Returns:
        The remaining components of ``path2`` joined by ``'_'``; an empty
        string when ``path2`` has no components beyond the shared prefix
        (including when ``path2`` is shorter than ``path1``).
    """
    import os
    base_parts = os.path.abspath(path1).split(os.path.sep)
    target_parts = os.path.abspath(path2).split(os.path.sep)

    # zip stops at the shorter path, so a path2 with fewer components than
    # path1 can no longer index out of range.
    common = 0
    for base_part, target_part in zip(base_parts, target_parts):
        if base_part != target_part:
            break
        common += 1

    return '_'.join(target_parts[common:])
|
||||
|
||||
def getFileCacahePath():
    """Return the data directory of the ``file`` loader.

    Reads the loader configuration and returns the ``data_dir`` of the
    ``file`` entry. Falls back to ``'data'`` when the configuration is
    missing, empty, or has no ``file`` entry.
    """
    fallback_root = 'data'
    loader_settings = load_configs()
    if loader_settings is None or len(loader_settings.items()) == 0:
        return fallback_root

    for kind, options in loader_settings.items():
        if kind != "file":
            continue
        return FileLoaderConfig(**options).data_dir
    return fallback_root
|
||||
|
||||
def get_document_Types():
    """List a key for every leaf directory below the file-cache root.

    The directory tree under ``getFileCacahePath()`` is walked with an
    explicit stack. Each directory that has no sub-directories contributes
    one key, computed by ``path_difference(root, directory)``.

    Returns:
        The list of keys, in the order the leaf directories were visited.
    """
    root = getFileCacahePath()
    leaf_keys = []
    pending = [root]
    while pending:
        current = pending.pop()
        children = []
        for entry in os.listdir(current):
            candidate = os.path.join(current, entry)
            if os.path.isdir(candidate):
                children.append(candidate)
        if children:
            # Not a leaf: descend into every sub-directory.
            pending.extend(children)
        else:
            leaf_keys.append(path_difference(root, current))
    return leaf_keys
|
||||
|
||||
def getProjectInfos():
    """Collect the flag and display name of every project directory.

    The data directory is taken from the first enabled loader entry in the
    loader configuration. For each key returned by ``get_document_Types()``
    the key is turned back into a directory path below that data directory
    and the project name is read with ``getProjectName``.

    Returns:
        A list of ``{'flag': <key>, 'name': <project name>}`` dictionaries,
        or ``None`` when the configuration is missing/empty or yields no
        data directory.
    """
    configs = load_configs()
    if configs is None or len(configs.items()) == 0:
        return None

    file_config = None
    # NOTE(review): the first *enabled* loader of any type is used to build
    # a FileLoaderConfig; confirm whether this should be limited to "file".
    for loader_type, loader_config in configs.items():
        # Guard against an empty (None) entry before reading from it; a
        # mapping is required for the ** expansion below.
        loader_config = loader_config or {}
        if loader_config.get('enable', True):
            file_config = FileLoaderConfig(**loader_config)
            break
    if file_config is None or file_config.data_dir is None:
        return None

    prjInfos = []
    for prjFlag in get_document_Types():
        # Keys join path components with '_'; map them back using the
        # platform separator instead of a hard-coded backslash.
        # NOTE: directory names that themselves contain '_' cannot be
        # round-tripped by this scheme.
        fileDir = os.path.join(file_config.data_dir, prjFlag.replace('_', os.sep))
        prjInfos.append({
            'flag': prjFlag,
            'name': getProjectName(fileDir),
        })
    return prjInfos
|
||||
|
||||
def get_documents(docType:str):
|
||||
documents = []
|
||||
config = load_configs()
|
||||
|
||||
@@ -29,7 +98,7 @@ def get_documents():
|
||||
loader_config = loader_config or []
|
||||
match loader_type:
|
||||
case "file":
|
||||
document = get_file_documents(FileLoaderConfig(**loader_config))
|
||||
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
|
||||
case "web":
|
||||
document = get_web_documents(WebLoaderConfig(**loader_config))
|
||||
case "db":
|
||||
@@ -38,4 +107,4 @@ def get_documents():
|
||||
raise ValueError(f"Invalid loader type: {loader_type}")
|
||||
documents.extend(document)
|
||||
|
||||
return documents
|
||||
return documents
|
||||
|
||||
@@ -6,6 +6,9 @@ from llama_index.core.readers.base import BaseReader
|
||||
from llama_index.core.readers.json import JSONReader
|
||||
from llama_parse import LlamaParse
|
||||
from pydantic import BaseModel, validator
|
||||
from app.engine.loaders.markdownReader import ChunkMarkdownReader
|
||||
from app.engine.loaders.projectJson import ProjectJson
|
||||
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
@@ -20,7 +23,6 @@ class FileLoaderConfig(BaseModel):
|
||||
raise ValueError(f"Directory '{v}' does not exist")
|
||||
return v
|
||||
|
||||
|
||||
def llama_parse_parser():
|
||||
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
||||
raise ValueError(
|
||||
@@ -35,7 +37,6 @@ def llama_parse_parser():
|
||||
)
|
||||
return parser
|
||||
|
||||
|
||||
def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||
from llama_parse.utils import SUPPORTED_FILE_TYPES
|
||||
|
||||
@@ -43,10 +44,13 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
|
||||
|
||||
def llama_local_extractor() -> Dict[str, BaseReader]:
|
||||
return {".json" : JSONReader(clean_json=False,levels_back=0)}
|
||||
parser = {
|
||||
".json" : JSONReader(clean_json=False,levels_back=0),
|
||||
".md" : ChunkMarkdownReader(),
|
||||
}
|
||||
return parser
|
||||
|
||||
|
||||
def get_file_documents(config: FileLoaderConfig):
|
||||
def get_file_documents(config: FileLoaderConfig,childPath: str):
|
||||
from llama_index.core.readers import SimpleDirectoryReader
|
||||
|
||||
try:
|
||||
@@ -63,7 +67,7 @@ def get_file_documents(config: FileLoaderConfig):
|
||||
file_extractor = llama_local_extractor()
|
||||
|
||||
reader = SimpleDirectoryReader(
|
||||
config.data_dir,
|
||||
os.path.join(config.data_dir,childPath.replace('_','\\')),
|
||||
recursive=True,
|
||||
filename_as_id=True,
|
||||
raise_on_error=True,
|
||||
@@ -86,3 +90,32 @@ def get_file_documents(config: FileLoaderConfig):
|
||||
else:
|
||||
# Raise the error if it is not the case of empty data dir
|
||||
raise e
|
||||
|
||||
def prjFileSuffix(dir:str):
    """Return the extension of the first regular file listed in ``dir``.

    Args:
        dir: Directory to inspect (not searched recursively).

    Returns:
        The extension including the leading dot, or ``''`` when the
        directory contains no regular files.
    """
    for entry in os.listdir(dir):
        if os.path.isfile(os.path.join(dir, entry)):
            _, suffix = os.path.splitext(entry)
            return suffix
    return ''
|
||||
|
||||
def getProjectName(dir:str):
    """Read the project name stored in a project directory.

    The storage format is chosen from the extension reported by
    ``prjFileSuffix``:

    * ``.json``: the ``工程属性`` table is scanned for the record whose
      ``名称`` is ``工程名称`` and its ``值`` is returned.
    * ``.md``: the ``工程属性.md`` file is loaded with ``ChunkMarkdownReader``
      and the same lookup is done through ``findValue``.

    Args:
        dir: Project directory.

    Returns:
        The project name, or ``''`` when it cannot be found.
    """
    kind = prjFileSuffix(dir)

    if kind == '.json':
        project = ProjectJson(dir)
        project.parse()
        for row in project.table('工程属性').records():
            if row.value('名称') == '工程名称':
                return row.value('值')
        return ''

    if kind == '.md':
        for file_name in os.listdir(dir):
            if not file_name.endswith('.md'):
                continue
            if os.path.splitext(file_name)[0] != '工程属性':
                continue
            reader = ChunkMarkdownReader()
            reader.load_data(os.path.join(dir, file_name))
            return reader.findValue("名称=='工程名称'",'值')

    return ''
|
||||
@@ -0,0 +1,64 @@
|
||||
from app.engine.loaders.projectJson import *
|
||||
|
||||
class MarkDown:
    """Render one parsed JSON table as a Markdown file.

    The output consists of a title section (table name, optional remark and
    one bullet per column) followed by a blank line and a Markdown table.
    """

    # Bookkeeping fields that are never exported.
    _IGNORED_FIELDS = ('_id', 'nodeType', 'relTbId')

    def __init__(self,table:"JsonTable",path:str) -> None:
        """
        Args:
            table: Table providing ``fields()``, ``records()``, ``name()``
                and ``comment()``.
            path: Destination path of the Markdown file.
        """
        self._table = table
        self._path = path

    @staticmethod
    def _cell_text(value) -> str:
        """Convert a record value to single-line text for a table cell."""
        if value is None:
            return ''
        # Values loaded from JSON may be numbers or booleans; stringify
        # before replacing line breaks, which would otherwise split the row.
        return str(value).replace('\n', " ")

    def build(self):
        """Collect columns and rows from the table and write the file."""
        columns:list = []
        colComments:list = []
        for name, fld in self._table.fields().items():
            if name in self._IGNORED_FIELDS:
                continue
            columns.append(fld.value('name'))
            colComments.append(fld.value('alias'))

        rowdatas = [
            [self._cell_text(record.value(col)) for col in columns]
            for record in self._table.records()
        ]

        content = self.convert(self._table.name(),self._table.comment(),columns,colComments,rowdatas)
        with open(self._path, 'w',encoding='utf-8') as file:
            file.write(content)

    def convert(self,tableName:str,tableComment:str,columns:list,colComments:list,rowdatas:list):
        """Build the Markdown text for a table.

        Args:
            tableName: Used as the level-1 heading.
            tableComment: Table remark; the remark line is omitted when empty.
            columns: Column names, in output order.
            colComments: Column remarks, parallel to ``columns``.
            rowdatas: One list of cell values per row.

        Returns:
            The title section, a blank line and the Markdown table.
        """
        title_lines = ["# " + tableName]
        # The remark line depends on the remark itself, not on the name.
        if tableComment:
            title_lines.append(f"备注:{tableComment}")
        for column, comment in zip(columns, colComments):
            title_lines.append(f"- 字段名称:{column}")
            if comment:
                title_lines.append(f" - 备注:{comment}")

        table_lines = [
            "|" + "|".join(columns) + "|",                  # header row
            "|" + "|".join('---' for _ in columns) + "|",   # separator row
        ]
        for row in rowdatas:
            table_lines.append("|" + "|".join(str(cell) for cell in row) + "|")

        return "\n".join(title_lines) + "\n\n" + "\n".join(table_lines) + "\n"
|
||||
|
||||
|
||||
# Conversion script: parse every JSON table of a project directory and write
# each one out as a Markdown file.
# NOTE(review): this runs at import time, and both the project directory ('')
# and the output path (f'') are empty here. Consider moving it behind an
# `if __name__ == "__main__":` guard and supplying real paths.
prjSon = ProjectJson('')
prjSon.parse()
tables = prjSon.tables()
for name,table in tables.items():
    # One Markdown file per table.
    mdObj = MarkDown(table,f'')
    mdObj.build()
|
||||
@@ -0,0 +1,89 @@
|
||||
from llama_index.readers.file.markdown import MarkdownReader
|
||||
from typing import Any, Dict, List, Optional, Tuple
|
||||
import re
|
||||
from llama_index.core.utils import get_tokenizer
|
||||
|
||||
|
||||
class ChunkMarkdownReader(MarkdownReader):
    """Markdown reader that splits a "title + table" document into chunks.

    ``markdown_to_tups`` is overridden so that the text is cut whenever the
    running token count exceeds ``chunkSize``. While parsing, the table's
    column header line and its data rows are remembered so that
    ``findValue`` can look up a cell afterwards.
    """

    def __init__(
        self,
        *args: Any,
        chunkSize:int = 2048,
        **kwargs: Any,
    ) -> None:
        """
        Args:
            chunkSize: Token budget of one chunk.
            *args, **kwargs: Forwarded to ``MarkdownReader``.
        """
        self._chunkSize = chunkSize
        self._tokenizer = get_tokenizer()
        self._colheader = ''
        self._rows = []
        super().__init__(*args,**kwargs)

    def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
        """Split ``markdown_text`` into ``(header, body)`` chunks.

        The first chunk's header is the title section plus the table header;
        later chunks repeat only the table header.

        Raises:
            ValueError: If the title section alone exceeds ``chunkSize``.
        """
        # Start from a clean slate so that a reader instance reused for
        # several files does not accumulate rows from earlier files.
        self._colheader = ''
        self._rows = []

        markdown_tups: List[Tuple[Optional[str], str]] = []
        lines = markdown_text.split("\n")

        strTitle = ''
        tokensNum:int = 0
        current_lines = []
        strheader:str = ''
        headerSize:int = 0
        for line in lines:
            tokensNum += self._token_size(line)
            if tokensNum > self._chunkSize and len(current_lines) > 0:
                # Budget exceeded: flush what has been collected so far.
                if len(markdown_tups) == 0:
                    markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
                else:
                    markdown_tups.append((strheader , "\n".join(current_lines)))
                # The next chunk starts with the cost of the repeated header.
                tokensNum = headerSize
                current_lines.clear()
            current_lines.append(line)
            if strTitle!='' and strheader!='':
                # Title and table header are known: this is a data row.
                self._rows.append(line)

            # NOTE(review): after split("\n") a line can never equal '\n', so
            # only a lone '\r' (a blank line in CRLF text) ends the title
            # section. Confirm whether LF-only input must be supported.
            if line == '\n' or line == '\r':
                if tokensNum > self._chunkSize:
                    raise ValueError('标题Token数大于chunkSize大小')
                strTitle = "\n".join(current_lines)
                # Title tokens are not added to headerSize (that line was
                # commented out in the original).
                current_lines.clear()

            if line.startswith("|---"):
                # Separator row reached: the collected lines form the header.
                self._colheader = current_lines[0]
                strheader = "\n".join(current_lines)
                headerSize= headerSize + self._token_size(strheader)
                current_lines.clear()

        if len(current_lines) > 0:
            if len(markdown_tups) == 0:
                markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
            else:
                markdown_tups.append((strheader , "\n".join(current_lines)))

        # Strip '#' from headers and HTML-like tags from bodies.
        return [
            (
                key if key is None else re.sub(r"#", "", key).strip(),
                re.sub(r"<.*?>", "", value),
            )
            for key, value in markdown_tups
        ]

    def _token_size(self, text: str) -> int:
        """Return the number of tokens the tokenizer produces for ``text``."""
        return len(self._tokenizer(text))

    @staticmethod
    def _split_row(line: str) -> List[str]:
        """Split one ``|a|b|`` table line into its cells.

        Only the outer pipes and a trailing line break are removed; empty
        cells are kept so that values stay aligned with their columns.
        """
        text = line.strip('\r\n')
        if text.startswith('|'):
            text = text[1:]
        if text.endswith('|'):
            text = text[:-1]
        if text == '':
            return []
        return text.split('|')

    def findValue(self,expression:str,Field:str):
        """Return column ``Field`` of the first row matching ``expression``.

        Each remembered data row is turned into a ``{column: cell}`` mapping
        and ``expression`` is evaluated with that mapping as its namespace.

        Args:
            expression: Python expression over column names, for example
                ``"名称=='工程名称'"``.
            Field: Name of the column whose value is returned.

        Returns:
            The cell value of the first matching row, or ``''`` if no row
            matches.
        """
        cols = self._split_row(self._colheader)

        for row in self._rows:
            rowdatas = self._split_row(row)
            if len(rowdatas) == 0:
                continue
            gData = dict(zip(cols, rowdatas))
            # SECURITY NOTE: eval executes arbitrary Python. `expression`
            # must only ever come from trusted code, never from user input
            # or from file content.
            if eval(expression,gData):
                return gData[Field]
        return ''
|
||||
|
||||
|
||||
@@ -0,0 +1,94 @@
|
||||
from typing import Dict,List,Any
|
||||
import json,os
|
||||
|
||||
class Record:
    """One data row of a JSON table, wrapping the row's key/value mapping."""

    def __init__(self,datas:Dict[str,Any]) -> None:
        self._datas:Dict[str,Any] = datas

    def value(self,key:str):
        """Return the value stored under ``key``, or ``''`` if it is absent."""
        return self._datas.get(key, '')
|
||||
|
||||
class Field:
    """One column definition of a JSON table, wrapping its attribute mapping."""

    def __init__(self,datas:Dict[str,Any]) -> None:
        self._datas:Dict[str,Any] = datas

    def value(self,key:str):
        """Return the attribute stored under ``key``, or ``''`` if it is absent."""
        return self._datas.get(key, '')
|
||||
|
||||
class JsonTable:
    """A table loaded from one JSON file.

    The file is expected to hold a ``table`` object with ``name``,
    ``comment``, ``fields`` and ``records`` entries.
    """

    def __init__(self,filePth:str) -> None:
        self._filePth = filePth
        self._fields:Dict[str,Field] = {}
        self._records:List[Record] = []
        # File name without directory and extension; reported by name().
        self._fileName = os.path.splitext(os.path.basename(filePth))[0]
        self._name = ''
        self._comment = ''

    def parse(self):
        """Read the JSON file and populate name, comment, fields and records."""
        with open(self._filePth, 'r',encoding='utf-8') as handle:
            payload = json.load(handle)

        table_info:dict = payload.get('table')
        self._name = table_info.get('name')
        self._comment = table_info.get('comment')

        # Fields are indexed by their own 'name' attribute.
        for raw_field in table_info.get('fields'):
            column = Field(raw_field)
            self._fields[column.value('name')] = column

        for raw_record in table_info.get('records'):
            self._records.append(Record(raw_record))

    def records(self):
        """Return the list of parsed records."""
        return self._records

    def fields(self):
        """Return the parsed fields, keyed by field name."""
        return self._fields

    def name(self):
        """Return the table name, which is the JSON file's base name."""
        return self._fileName

    def comment(self):
        """Return the table comment read from the file (``''`` before parse)."""
        return self._comment
|
||||
|
||||
|
||||
class ProjectJson:
    """All JSON tables found directly inside one project directory."""

    def __init__(self,dir:str) -> None:
        self._dir = dir
        self._tables:Dict[str,JsonTable] = {}

    def parse(self):
        """Load every ``*.json`` file of the directory as a ``JsonTable``.

        Tables are stored under the file name without its extension.
        """
        for entry in os.listdir(self._dir):
            if not entry.endswith('.json'):
                continue
            loaded = JsonTable(os.path.join(self._dir, entry))
            loaded.parse()
            table_key, _ = os.path.splitext(entry)
            self._tables[table_key] = loaded

    def table(self,tableName:str):
        """Return the table named ``tableName``; raises ``KeyError`` if unknown."""
        return self._tables[tableName]

    def tables(self):
        """Return all parsed tables, keyed by table name."""
        return self._tables
|
||||
|
||||
def getProjectName(dir:str):
    """Derive a project name from the last component of a directory path.

    Both ``\\`` and ``/`` are accepted as separators and trailing separators
    are ignored, so the result does not depend on the platform that produced
    the path.

    Args:
        dir: Path of the project directory.

    Returns:
        The last path component, or ``"未知工程名称"`` when the path has no
        usable component (for example an empty string).
    """
    normalized = dir.replace('\\', '/').rstrip('/')
    name = normalized.rsplit('/', 1)[-1]
    if name:
        return name
    return "未知工程名称"
|
||||
|
||||
@@ -2,39 +2,31 @@ from llama_index.core import PromptTemplate
|
||||
|
||||
text_qa_template_str = (
|
||||
"# 角色\n"
|
||||
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
|
||||
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
|
||||
"如同直接从文件中提取的内容。\n"
|
||||
"知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n"
|
||||
"例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n"
|
||||
|
||||
"## 技能\n"
|
||||
"### 技能 1: 数据查询与提供\n"
|
||||
"- 准确回答所有关于电力工程造价的相关问题。\n"
|
||||
"- 提供具体数据,如成本估算、材料清单、劳动力需求等。\n"
|
||||
"- 确保提供的信息严格基于工程文档中的记录。\n"
|
||||
|
||||
"### 技能 2: 技术性解释\n"
|
||||
"- 解释造价工程中的技术术语和概念。\n"
|
||||
"- 为复杂的工程细节提供清晰易懂的说明。\n"
|
||||
|
||||
"## 约束\n"
|
||||
"- 仅回答与电力工程造价文件相关的具体问题。\n"
|
||||
"- 不进行任何超出文件内容的猜测或假设。\n"
|
||||
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
|
||||
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
|
||||
"以下为上下文信息\n"
|
||||
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
|
||||
"\n"
|
||||
"# 任务描述:\n"
|
||||
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
|
||||
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符。\n"
|
||||
"如果答案与事实不符,直接给出答案,不要做解释。\n"
|
||||
"\n"
|
||||
"# 回答规则:\n"
|
||||
"- 请使用与文档材料相同的语言回答问题。\n"
|
||||
"- 评估文档是否含有足够信息回答问题。无关时不要回答。\n"
|
||||
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释。\n"
|
||||
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁。\n"
|
||||
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答。\n"
|
||||
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”。\n"
|
||||
"\n"
|
||||
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复。\n"
|
||||
"---------------------\n"
|
||||
"{context_str}\n"
|
||||
"---------------------\n"
|
||||
"请根据上下文信息而非先前知识回答我的问题或回复我的指令。前面的上下文信息可能有用,也可能没用,你需要从我给出的上下文信息中选出与我的问题最相关的那些,来为你的回答提供依据。回答一定要忠于原文,简洁但不丢信息,不要胡乱编造。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。我的问题或指令是什么语种,你就用什么语种回复。\n"
|
||||
"鉴于来自多个来源的文档片段而非先验知识,回答查询。\n"
|
||||
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
||||
|
||||
"问题:{query_str}\n"
|
||||
"你的回复: "
|
||||
"Query: {query_str}\n"
|
||||
"Answer: "
|
||||
)
|
||||
|
||||
|
||||
text_qa_template = PromptTemplate(text_qa_template_str)
|
||||
|
||||
refine_template_str = (
|
||||
@@ -58,31 +50,26 @@ refine_template = PromptTemplate(refine_template_str)
|
||||
|
||||
summary_template_str = (
|
||||
"# 角色\n"
|
||||
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
|
||||
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
|
||||
"如同直接从文件中提取的内容。\n"
|
||||
|
||||
"## 技能\n"
|
||||
"### 技能 1: 数据查询与提供\n"
|
||||
"- 准确回答所有关于电力工程造价的相关问题。\n"
|
||||
"- 提供具体数据,如成本估算、材料清单、劳动力需求等。\n"
|
||||
"- 确保提供的信息严格基于工程文档中的记录。\n"
|
||||
|
||||
"### 技能 2: 技术性解释\n"
|
||||
"- 解释造价工程中的技术术语和概念。\n"
|
||||
"- 为复杂的工程细节提供清晰易懂的说明。\n"
|
||||
|
||||
"## 约束\n"
|
||||
"- 仅回答与电力工程造价文件相关的具体问题。\n"
|
||||
"- 不进行任何超出文件内容的猜测或假设。\n"
|
||||
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
|
||||
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
|
||||
"来自多个来源的上下文信息如下。\n"
|
||||
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
|
||||
"\n"
|
||||
"# 任务描述:\n"
|
||||
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
|
||||
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符。\n"
|
||||
"如果答案与事实不符,直接给出答案,不要做解释。\n"
|
||||
"\n"
|
||||
"# 回答规则:\n"
|
||||
"- 请使用与文档材料相同的语言回答问题。\n"
|
||||
"- 评估文档是否含有足够信息回答问题。无关时不要回答。\n"
|
||||
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释。\n"
|
||||
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁。\n"
|
||||
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答。\n"
|
||||
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”。\n"
|
||||
"\n"
|
||||
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复。\n"
|
||||
"---------------------\n"
|
||||
"{context_str}\n"
|
||||
"---------------------\n"
|
||||
"鉴于来自多个来源的信息而非先验知识, "
|
||||
"回答查询。\n"
|
||||
"鉴于来自多个来源的文档片段而非先验知识,回答查询。\n"
|
||||
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
||||
"Query: {query_str}\n"
|
||||
"Answer: "
|
||||
@@ -93,3 +80,40 @@ simple_template_str = (
|
||||
"{query_str}"
|
||||
)
|
||||
simple_template = PromptTemplate(simple_template_str)
|
||||
|
||||
ReActChatFormatter_messages = (
|
||||
"您的设计旨在帮助完成各种任务,从回答问题到提供其他类型分析的摘要。\n\n"
|
||||
"##工具\n\n"
|
||||
"你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n"
|
||||
"这可能需要将任务分解为子任务,并使用不同的工具来完成每个子任务。\n\n"
|
||||
"你可以访问以下工具:\n"
|
||||
"{tool_desc}\n\n\n"
|
||||
"##输出格式\n\n"
|
||||
"请用与问题相同的语言回答,并使用以下格式:\n\n"
|
||||
"'''\n"
|
||||
"Thought: 用户当前的语言是:(user's language)。我需要使用工具来帮助我回答问题。\n"
|
||||
"Action: 如果使用工具,则为工具名称(one of {tool_names})。\n"
|
||||
"Action Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}})\n"
|
||||
"'''\n\n"
|
||||
"请始终以Thought开始。\n\n"
|
||||
"切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n"
|
||||
"请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n"
|
||||
"如果使用此格式,用户将以下面的格式进行回应:\n\n"
|
||||
"'''\n"
|
||||
"Observation: 工具响应\n"
|
||||
"'''\n\n"
|
||||
"你应该继续重复上述格式,直到你有足够的信息来回答问题而无需使用更多工具。此时,你必须使用以下两种格式之一进行回答:\n\n"
|
||||
"'''\nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\n"
|
||||
"Answer: [你的答案(与用户问题相同的语言)]\n"
|
||||
"'''\n\n"
|
||||
"'''\n"
|
||||
"Thought: 我无法使用提供的工具回答问题。\n"
|
||||
"Answer: [你的答案(与用户问题相同的语言)]\n"
|
||||
"'''\n\n##如果从工具中得到的回应是Empty Response,那么只需要回答“我不知道”,不需要额外回答别的内容。## 当前对话\n\n"
|
||||
"以下是当前对话,由人类和助手的消息交替组成。\n"
|
||||
)
|
||||
|
||||
|
||||
summary_query_tool_messages = "适用于任何需要进行全面总结、概括的要求。"
|
||||
query_engine_tool_messages = "适用于回答任何问题。"
|
||||
tree_summary_query_engine_tool_messages = "在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用本工具。"
|
||||
|
||||
@@ -0,0 +1,70 @@
|
||||
from typing import Any, List, Optional
|
||||
from llama_index.core.postprocessor import SentenceTransformerRerank
|
||||
from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle
|
||||
from llama_index.core.callbacks import CBEventType, EventPayload
|
||||
from llama_index.core.bridge.pydantic import PrivateAttr
|
||||
|
||||
class OllamaRerank(SentenceTransformerRerank):
    """Reranker that additionally drops nodes scoring below a threshold.

    Builds on ``SentenceTransformerRerank``: every (query, node text) pair is
    scored with ``self._model.predict``, nodes whose new score is below
    ``score_threshold`` are discarded, and the best ``top_n`` of the
    remaining nodes are returned.
    """

    # Minimum rerank score a node must reach to be kept.
    _score_threshold: float = PrivateAttr()

    def __init__(
        self,
        top_n: int = 2,
        model: str = "cross-encoder/stsb-distilroberta-base",
        device: Optional[str] = None,
        keep_retrieval_score: Optional[bool] = False,
        score_threshold: float = 0.3,
    ):
        """Create the reranker.

        Args:
            top_n: Maximum number of nodes returned after reranking.
            model: Model name or path forwarded to the parent class.
            device: Device forwarded to the parent class.
            keep_retrieval_score: When true, the pre-rerank score is stored in
                each node's metadata under ``"retrieval_score"``.
            score_threshold: Nodes scoring below this value are dropped.
        """
        self._score_threshold = score_threshold
        super().__init__(top_n, model, device, keep_retrieval_score)

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier of this post-processor class."""
        return "OllamaRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rescore ``nodes`` against the query, filter by threshold, keep top_n.

        Args:
            nodes: Retrieved nodes. Each node's ``score`` is overwritten with
                the rerank score; the list itself is left untouched.
            query_bundle: Query whose ``query_str`` is paired with each node.

        Returns:
            A new list with at most ``top_n`` nodes, highest score first, none
            of which scored below the configured threshold.

        Raises:
            ValueError: If ``query_bundle`` is ``None``.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        query_and_nodes = [
            (
                query_bundle.query_str,
                node.node.get_content(metadata_mode=MetadataMode.EMBED),
            )
            for node in nodes
        ]

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self.model,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: self.top_n,
            },
        ) as event:
            scores = self._model.predict(query_and_nodes)

            assert len(scores) == len(nodes)

            for node, score in zip(nodes, scores):
                if self.keep_retrieval_score:
                    # Preserve the retriever's score before overwriting it.
                    node.node.metadata["retrieval_score"] = node.score
                node.score = score

            # Filter into a fresh list rather than calling nodes.remove():
            # the caller's list must not be pruned as a side effect, and
            # remove() is equality-based and quadratic.
            kept = [
                node for node in nodes if not node.score < self._score_threshold
            ]

            new_nodes = sorted(kept, key=lambda x: -x.score if x.score else 0)[
                : self.top_n
            ]
            event.on_end(payload={EventPayload.NODES: new_nodes})

        return new_nodes
|
||||
@@ -0,0 +1,234 @@
|
||||
from llama_index.core.response_synthesizers.tree_summarize import TreeSummarize
|
||||
from typing import Any, Optional, Sequence,List
|
||||
import asyncio
|
||||
from llama_index.core.callbacks.base import CallbackManager
|
||||
from llama_index.core.indices.prompt_helper import PromptHelper
|
||||
from llama_index.core.prompts import BasePromptTemplate
|
||||
from llama_index.core.service_context import ServiceContext
|
||||
from llama_index.core.service_context_elements.llm_predictor import LLMPredictorType
|
||||
from llama_index.core.types import BaseModel,RESPONSE_TEXT_TYPE
|
||||
from llama_index.core.async_utils import run_async_tasks
|
||||
from llama_index.core.utils import get_tokenizer
|
||||
from llama_index.core.prompts.prompt_utils import get_empty_prompt_txt
|
||||
|
||||
class CustomTreeResponse(TreeSummarize):
    """Tree-summarize synthesizer that groups whole text chunks per LLM call.

    Differs from the parent only in how chunks are repacked: ``repack`` merges
    consecutive chunks, never splitting one, until the next chunk would no
    longer fit in the context left after the summary prompt and the reserved
    output tokens.
    """

    def __init__(
        self,
        llm: Optional[LLMPredictorType] = None,
        callback_manager: Optional[CallbackManager] = None,
        prompt_helper: Optional[PromptHelper] = None,
        summary_template: Optional[BasePromptTemplate] = None,
        output_cls: Optional[BaseModel] = None,
        streaming: bool = False,
        use_async: bool = False,
        verbose: bool = False,
        service_context: Optional[ServiceContext] = None,
    ) -> None:
        """Store a tokenizer for size accounting and initialise the parent.

        All arguments are forwarded positionally to ``TreeSummarize``.
        """
        self._tokenizer = get_tokenizer()
        super().__init__(
            llm,
            callback_manager,
            prompt_helper,
            summary_template,
            output_cls,
            streaming,
            use_async,
            verbose,
            service_context,
        )

    async def aget_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Get tree summarize response (async).

        Repacks ``text_chunks``; if a single chunk remains it is answered
        directly, otherwise every chunk is summarised concurrently and the
        summaries are fed back into this method.
        """
        summary_template = self._summary_template.partial_format(query_str=query_str)

        text_chunks = self.repack(text_chunks=text_chunks)

        if self._verbose:
            print(f"{len(text_chunks)} text chunks after repacking")

        # give final response if there is only one chunk
        if len(text_chunks) == 1:
            response: RESPONSE_TEXT_TYPE
            if self._streaming:
                response = await self._llm.astream(
                    summary_template, context_str=text_chunks[0], **response_kwargs
                )
            else:
                if self._output_cls is None:
                    response = await self._llm.apredict(
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )
                else:
                    response = await self._llm.astructured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )

            # return pydantic object if output_cls is specified
            return response

        else:
            # summarize each chunk
            if self._output_cls is None:
                tasks = [
                    self._llm.apredict(
                        summary_template,
                        context_str=text_chunk,
                        **response_kwargs,
                    )
                    for text_chunk in text_chunks
                ]
            else:
                tasks = [
                    self._llm.astructured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunk,
                        **response_kwargs,
                    )
                    for text_chunk in text_chunks
                ]

            summary_responses = await asyncio.gather(*tasks)
            if self._output_cls is not None:
                # Structured outputs are serialised so they can be re-summarised as text.
                summaries = [summary.json() for summary in summary_responses]
            else:
                summaries = summary_responses

            # recursively summarize the summaries
            return await self.aget_response(
                query_str=query_str,
                text_chunks=summaries,
                **response_kwargs,
            )

    def get_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Get tree summarize response (sync).

        Same flow as ``aget_response``; per-chunk summaries are produced via
        ``run_async_tasks`` when ``use_async`` was requested, otherwise one
        blocking call per chunk.
        """
        summary_template = self._summary_template.partial_format(query_str=query_str)
        text_chunks = self.repack(text_chunks=text_chunks)

        if self._verbose:
            print(f"{len(text_chunks)} text chunks after repacking")

        # give final response if there is only one chunk
        if len(text_chunks) == 1:
            response: RESPONSE_TEXT_TYPE
            if self._streaming:
                response = self._llm.stream(
                    summary_template, context_str=text_chunks[0], **response_kwargs
                )
            else:
                if self._output_cls is None:
                    response = self._llm.predict(
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )
                else:
                    response = self._llm.structured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )

            return response

        else:
            # summarize each chunk
            if self._use_async:
                if self._output_cls is None:
                    tasks = [
                        self._llm.apredict(
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                else:
                    tasks = [
                        self._llm.astructured_predict(
                            self._output_cls,
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]

                summary_responses = run_async_tasks(tasks)

                if self._output_cls is not None:
                    summaries = [summary.json() for summary in summary_responses]
                else:
                    summaries = summary_responses
            else:
                if self._output_cls is None:
                    summaries = [
                        self._llm.predict(
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                else:
                    summaries = [
                        self._llm.structured_predict(
                            self._output_cls,
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                    summaries = [summary.json() for summary in summaries]

            # recursively summarize the summaries
            return self.get_response(
                query_str=query_str, text_chunks=summaries, **response_kwargs
            )

    def repack(self, text_chunks: Sequence[str]) -> List[str]:
        """Greedily merge consecutive chunks into context-sized groups.

        Args:
            text_chunks: Chunks to pack, in order. Chunks are never split.

        Returns:
            Merged strings, one per group, in the original order.

        Raises:
            ValueError: If a single chunk is larger than the available
                context size, or the prompt alone leaves no room.
        """
        prompt_str = get_empty_prompt_txt(self._summary_template)
        num_prompt_tokens = self._token_size(prompt_str)
        available_size = self._get_available_context_size(num_prompt_tokens)

        results = []
        pending = []
        pending_size = 0
        for text_chunk in text_chunks:
            chunk_size = self._token_size(text_chunk)
            if chunk_size > available_size:
                raise ValueError("文本块大小大于可用上下文大小")
            if pending and pending_size + chunk_size > available_size:
                # The current group is full: flush it and start a new one.
                results.append(self._merge_chunks(pending))
                pending = []
                pending_size = 0
            # Count the chunk in the group it actually joins. Resetting the
            # counter without adding the overflowing chunk would under-count
            # every later group and let it exceed the context window.
            pending.append(text_chunk)
            pending_size += chunk_size
        if pending:
            results.append(self._merge_chunks(pending))
        return results

    def _get_available_context_size(self, num_prompt_tokens: int) -> int:
        """Return tokens left after the prompt and the reserved output.

        Computed as ``context_window - num_prompt_tokens - num_output`` from
        the LLM metadata.

        Raises:
            ValueError: If the result is negative.
        """
        llm_metadata = self._llm.metadata
        context_size_tokens = (
            llm_metadata.context_window - num_prompt_tokens - llm_metadata.num_output
        )
        if context_size_tokens < 0:
            raise ValueError(
                f"Calculated available context size {context_size_tokens} was"
                " not non-negative."
            )
        return context_size_tokens

    def _token_size(self, text: str) -> int:
        """Return the number of tokens the stored tokenizer yields for ``text``."""
        return len(self._tokenizer(text))

    def _merge_chunks(self, ava_chunks: list):
        """Join stripped, non-empty chunks with a blank line between them."""
        return "\n\n".join([c.strip() for c in ava_chunks if c.strip()])
|
||||
@@ -24,13 +24,15 @@ class HybridRetriever(BaseRetriever):
|
||||
self._vecRetriever = vector_index.as_retriever(
|
||||
similarity_top_k=similarity_top_k,filters = filters
|
||||
)
|
||||
|
||||
self._bm25Retriever = None
|
||||
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
||||
if os.path.exists(STORAGE_DIR) and len(os.listdir(STORAGE_DIR)) > 0:
|
||||
self._bm25Retriever = CHBM25Retriever.from_persist_dir(STORAGE_DIR)
|
||||
else:
|
||||
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=self._vector_index.vector_store.get_nodes(None))
|
||||
bmRetriver.persist(STORAGE_DIR)
|
||||
nodes = self._vector_index.vector_store.get_nodes(None)
|
||||
similarity_top_k = min(len(nodes),similarity_top_k)
|
||||
self._bm25Retriever = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=nodes)
|
||||
self._bm25Retriever.persist(STORAGE_DIR)
|
||||
self._alpha = alpha
|
||||
|
||||
|
||||
@@ -43,6 +45,16 @@ class HybridRetriever(BaseRetriever):
|
||||
for node in bmNodes:
|
||||
bmDic[node.node_id] = node
|
||||
|
||||
vecScores = [node_with_score.score for node_with_score in vecNodes]
|
||||
bmSores = [node_with_score.score for node_with_score in bmNodes]
|
||||
|
||||
vec_min_score = min(vecScores) if len(vecScores) > 0 else 0
|
||||
vec_max_score = max(vecScores) if len(vecScores) > 0 else 0
|
||||
|
||||
bm_min_score = min(bmSores) if len(bmSores) > 0 else 0
|
||||
bm_max_score = max(bmSores) if len(bmSores) > 0 else 0
|
||||
|
||||
|
||||
result_tups = []
|
||||
for i in range(len(vecNodes)):
|
||||
node = vecNodes[i]
|
||||
@@ -52,7 +64,11 @@ class HybridRetriever(BaseRetriever):
|
||||
bmDic.pop(node.node_id)
|
||||
else:
|
||||
bmScore = 0.0
|
||||
full_similarity = (self._alpha * node.score) + (
|
||||
|
||||
bmScore = self.normal_score(bmScore,bm_min_score,bm_max_score)
|
||||
vecScore = self.normal_score(node.score,vec_min_score,vec_max_score)
|
||||
|
||||
full_similarity = (self._alpha * vecScore) + (
|
||||
(1 - self._alpha) * bmScore
|
||||
)
|
||||
result_tups.append((full_similarity, node))
|
||||
@@ -64,4 +80,10 @@ class HybridRetriever(BaseRetriever):
|
||||
result_tups = sorted(result_tups, key=lambda x: x[0], reverse=True)
|
||||
for full_score, node in result_tups:
|
||||
node.score = full_score
|
||||
return [n for _, n in result_tups][:self._out_top_k]
|
||||
return [n for _, n in result_tups][:self._out_top_k]
|
||||
|
||||
def normal_score(self, score, min, max):
    """Min-max normalise ``score`` against the observed ``min``/``max``.

    When ``min`` and ``max`` are equal there is no range to scale by, so the
    result is 1.0 for a positive score and 0.0 otherwise.
    """
    if max != min:
        return (score - min) / (max - min)
    if score > 0:
        return 1.0
    return 0.0
|
||||
@@ -5,12 +5,13 @@ from qdrant_client import qdrant_client
|
||||
|
||||
qclient = None
|
||||
|
||||
def get_qdrant_vector_store():
|
||||
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
||||
def get_qdrant_vector_store(docType:str):
|
||||
collection_name = docType
|
||||
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
||||
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
|
||||
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
|
||||
|
||||
vector_store_path =os.path.join(vector_store_path,docType)
|
||||
if not vector_store_path or not host:
|
||||
raise ValueError(
|
||||
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
|
||||
@@ -32,9 +33,9 @@ def get_qdrant_vector_store():
|
||||
vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
|
||||
return vector_store
|
||||
|
||||
def get_chroma_vector_store():
|
||||
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
||||
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
||||
def get_chroma_vector_store(docType:str):
|
||||
collection_name = docType
|
||||
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),docType)
|
||||
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
|
||||
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
|
||||
if vector_store_path:
|
||||
@@ -55,16 +56,16 @@ def get_chroma_vector_store():
|
||||
)
|
||||
return store
|
||||
|
||||
def get_vector_store():
|
||||
def get_vector_store(docType:str):
|
||||
store_type=os.getenv("VECTOR_STORE_TYPE")
|
||||
|
||||
store = None
|
||||
|
||||
match store_type:
|
||||
case "chroma":
|
||||
store = get_chroma_vector_store()
|
||||
store = get_chroma_vector_store(docType)
|
||||
case "qdrant":
|
||||
store = get_qdrant_vector_store()
|
||||
store = get_qdrant_vector_store(docType)
|
||||
case _:
|
||||
raise ValueError(f"Invalid vector store type: {store_type}")
|
||||
|
||||
|
||||
+309
-208
@@ -1,234 +1,335 @@
|
||||
import os
|
||||
from typing import Dict
|
||||
|
||||
from abc import abstractmethod
|
||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
from llama_index.core.settings import Settings
|
||||
from app.xinference.base import XinferenceEmbedding, XinferenceRerank
|
||||
from llama_index.embeddings.xinference import XinferenceEmbedding
|
||||
from llama_index.llms.xinference import Xinference
|
||||
#from llama_index.embeddings.xinference import XinferenceEmbedding
|
||||
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
|
||||
from llama_index.postprocessor.xinference_rerank import XinferenceRerank
|
||||
|
||||
from app.engine.loaders import getProjectInfos
|
||||
from app.api.routers.request.base import ProjectInfo
|
||||
from modelProvide.customDashScope import CustomDashScope
|
||||
from util.register import *
|
||||
from llama_index.core.callbacks import CallbackManager
|
||||
|
||||
|
||||
def get_node_postprocessors():
    """Build the rerank post-processor list from environment variables.

    Reads ``RERANK_ENABLED``, ``RERANK_MODEL``, ``RERANK_BASE_URL``,
    ``RERANK_TOP_N`` and ``RERANK_THRESHOLD``.

    Returns:
        ``[]`` when ``RERANK_ENABLED`` is unset or equals "false" (any
        casing); ``None`` when reranking is enabled but ``RERANK_MODEL`` is
        unset; otherwise a one-element list holding a ``XinferenceRerank``.
    """
    rerank_enabled = os.getenv("RERANK_ENABLED")
    # Check for None before normalising: calling .title() on a missing
    # variable raised AttributeError instead of disabling reranking.
    if rerank_enabled is None or rerank_enabled.title() == 'False':
        return []

    rerank_model = os.getenv("RERANK_MODEL")
    if rerank_model is None:
        return None

    rerank_url = os.getenv("RERANK_BASE_URL")
    # Environment values are strings; hand numeric settings over as numbers.
    rerank_top_n = os.getenv("RERANK_TOP_N")
    rerank_top_n = int(rerank_top_n) if rerank_top_n is not None else None
    rerank_threshold = os.getenv("RERANK_THRESHOLD")
    rerank_threshold = (
        float(rerank_threshold) if rerank_threshold is not None else None
    )
    return [
        XinferenceRerank(
            rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold
        )
    ]
|
||||
ModelPlateCategory = '模型平台'
|
||||
|
||||
def init_settings():
    """Configure the global ``Settings`` from environment variables.

    ``MODEL_PROVIDER`` and ``EMBEDDING_PROVIDER`` are looked up in the class
    registry under ``ModelPlateCategory``; the registered platform classes
    supply the LLM and the embedding model. ``CHUNK_SIZE`` (default 1024) and
    ``CHUNK_OVERLAP`` (default 20) set the chunking parameters.

    Raises:
        ValueError: If either provider name has no registered platform class.
    """
    model_provider = os.getenv("MODEL_PROVIDER")
    llm_platform_cls = ClsRegister.get(ModelPlateCategory, model_provider)
    if llm_platform_cls is None:
        raise ValueError(f"Invalid model provider: {model_provider}")
    Settings.llm = llm_platform_cls().model()

    embedding_provider = os.getenv("EMBEDDING_PROVIDER")
    embedding_platform_cls = ClsRegister.get(ModelPlateCategory, embedding_provider)
    # Test the looked-up class, not the LLM platform instance built above:
    # otherwise an unknown embedding provider ends up calling None().
    if embedding_platform_cls is None:
        raise ValueError(f"Invalid embedding provider: {embedding_provider}")
    Settings.embed_model = embedding_platform_cls().embedding()

    Settings.llm.callback_manager = CallbackManager()
    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
|
||||
|
||||
class ModelPlatform:
|
||||
@abstractmethod
|
||||
def model(self):
|
||||
pass
|
||||
|
||||
def init_ollama():
|
||||
# from llama_index.embeddings.ollama import OllamaEmbedding
|
||||
# from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama
|
||||
#
|
||||
# base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
|
||||
# request_timeout = float(
|
||||
# os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
|
||||
# )
|
||||
# Settings.embed_model = OllamaEmbedding(
|
||||
# base_url=base_url,
|
||||
# model_name=os.getenv("EMBEDDING_MODEL"),
|
||||
# )
|
||||
# Settings.llm = Ollama(
|
||||
# base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
|
||||
# )
|
||||
pass
|
||||
@abstractmethod
|
||||
def embedding(self):
|
||||
pass
|
||||
|
||||
def init_xinference():
|
||||
base_url = os.getenv("BASE_URL")
|
||||
model = os.getenv("MODEL")
|
||||
max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None
|
||||
temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
|
||||
@abstractmethod
|
||||
def rerank(self):
|
||||
pass
|
||||
|
||||
Settings.llm = Xinference(model, base_url, temperature, max_tokens)
|
||||
@register(ModelPlateCategory,'ollama')
|
||||
class OllamaPlatform(ModelPlatform):
|
||||
def model(self):
|
||||
from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama
|
||||
|
||||
embedding_base_url = os.getenv("EMBEDDING_BASE_URL")
|
||||
embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url
|
||||
base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
|
||||
request_timeout = float(
|
||||
os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
|
||||
)
|
||||
Settings.llm = Ollama(
|
||||
base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
|
||||
)
|
||||
pass
|
||||
|
||||
embed_model_name = os.getenv("EMBEDDING_MODEL")
|
||||
dimensions = os.getenv("EMBEDDING_DIM")
|
||||
dimensions = int(dimensions) if dimensions is not None else None
|
||||
Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions)
|
||||
def embedding(self):
|
||||
#from llama_index.embeddings.ollama import OllamaEmbedding
|
||||
# base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
|
||||
# Settings.embed_model = OllamaEmbedding(
|
||||
# base_url=base_url,
|
||||
# model_name=os.getenv("EMBEDDING_MODEL"),
|
||||
# )
|
||||
pass
|
||||
|
||||
def init_openai():
|
||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
from llama_index.embeddings.openai import OpenAIEmbedding
|
||||
from llama_index.llms.openai import OpenAI
|
||||
|
||||
max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||
config = {
|
||||
"model": os.getenv("MODEL"),
|
||||
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
|
||||
"max_tokens": int(max_tokens) if max_tokens is not None else None,
|
||||
}
|
||||
Settings.llm = OpenAI(**config)
|
||||
|
||||
dimensions = os.getenv("EMBEDDING_DIM")
|
||||
config = {
|
||||
"model": os.getenv("EMBEDDING_MODEL"),
|
||||
"dimensions": int(dimensions) if dimensions is not None else None,
|
||||
}
|
||||
Settings.embed_model = OpenAIEmbedding(**config)
|
||||
|
||||
def init_dashscope():
|
||||
from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels
|
||||
from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeBatchTextEmbeddingModels,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels
|
||||
|
||||
max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||
config = {
|
||||
"model": os.getenv("MODEL"),
|
||||
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
|
||||
"max_tokens": int(max_tokens) if max_tokens is not None else None,
|
||||
}
|
||||
Settings.llm = llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX)
|
||||
|
||||
dimensions = os.getenv("EMBEDDING_DIM")
|
||||
config = {
|
||||
"model": os.getenv("EMBEDDING_MODEL"),
|
||||
"dimensions": int(dimensions) if dimensions is not None else None,
|
||||
}
|
||||
Settings.embed_model = DashScopeEmbedding(model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
|
||||
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY)
|
||||
def rerank(self):
    """Build the local rerank post-processor list.

    Reads ``RERANK_MODEL`` (appended to the current working directory),
    ``RERANK_TOP_N`` (default 5) and ``RERANK_THRESHOLD`` (default 0.3).

    Returns:
        A one-element list holding an ``OllamaRerank`` running on the CPU.

    Raises:
        ValueError: If ``RERANK_MODEL`` is not set.
    """
    from app.engine.rerank.ollamRerank import OllamaRerank

    model_rel_path = os.getenv('RERANK_MODEL')
    if model_rel_path is None:
        # Fail with a clear message instead of a str + None TypeError.
        raise ValueError("RERANK_MODEL must be set to use the ollama reranker")
    modelpath = os.getcwd() + model_rel_path
    # os.getenv returns a string when the variable is set; top_n is used as
    # a slice bound by the reranker, so convert it explicitly.
    top_n = int(os.getenv('RERANK_TOP_N', 5))
    threshold = float(os.getenv('RERANK_THRESHOLD', 0.3))
    rerank = OllamaRerank(
        model=modelpath,
        top_n=top_n,
        device="cpu",
        score_threshold=threshold,
    )
    return [rerank]
|
||||
|
||||
|
||||
def init_azure_openai():
|
||||
# from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
|
||||
# from llama_index.llms.azure_openai import AzureOpenAI
|
||||
#
|
||||
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
|
||||
# embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
|
||||
# max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||
# temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
|
||||
# dimensions = os.getenv("EMBEDDING_DIM")
|
||||
#
|
||||
# azure_config = {
|
||||
# "api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
# "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
|
||||
# "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
|
||||
# or os.getenv("OPENAI_API_VERSION"),
|
||||
# }
|
||||
#
|
||||
# Settings.llm = AzureOpenAI(
|
||||
# model=os.getenv("MODEL"),
|
||||
# max_tokens=int(max_tokens) if max_tokens is not None else None,
|
||||
# temperature=float(temperature),
|
||||
# deployment_name=llm_deployment,
|
||||
# **azure_config,
|
||||
# )
|
||||
#
|
||||
# Settings.embed_model = AzureOpenAIEmbedding(
|
||||
# model=os.getenv("EMBEDDING_MODEL"),
|
||||
# dimensions=int(dimensions) if dimensions is not None else None,
|
||||
# deployment_name=embedding_deployment,
|
||||
# **azure_config,
|
||||
# )
|
||||
pass
|
||||
@register(ModelPlateCategory, 'xinference')
class XinferencePlatform(ModelPlatform):
    """Model platform backed by an Xinference server, configured via env vars."""

    def model(self):
        """Return the ``Xinference`` LLM.

        Reads ``BASE_URL``, ``MODEL``, ``LLM_MAX_TOKENS`` (optional) and
        ``LLM_TEMPERATURE`` (defaults to ``DEFAULT_XINFERENCE_TEMP``).
        """
        base_url = os.getenv("BASE_URL")
        model = os.getenv("MODEL")
        max_tokens = os.getenv("LLM_MAX_TOKENS")
        max_tokens = int(max_tokens) if max_tokens is not None else None
        temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
        return Xinference(model, base_url, temperature, max_tokens)

    def embedding(self):
        """Return the ``XinferenceEmbedding`` model.

        Uses ``EMBEDDING_BASE_URL`` when set and non-empty, otherwise
        ``BASE_URL``; the model name comes from ``EMBEDDING_MODEL``.
        """
        embedding_base_url = os.getenv("EMBEDDING_BASE_URL") or os.getenv("BASE_URL")
        embed_model_name = os.getenv("EMBEDDING_MODEL")
        # EMBEDDING_DIM was parsed here but never passed on; the dead local
        # has been removed.
        return XinferenceEmbedding(embed_model_name, embedding_base_url)

    def rerank(self):
        """Return ``[XinferenceRerank]`` or ``None`` when ``RERANK_MODEL`` is unset.

        Reads ``RERANK_MODEL``, ``RERANK_BASE_URL``, ``RERANK_TOP_N`` and
        ``RERANK_THRESHOLD``.
        """
        rerank_model = os.getenv("RERANK_MODEL")
        if rerank_model is None:
            return None

        rerank_url = os.getenv("RERANK_BASE_URL")
        # Environment values are strings; hand numeric settings over as numbers.
        rerank_top_n = os.getenv("RERANK_TOP_N")
        rerank_top_n = int(rerank_top_n) if rerank_top_n is not None else None
        rerank_threshold = os.getenv("RERANK_THRESHOLD")
        rerank_threshold = (
            float(rerank_threshold) if rerank_threshold is not None else None
        )
        return [
            XinferenceRerank(
                rerank_model,
                rerank_url,
                top_n=rerank_top_n,
                threshold=rerank_threshold,
            )
        ]
|
||||
|
||||
@register(ModelPlateCategory, 'openai')
class OpenAIPlatform(ModelPlatform):
    """Model platform for OpenAI, configured through environment variables."""

    def model(self):
        """Return an ``OpenAI`` LLM built from MODEL, LLM_TEMPERATURE and LLM_MAX_TOKENS."""
        from llama_index.core.constants import DEFAULT_TEMPERATURE
        from llama_index.llms.openai import OpenAI

        raw_max_tokens = os.getenv("LLM_MAX_TOKENS")
        return OpenAI(
            model=os.getenv("MODEL"),
            temperature=float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
            max_tokens=None if raw_max_tokens is None else int(raw_max_tokens),
        )

    def embedding(self):
        """Return an ``OpenAIEmbedding`` built from EMBEDDING_MODEL and EMBEDDING_DIM."""
        from llama_index.embeddings.openai import OpenAIEmbedding

        raw_dimensions = os.getenv("EMBEDDING_DIM")
        return OpenAIEmbedding(
            model=os.getenv("EMBEDDING_MODEL"),
            dimensions=None if raw_dimensions is None else int(raw_dimensions),
        )

    def rerank(self):
        """No reranker is configured for this platform; returns ``None``."""
        return None
|
||||
|
||||
@register(ModelPlateCategory, 'dashscope')
class DashscopePlatform(ModelPlatform):
    """Model platform for DashScope, configured through environment variables."""

    def model(self):
        """Return a ``CustomDashScope`` LLM using MODEL and DASHSCOPE_API_KEY."""
        return CustomDashScope(
            model_name=os.getenv('MODEL'),
            api_key=os.getenv('DASHSCOPE_API_KEY'),
        )

    def embedding(self):
        """Return a ``DashScopeEmbedding`` using EMBEDDING_MODEL and DASHSCOPE_API_KEY."""
        from llama_index.embeddings.dashscope import (
            DashScopeEmbedding,
            DashScopeTextEmbeddingType,
            DashScopeTextEmbeddingModels,
        )

        return DashScopeEmbedding(
            model_name=os.getenv('EMBEDDING_MODEL'),
            text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY,
            api_key=os.getenv('DASHSCOPE_API_KEY'),
        )

    def rerank(self):
        """No reranker is configured for this platform; returns ``None``."""
        return None
|
||||
|
||||
@register(ModelPlateCategory,'azure-openai')
|
||||
class AzureOpenaiPlatform(ModelPlatform):
|
||||
def model(self):
|
||||
# from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
# from llama_index.llms.azure_openai import AzureOpenAI
|
||||
#
|
||||
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
|
||||
# embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
|
||||
# max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||
# temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
|
||||
# dimensions = os.getenv("EMBEDDING_DIM")
|
||||
#
|
||||
# azure_config = {
|
||||
# "api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
# "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
|
||||
# "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
|
||||
# or os.getenv("OPENAI_API_VERSION"),
|
||||
# }
|
||||
#
|
||||
# return AzureOpenAI(
|
||||
# model=os.getenv("MODEL"),
|
||||
# max_tokens=int(max_tokens) if max_tokens is not None else None,
|
||||
# temperature=float(temperature),
|
||||
# deployment_name=llm_deployment,
|
||||
# **azure_config,
|
||||
# )
|
||||
pass
|
||||
|
||||
def embedding(self):
|
||||
# from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
|
||||
#
|
||||
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
|
||||
# embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
|
||||
# max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||
# temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
|
||||
# dimensions = os.getenv("EMBEDDING_DIM")
|
||||
#
|
||||
# azure_config = {
|
||||
# "api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||
# "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
|
||||
# "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
|
||||
# or os.getenv("OPENAI_API_VERSION"),
|
||||
# }
|
||||
# return AzureOpenAIEmbedding(
|
||||
# model=os.getenv("EMBEDDING_MODEL"),
|
||||
# dimensions=int(dimensions) if dimensions is not None else None,
|
||||
# deployment_name=embedding_deployment,
|
||||
# **azure_config,
|
||||
# )
|
||||
pass
|
||||
|
||||
def rerank(self):
    """Return the reranker for this platform (stub: always ``None``)."""
    return None
|
||||
|
||||
@register(ModelPlateCategory, 'fastembed')
class FastembedPlatform(ModelPlatform):
    """Model platform entry for local FastEmbed embedding models.

    All three factory methods are still stubs that return ``None``.

    The overrides are deliberately NOT decorated with ``@abstractmethod``:
    marking an override abstract keeps the subclass abstract, so a registered
    platform could not be instantiated when ``ModelPlatform`` is an ABC. This
    also matches the other registered platforms in this module.
    """

    def model(self):
        """Return the LLM for this platform (stub: always ``None``)."""
        return None

    def embedding(self):
        """Return the embedding model for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.embeddings.fastembed import FastEmbedEmbedding
        #
        # embed_model_map: Dict[str, str] = {
        #     # Small and multilingual
        #     "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
        #     # Large and multilingual
        #     "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",  # noqa: E501
        # }
        #
        # # This will download the model automatically if it is not already downloaded
        # Settings.embed_model = FastEmbedEmbedding(
        #     model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
        # )
        return None

    def rerank(self):
        """Return the reranker for this platform (stub: always ``None``)."""
        return None
|
||||
|
||||
@register(ModelPlateCategory, 'groq')
class GroqPlatform(ModelPlatform):
    """Model platform entry for Groq-hosted LLMs.

    All three factory methods are still stubs that return ``None``.

    The overrides are deliberately NOT decorated with ``@abstractmethod``:
    marking an override abstract keeps the subclass abstract, so a registered
    platform could not be instantiated when ``ModelPlatform`` is an ABC. This
    also matches the other registered platforms in this module.
    """

    def model(self):
        """Return the LLM for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.llms.groq import Groq
        #
        # model_map: Dict[str, str] = {
        #     "llama3-8b": "llama3-8b-8192",
        #     "llama3-70b": "llama3-70b-8192",
        #     "mixtral-8x7b": "mixtral-8x7b-32768",
        # }
        #
        # Settings.llm = Groq(model=model_map[os.getenv("MODEL")])
        # # Groq does not provide embeddings, so we use FastEmbed instead
        # init_fastembed()
        return None

    def embedding(self):
        """Return the embedding model for this platform (stub: always ``None``)."""
        return None

    def rerank(self):
        """Return the reranker for this platform (stub: always ``None``)."""
        return None
|
||||
|
||||
@register(ModelPlateCategory, 'anthropic')
class AnthropicPlatform(ModelPlatform):
    """Model platform entry for Anthropic LLMs.

    Every factory method is currently a stub that returns ``None``.
    """

    def model(self):
        """Return the LLM for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.llms.anthropic import Anthropic
        #
        # model_map: Dict[str, str] = {
        #     "claude-3-opus": "claude-3-opus-20240229",
        #     "claude-3-sonnet": "claude-3-sonnet-20240229",
        #     "claude-3-haiku": "claude-3-haiku-20240307",
        #     "claude-2.1": "claude-2.1",
        #     "claude-instant-1.2": "claude-instant-1.2",
        # }
        #
        # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
        # # Anthropic does not provide embeddings, so we use FastEmbed instead
        # init_fastembed()
        return None

    def embedding(self):
        """Return the embedding model for this platform (stub: always ``None``)."""
        return None

    def rerank(self):
        """Return the reranker for this platform (stub: always ``None``)."""
        return None
|
||||
|
||||
@register(ModelPlateCategory, 'gemini')
class GeminiPlatform(ModelPlatform):
    """Model platform entry for Google Gemini models.

    Every factory method is currently a stub that returns ``None``.
    """

    def model(self):
        """Return the LLM for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.llms.gemini import Gemini
        # model_name = f"models/{os.getenv('MODEL')}"
        # return Gemini(model=model_name)
        return None

    def embedding(self):
        """Return the embedding model for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.embeddings.gemini import GeminiEmbedding
        # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
        # return GeminiEmbedding(model_name=embed_model_name)
        return None

    def rerank(self):
        """Return the reranker for this platform (stub: always ``None``)."""
        return None
|
||||
|
||||
@register(ModelPlateCategory, 'mistral')
class MistralPlatform(ModelPlatform):
    """Model platform entry for Mistral AI models.

    Every factory method is currently a stub that returns ``None``.
    """

    def model(self):
        """Return the LLM for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.llms.mistralai import MistralAI
        # return MistralAI(model=os.getenv("MODEL"))
        return None

    def embedding(self):
        """Return the embedding model for this platform (stub: always ``None``)."""
        # Disabled reference implementation:
        #
        # from llama_index.embeddings.mistralai import MistralAIEmbedding
        # return MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
        return None

    def rerank(self):
        """Return the reranker for this platform (stub: always ``None``)."""
        return None
|
||||
|
||||
def init_ProjectInfo():
    """Load every project record into a ``ProjectInfo`` object.

    Each record returned by ``getProjectInfos()`` is read through its
    ``'name'`` and ``'flag'`` keys and handed to ``ProjectInfo.add``.

    NOTE(review): the ``ProjectInfo`` instance is neither returned nor stored
    here, so this only has a lasting effect if ``ProjectInfo`` shares state
    between instances -- confirm.
    """
    registry = ProjectInfo()
    # Records are indexed by string key, so they are presumably mappings
    # (not plain tuples) -- verify against getProjectInfos().
    for record in getProjectInfos():
        registry.add(record['name'], record['flag'])
|
||||
|
||||
|
||||
def init_fastembed():
    """Use Qdrant Fastembed as the local embedding provider.

    Currently a no-op that returns ``None``; the disabled reference
    implementation is kept below.
    """
    # Disabled reference implementation:
    #
    # from llama_index.embeddings.fastembed import FastEmbedEmbedding
    #
    # embed_model_map: Dict[str, str] = {
    #     # Small and multilingual
    #     "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
    #     # Large and multilingual
    #     "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",  # noqa: E501
    # }
    #
    # # This will download the model automatically if it is not already downloaded
    # Settings.embed_model = FastEmbedEmbedding(
    #     model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
    # )
    return None
|
||||
|
||||
|
||||
def init_groq():
    """Configure Groq as the LLM provider.

    Currently a no-op that returns ``None``; the disabled reference
    implementation is kept below.
    """
    # Disabled reference implementation:
    #
    # from llama_index.llms.groq import Groq
    #
    # model_map: Dict[str, str] = {
    #     "llama3-8b": "llama3-8b-8192",
    #     "llama3-70b": "llama3-70b-8192",
    #     "mixtral-8x7b": "mixtral-8x7b-32768",
    # }
    #
    # Settings.llm = Groq(model=model_map[os.getenv("MODEL")])
    # # Groq does not provide embeddings, so we use FastEmbed instead
    # init_fastembed()
    return None
|
||||
|
||||
|
||||
def init_anthropic():
    """Configure Anthropic as the LLM provider.

    Currently a no-op that returns ``None``; the disabled reference
    implementation is kept below.
    """
    # Disabled reference implementation:
    #
    # from llama_index.llms.anthropic import Anthropic
    #
    # model_map: Dict[str, str] = {
    #     "claude-3-opus": "claude-3-opus-20240229",
    #     "claude-3-sonnet": "claude-3-sonnet-20240229",
    #     "claude-3-haiku": "claude-3-haiku-20240307",
    #     "claude-2.1": "claude-2.1",
    #     "claude-instant-1.2": "claude-instant-1.2",
    # }
    #
    # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
    # # Anthropic does not provide embeddings, so we use FastEmbed instead
    # init_fastembed()
    return None
|
||||
|
||||
|
||||
def init_gemini():
    """Configure Gemini as the LLM and embedding provider.

    Currently a no-op that returns ``None``; the disabled reference
    implementation is kept below.
    """
    # Disabled reference implementation:
    #
    # from llama_index.embeddings.gemini import GeminiEmbedding
    # from llama_index.llms.gemini import Gemini
    #
    # model_name = f"models/{os.getenv('MODEL')}"
    # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
    #
    # Settings.llm = Gemini(model=model_name)
    # Settings.embed_model = GeminiEmbedding(model_name=embed_model_name)
    return None
|
||||
|
||||
def init_mistral():
    """Configure Mistral AI as the LLM and embedding provider.

    Currently a no-op that returns ``None``; the disabled reference
    implementation is kept below.
    """
    # Disabled reference implementation:
    #
    # from llama_index.embeddings.mistralai import MistralAIEmbedding
    # from llama_index.llms.mistralai import MistralAI
    #
    # Settings.llm = MistralAI(model=os.getenv("MODEL"))
    # Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
    return None
|
||||
|
||||
|
||||
@@ -1,272 +0,0 @@
|
||||
"""Xinference embeddings file."""
|
||||
|
||||
import logging
|
||||
from enum import Enum
|
||||
from http import HTTPStatus
|
||||
from typing import Any, Dict, List, Optional, Union, Tuple
|
||||
|
||||
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher
|
||||
from llama_index.core.bridge.pydantic import PrivateAttr
|
||||
from llama_index.core.callbacks import CBEventType, EventPayload
|
||||
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
|
||||
from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent
|
||||
from llama_index.core.postprocessor.types import BaseNodePostprocessor
|
||||
from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle
|
||||
from pydantic import Field
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
EMBED_MAX_INPUT_LENGTH = 2048
|
||||
EMBED_MAX_BATCH_SIZE = 1
|
||||
|
||||
|
||||
class XinferenceEmbedding(BaseEmbedding):
    """Text-embedding model served by a Xinference endpoint.

    On construction the model handle and its description are fetched from the
    endpoint through ``xinference.client.RESTfulClient``; embeddings are then
    produced with the handle's ``create_embedding`` call.
    """

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Private, non-validated state. It is assigned in __init__ only AFTER the
    # pydantic base initialiser has run, because private attributes cannot be
    # set reliably on a model that has not been initialised yet.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
        dimensions: Optional[int] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        max_retries: int = 10,
        **kwargs: Any,
    ) -> None:
        """Connect to ``endpoint`` and bind to the model ``model_uid``.

        Args:
            model_uid: UID of the model launched in Xinference; also used as
                ``model_name``.
            endpoint: Base URL of the Xinference server.
            embed_batch_size: Accepted for interface compatibility. The value
                actually used is the model's ``replica`` entry reported by
                the endpoint (behaviour kept from the original code).
            dimensions: Accepted for interface compatibility. The value
                actually used is the model's ``dimensions`` entry reported by
                the endpoint (behaviour kept from the original code).
            additional_kwargs, api_key, api_base, api_version, max_retries:
                Forwarded unchanged to the base initialiser.
            **kwargs: Extra keyword arguments for the base initialiser.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description, replica, model_dimensions = self.load_model(
            model_uid, endpoint
        )
        # ``model_description`` is a required field, so it must reach the base
        # initialiser; an explicit value supplied by the caller still wins.
        kwargs.setdefault("model_description", model_description)
        super().__init__(
            embed_batch_size=replica,
            dimensions=model_dimensions,
            model_name=model_uid,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            **kwargs,
        )
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    def load_model(
        self, model_uid: str, endpoint: str
    ) -> Tuple[Any, Dict[str, Any], int, int]:
        """Fetch the model handle and its description from the endpoint.

        Returns:
            A 4-tuple ``(generator, model_description, replica, dimensions)``
            where the last two are read from ``model_description``.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the endpoint returns no handle or no description
                for ``model_uid``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as err:
            raise ImportError(
                "Could not import Xinference library. "
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from err

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``.get`` so that an unknown UID reaches the explicit check below
        # instead of surfacing as a bare KeyError.
        model_description = client.list_models().get(model_uid)

        # Explicit checks instead of ``assert``, which is stripped under -O.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint. "
                "Please make sure Xinference endpoint is running at the correct port."
            )

        replica = model_description['replica']
        dimensions = model_description['dimensions']
        return generator, model_description, replica, dimensions

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string."""
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """Embed ``text`` synchronously through the Xinference model handle."""
        assert self._generator is not None

        response = self._generator.create_embedding(input=text)
        # Only the first entry of the response's data list is used.
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` synchronously; queries are embedded like any text."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Embed ``query``; this delegates to the synchronous implementation."""
        return self._get_query_embedding(query)
|
||||
|
||||
class XinferenceRerank(BaseNodePostprocessor):
    """Node postprocessor that reranks nodes with a Xinference rerank model.

    Nodes are scored by the model handle's ``rerank`` call, optionally
    filtered by ``threshold`` and optionally truncated to ``top_n``.
    """

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Private, non-validated state. It is assigned in __init__ only AFTER the
    # pydantic base initialiser has run.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()
    model: str = Field(description="Xinference rerank model name.")
    # Both limits are optional: ``None`` means "do not apply this limit".
    top_n: Optional[int] = Field(default=None, description="Top N nodes to return.")
    threshold: Optional[float] = Field(
        default=None, description="threshold nodes to return."
    )

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        top_n: Optional[int] = None,
        threshold: Optional[float] = None,
        return_documents: bool = False
    ):
        """Connect to ``endpoint`` and bind to the rerank model ``model_uid``.

        Args:
            model_uid: UID of the rerank model launched in Xinference.
            endpoint: Base URL of the Xinference server.
            top_n: Maximum number of nodes to return; ``None`` keeps all.
            threshold: Minimum relevance score a node needs to be kept;
                ``None`` disables the filter.
            return_documents: Forwarded to the base initialiser as in the
                original code; this class does not read it afterwards.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description = self.load_model(model_uid, endpoint)
        super().__init__(
            model_description=model_description,
            top_n=top_n,
            model=model_uid,
            threshold=threshold,
            return_documents=return_documents,
        )
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string."""
        return "XinferenceRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rerank ``nodes`` against the query and apply threshold / top_n.

        Raises:
            ValueError: If ``query_bundle`` is ``None``.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        # With no explicit limit, every input node may be returned.
        effective_top_n = self.top_n if self.top_n is not None else len(nodes)

        dispatcher.event(
            ReRankStartEvent(
                nodes=nodes,
                top_n=effective_top_n,
                query=query_bundle,
                model_name=self.model,
            )
        )

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self._model_uid,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: self.top_n,
            },
        ) as event:
            texts = [node.node.get_content() for node in nodes]
            response = self._generator.rerank(texts, query_bundle.query_str)

            # Each result refers back to its input node through 'index'.
            new_nodes = [
                NodeWithScore(
                    node=nodes[result['index']].node,
                    score=result['relevance_score'],
                )
                for result in response['results']
            ]

            # Filter only when a threshold is configured; without one every
            # scored node is kept (previously nothing was kept in that case).
            if self.threshold is not None:
                new_nodes = [
                    scored for scored in new_nodes if scored.score >= self.threshold
                ]

            # Best-first order before truncating; the sort is stable, so an
            # already ordered response is left unchanged.
            new_nodes.sort(key=lambda scored: scored.score, reverse=True)

            if self.top_n is not None:
                new_nodes = new_nodes[: self.top_n]

            event.on_end(payload={EventPayload.NODES: new_nodes})

        dispatcher.event(ReRankEndEvent(nodes=new_nodes))
        return new_nodes

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """Fetch the model handle and its description from the endpoint.

        Returns:
            A 2-tuple ``(generator, model_description)``.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the endpoint returns no handle or no description
                for ``model_uid``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as err:
            raise ImportError(
                "Could not import Xinference library. "
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from err

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``.get`` so that an unknown UID reaches the explicit check below
        # instead of surfacing as a bare KeyError.
        model_description = client.list_models().get(model_uid)

        # Explicit checks instead of ``assert``, which is stripped under -O.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint. "
                "Please make sure Xinference endpoint is running at the correct port."
            )

        return generator, model_description
|
||||
+32
-32
@@ -3,46 +3,46 @@ file:
|
||||
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
|
||||
use_llama_parse: false
|
||||
|
||||
db:
|
||||
#db:
|
||||
# The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
|
||||
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
|
||||
# query: The query to fetch data from the database. E.g.: SELECT * FROM table
|
||||
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||
enable: true # 添加 enable 字段
|
||||
queries:
|
||||
- sql: select * from ProjectProperties;
|
||||
explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
|
||||
#- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||
#enable: false # 添加 enable 字段
|
||||
#queries:
|
||||
#- sql: select * from ProjectProperties;
|
||||
#explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
|
||||
|
||||
- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
|
||||
explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
|
||||
#- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
|
||||
#explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
|
||||
|
||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
|
||||
explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
|
||||
explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
|
||||
explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
|
||||
#explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
|
||||
#explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
|
||||
#explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||
|
||||
- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
|
||||
explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
|
||||
#- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
|
||||
#explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
|
||||
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表'
|
||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa'
|
||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表'
|
||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)'
|
||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)'
|
||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)'
|
||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表'
|
||||
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa'
|
||||
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表'
|
||||
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)'
|
||||
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)'
|
||||
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)'
|
||||
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '线路'
|
||||
explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
||||
- sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '余物清理'
|
||||
explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '线路'
|
||||
#explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
||||
#- sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '余物清理'
|
||||
#explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
||||
#web:
|
||||
# driver_arguments:
|
||||
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
|
||||
|
||||
@@ -1,71 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "FeeCollectionTable",
|
||||
"alias": "",
|
||||
"comment": "取费表是取费设置中各取费表明细。查询示例: SELECT Rate FROM FeeCollectionTable WHERE Name = 'findname'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "FeeCollectionTableName",
|
||||
"alias": "取费表名称,取费名称,取费名",
|
||||
"comment": "取费表名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Name",
|
||||
"alias": "名称,费用名,项目名",
|
||||
"comment": "费用名称,项目名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "SerialNumber",
|
||||
"alias": "序号,序列号,费用序号",
|
||||
"comment": "费用表序号",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Code",
|
||||
"alias": "编号,代号,代码",
|
||||
"comment": "费用代码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "CalculationFormula",
|
||||
"alias": "公式,表达式,计算式",
|
||||
"comment": "取费基数",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Rate",
|
||||
"alias": "费用利率,费率",
|
||||
"comment": "取费费率",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Remarks",
|
||||
"alias": "说明,备注",
|
||||
"comment": "费用项备注说明",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Major",
|
||||
"alias": "专业",
|
||||
"comment": "取费表专业",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Type",
|
||||
"alias": "类型,取费类型",
|
||||
"comment": "取费表类型",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Path",
|
||||
"alias": "路径,费用全路径",
|
||||
"comment": "费用项层级全路径",
|
||||
"type": "VARCHAR"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,89 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "OtherFee",
|
||||
"alias": "",
|
||||
"comment": "其他费用表被称为“工程费用中其他费用明细”。其他费用是指为完成工程项目建设所必需的,但不属于建筑工程费、安装工程费、设备购置费、基本预备费的其他相关费用。包括建设场地征用及清理费、项目建设管理费、项目建设技术服务费、生产准备费、大件运输措施费、专业爆破服务费等。查询示例: SELECT Rate FROM OtherFee WHERE Name = 'findname'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "Id",
|
||||
"alias": "项目id,id,费用id",
|
||||
"comment": "费用项目id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ParentId",
|
||||
"alias": "父级id,父id",
|
||||
"comment": "费用项目父级id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Level",
|
||||
"alias": "层号,层级,层编号",
|
||||
"comment": "层级编号,从1开始",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Name",
|
||||
"alias": "名称,费用名,项目名",
|
||||
"comment": "费用名称,项目名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "SerialNumber",
|
||||
"alias": "序号,序列号",
|
||||
"comment": "费用表序号",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Code",
|
||||
"alias": "编号,代号,代码",
|
||||
"comment": "费用代码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "CalculationFormula",
|
||||
"alias": "公式,表达式,计算式",
|
||||
"comment": "取费基数",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Rate",
|
||||
"alias": "费用利率,费率",
|
||||
"comment": "取费费率",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Amount",
|
||||
"alias": "金额,价格",
|
||||
"comment": "金额、合计、费用,\n单位为元",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Remarks",
|
||||
"alias": "说明,备注",
|
||||
"comment": "费用项备注说明",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Compilation_Basis",
|
||||
"alias": "编制依据,编制来源",
|
||||
"comment": "费用项编制依据",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "WBS_Code",
|
||||
"alias": "WBS编号,WBS编码",
|
||||
"comment": "费用项WBS编码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Path",
|
||||
"alias": "路径,费用全路径",
|
||||
"comment": "费用项层级全路径",
|
||||
"type": "VARCHAR"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,149 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "ProjectDivision",
|
||||
"alias": "",
|
||||
"comment": "项目划分表是用于存储工程项目划分树状数据。内部包含安装工程项目划分,建筑工程项目划分,线路项目划分,工程分部分项。查询示例: SELECT Sum_Price FROM ProjectDivision WHERE Name = 'findname'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "Id",
|
||||
"alias": "项目id,id,费用id",
|
||||
"comment": "项目划分id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ParentId",
|
||||
"alias": "父级id,父id",
|
||||
"comment": "项目划分父级id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Level",
|
||||
"alias": "层号,层级,层编号",
|
||||
"comment": "层级编号,从1开始",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Quantity",
|
||||
"alias": "个数,数量,数目",
|
||||
"comment": "项目划分数量",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "SerialNumber",
|
||||
"alias": "序号,序列号,项目序号",
|
||||
"comment": "项目划分序号",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Name",
|
||||
"alias": "名称,项目名",
|
||||
"comment": "项目名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Encoding",
|
||||
"alias": "编码,译码",
|
||||
"comment": "项目划分编码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Sum_Price",
|
||||
"alias": "合计,合价",
|
||||
"comment": "项目划分合价,分部分项费用",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "FeeCollectionTableName",
|
||||
"alias": "取费表",
|
||||
"comment": "项目划分的取费表,此项目划分选用的取费表",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Remarks",
|
||||
"alias": "说明,备注",
|
||||
"comment": "备注",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "WBS_Code",
|
||||
"alias": "WBS编号,WBS编码",
|
||||
"comment": "WBS编码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Manual_Adjustment_Coefficient",
|
||||
"alias": "人工调差系数",
|
||||
"comment": "此项目划分下人工调差系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Material_Adjustment_Coefficient",
|
||||
"alias": "材料调差系数",
|
||||
"comment": "此项目划分下材料调差系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Mechanical_Adjustment_Coefficient",
|
||||
"alias": "机械调差系数",
|
||||
"comment": "此项目划分下机械调差系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Demolition_Manual_Adjustment_Coefficient",
|
||||
"alias": "拆除人工调差系数",
|
||||
"comment": "此项目划分下拆除人工调差系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Demolition_Material_Adjustment_Coefficient",
|
||||
"alias": "拆除材料调差系数",
|
||||
"comment": "此项目划分下拆除材料调差系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Demolition_Mechanical_Adjustment_Coefficient",
|
||||
"alias": "拆除机械调差系数",
|
||||
"comment": "此项目划分下拆除机械调差系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "ProfessionalType",
|
||||
"alias": "专业类型",
|
||||
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Unit",
|
||||
"alias": "单位",
|
||||
"comment": "项目划分单位",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "CalculationFormula",
|
||||
"alias": "公式,表达式,计算式",
|
||||
"comment": "项目划分计算式",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Rate",
|
||||
"alias": "费用利率,费率",
|
||||
"comment": "项目划分费率",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Code",
|
||||
"alias": "编号,代号,代码",
|
||||
"comment": "项目划分代码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Path",
|
||||
"alias": "路径,项目全路径",
|
||||
"comment": "项目划分层级全路径",
|
||||
"type": "VARCHAR"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,239 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "ProjectDivisions_CostPreview",
|
||||
"alias": "",
|
||||
"comment": "项目划分_费用预览表也被称为“项目划分费用预览”、“项目划分取费费用”。其中包含项目划分合价、直接费、间接费、利润、税金、主材费等。查询示例: SELECT Total FROM ProjectDivisions_CostPreview WHERE Id = '15'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "Id",
|
||||
"alias": "id,项目id",
|
||||
"comment": "项目划分id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ParentId",
|
||||
"alias": "父级id,父id",
|
||||
"comment": "项目划分父级id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Level",
|
||||
"alias": "层号,层级,层编号",
|
||||
"comment": "层级编号,从1开始",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ProfessionalType",
|
||||
"alias": "专业类型",
|
||||
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "FeeCollectionTableName",
|
||||
"alias": "取费表",
|
||||
"comment": "项目划分的取费表,此项目划分选用的取费表",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Direct_Cost",
|
||||
"alias": "直接费",
|
||||
"comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Direct_Project_Cost",
|
||||
"alias": "直接工程费",
|
||||
"comment": "直接工程费是指按照正常的施工条件,在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Quota_Direct_Cost",
|
||||
"alias": "定额直接费",
|
||||
"comment": "定额直接费,包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Labor_Cost",
|
||||
"alias": "人工费",
|
||||
"comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Material_Cost",
|
||||
"alias": "材料费",
|
||||
"comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Construction_Machinery_Cost",
|
||||
"alias": "施工机械使用费",
|
||||
"comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Installation_Material_Cost",
|
||||
"alias": "装置性材料费",
|
||||
"comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料,也称主要材料费。装置性材料通常在概算或预算定额中未计价,也称未计价材料,也称主材。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "A_Supply_Installation_Material_Cost",
|
||||
"alias": "甲供装置性材料费",
|
||||
"comment": "供货方为甲供的装置性材料费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "B_Supply_Installation_Material_Cost",
|
||||
"alias": "乙供装置性材料费",
|
||||
"comment": "供货方为乙供的装置性材料费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Measure_Cost",
|
||||
"alias": "措施费",
|
||||
"comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "WinterRainySeasons_Additional_Construction_Cost",
|
||||
"alias": "冬雨季施工增加费",
|
||||
"comment": "冬雨季施工增加费是指按照合理的工期要求,建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Night_Additional_Construction_Cost",
|
||||
"alias": "夜间施工增加费",
|
||||
"comment": "夜间施工增加费是指按照规程要求,工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Construction_Tool_Usage_Cost",
|
||||
"alias": "施工工具用具使用费",
|
||||
"comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Special_Areas_Additional_Construction_Cost",
|
||||
"alias": "特殊地区施工增加费",
|
||||
"comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工:因特殊自然条件影响而需额外增加的施工费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Temporary_Facility_Cost",
|
||||
"alias": "临时设施费",
|
||||
"comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用,以及维修、拆除、折旧及摊销费,或临时设施的租赁费等。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Construction_Organization_Relocation_Cost",
|
||||
"alias": "施工机构迁移费",
|
||||
"comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资,以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Safe_Civilized_Construction_Cost",
|
||||
"alias": "安全文明施工费",
|
||||
"comment": "安全文明施工费,包括安全生产费、文明施工费、环境保护费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Indirect_Cost",
|
||||
"alias": "间接费",
|
||||
"comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Regulatory_Cost",
|
||||
"alias": "规费",
|
||||
"comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Social_Insurance_Premiums",
|
||||
"alias": "社会保险费",
|
||||
"comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Housing_Provident_Fund",
|
||||
"alias": "住房公积金",
|
||||
"comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Enterprise_Management_Cost",
|
||||
"alias": "企业管理费",
|
||||
"comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Construction_Enterprise_Cooperation_Debugging_Cost",
|
||||
"alias": "施工企业配合调试费",
|
||||
"comment": "施工企业配合调试费是指在工程整套启动试运阶段,施工企业安装专业配合调试所发生的费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Profit",
|
||||
"alias": "利润",
|
||||
"comment": "利润是指施工企业完成所承包工程获得的盈利。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Taxes",
|
||||
"alias": "税金",
|
||||
"comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Equipment_Cost",
|
||||
"alias": "设备费",
|
||||
"comment": "设备购置费是指为项目建设而购置或自制各种设备,并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "B_Supply_Equipment_Excluding_Tax_Price",
|
||||
"alias": "乙供设备不含税价",
|
||||
"comment": "设备费中,供货方为乙供设备,不含税价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "A_Supply_Equipment_Tax_Price",
|
||||
"alias": "甲供设备含税价",
|
||||
"comment": "设备费中,供货方为甲供设备,含税价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Installation_Cost",
|
||||
"alias": "安装费",
|
||||
"comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Main_Material_Cost",
|
||||
"alias": "主材费",
|
||||
"comment": "主材费指装置性材料费",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Total",
|
||||
"alias": "总价,总计,总体费用,总的费用",
|
||||
"comment": "总计包含安装费、主材费、设备费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Sum",
|
||||
"alias": "合计,合价",
|
||||
"comment": "项目划分合价,分部分项费用,项目划分费用。合计包含安装费和主材费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Path",
|
||||
"alias": "路径,项目划分全路径",
|
||||
"comment": "项目划分层级全路径",
|
||||
"type": "VARCHAR"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "ProjectProperties",
|
||||
"alias": "",
|
||||
"comment": "工程属性表是用于存储整个工程的重要属性,访问该表都是为了通过属性名查找属性值。通常属性值有工程信息、工程属性、技经参数,表中包含工程总投资、工程总费用,工程主要费用,工程技经参数等。查询示例: SELECT Value FROM ProjectProperties WHERE Name = 'findname'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "Name\n",
|
||||
"alias": "名称、属性、属性名称、字段、字段名称、变量、参数,属性名",
|
||||
"comment": "属性的唯一标识",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Value",
|
||||
"alias": "值、变量值、参数值、数值,属性值",
|
||||
"comment": "属性对应的实际值",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Type",
|
||||
"alias": "类型、变量类型、数值类型,属性类型",
|
||||
"comment": "属性变量的类型",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Unit",
|
||||
"alias": "单位",
|
||||
"comment": "单位",
|
||||
"type": "VARCHAR"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,335 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "ProjectQuantities",
|
||||
"alias": "",
|
||||
"comment": "工程量表是项目划分下工程量,包含定额、主材、设备、一笔性费用。查询示例: SELECT BudgetPrice FROM ProjectQuantities WHERE Name = 'findname'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "Id",
|
||||
"alias": "id",
|
||||
"comment": "消耗量id,工程量id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ParentId",
|
||||
"alias": "父级id,父id",
|
||||
"comment": "父级id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ProjectDivisionId",
|
||||
"alias": "项目划分id,项目id",
|
||||
"comment": "父级项目划分id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Quantity",
|
||||
"alias": "个数,数量,数目",
|
||||
"comment": "数量,消耗量数量,工程量数量,主材数量,定额数量,设备数量,项目划分单位",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "FeatureSegment",
|
||||
"alias": "特征段",
|
||||
"comment": "线路特征段",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "ParentQuantity",
|
||||
"alias": "父级个数,父级数量",
|
||||
"comment": "父级id的数量",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Name",
|
||||
"alias": "名称",
|
||||
"comment": "项目名称,工程量名称,消耗量名称,主材名称,定额名称,设备名称,材料名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Encoding",
|
||||
"alias": "编码,译码",
|
||||
"comment": "编码,定额编码,主材编码,设备编码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "SpecificationModel",
|
||||
"alias": "规格型号",
|
||||
"comment": "规格型号,主材规格型号,设备规格型号",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Unit",
|
||||
"alias": "单位",
|
||||
"comment": "单位,主材单位,定额单位,设备单位,项目划分单位",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "BasePrice",
|
||||
"alias": "基价",
|
||||
"comment": "定额基价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "LaborCost",
|
||||
"alias": "人工费",
|
||||
"comment": "定额人工费",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "MaterialCost",
|
||||
"alias": "材料费",
|
||||
"comment": "定额材料费",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "MachineryCost",
|
||||
"alias": "机械费",
|
||||
"comment": "定额机械费",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "QuotaCoefficient",
|
||||
"alias": "定额系数",
|
||||
"comment": "定额系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "LaborCoefficient",
|
||||
"alias": "人工系数",
|
||||
"comment": "定额人工系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "MaterialCoefficient",
|
||||
"alias": "材料系数",
|
||||
"comment": "定额材料系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "MechanicalCoefficient",
|
||||
"alias": "机械系数",
|
||||
"comment": "定额机械系数",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "ExpenseType",
|
||||
"alias": "费用类型",
|
||||
"comment": "费用类型,取值为取费、不取费",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "BudgetPrice",
|
||||
"alias": "预算价",
|
||||
"comment": "预算价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "MarketPrice",
|
||||
"alias": "市场价",
|
||||
"comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Supplier",
|
||||
"alias": "供货方",
|
||||
"comment": "供货方,设备供货方,主材供货方,取值为甲供、乙供",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Type",
|
||||
"alias": "类型",
|
||||
"comment": "工程量类型,取值定额、主材、设备、一笔性费用",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "QuotaRange",
|
||||
"alias": "定额范围",
|
||||
"comment": "定额范围,取值概算、预算",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "A_Supply_Material_Cost_Excluding_Tax",
|
||||
"alias": "甲供材料费不含税",
|
||||
"comment": "甲供材料费不含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "A_Supply_Material_Cost_Including_Tax",
|
||||
"alias": "甲供材料费含税",
|
||||
"comment": "甲供材料费含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "B_Supply_Material_Cost_Excluding_Tax",
|
||||
"alias": "乙供材料费不含税",
|
||||
"comment": "乙供材料费不含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "B_Supply_Material_Cost_Including_Tax",
|
||||
"alias": "乙供材料费含税",
|
||||
"comment": "乙供材料费含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "ScaffoldCalculation",
|
||||
"alias": "脚手架计取",
|
||||
"comment": "脚手架计取,取值计取、不计取",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Remarks",
|
||||
"alias": "说明,备注",
|
||||
"comment": "备注,说明",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "FeeCollectionTableName",
|
||||
"alias": "取费表",
|
||||
"comment": "项目划分的取费表,工程量的取费表",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Quota_Section_Name",
|
||||
"alias": "定额章节名称",
|
||||
"comment": "定额章节名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "ProfessionalType",
|
||||
"alias": "专业类型",
|
||||
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "split",
|
||||
"alias": "拆分",
|
||||
"comment": "是否为拆分材料,取值1为拆分,取值0为不拆分",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Loss",
|
||||
"alias": "损耗",
|
||||
"comment": "损耗率,主材损耗率",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "SingleWeight",
|
||||
"alias": "单重",
|
||||
"comment": "单重,主材单重",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "LineWeight",
|
||||
"alias": "线重",
|
||||
"comment": "线重,主材线重",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "SupervisedMaterials",
|
||||
"alias": "监造物料",
|
||||
"comment": "监造物料,取值1为监造物料,取值0为非监造物料",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "EquipmentMaterials",
|
||||
"alias": "设备性材料",
|
||||
"comment": "设备性材料,取值1为设备性材料,取值0为主材",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "GrossWeight",
|
||||
"alias": "毛重",
|
||||
"comment": "毛重,主材毛重",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "TransportationType",
|
||||
"alias": "运输类型",
|
||||
"comment": "运输类型,主材运输类型",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "TransportationMiscellaneous",
|
||||
"alias": "运杂费率",
|
||||
"comment": "运杂费率,设备运杂费率",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "EquipmentType",
|
||||
"alias": "设备类型",
|
||||
"comment": "设备类型,取值为主要设备、普通设备",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "UnitPrice",
|
||||
"alias": "单价",
|
||||
"comment": "单价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Market_Price_Excluding_Tax",
|
||||
"alias": "市场价不含税",
|
||||
"comment": "市场价不含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Market_Price_Including_Tax",
|
||||
"alias": "市场价含税",
|
||||
"comment": "市场价含税,设备含税价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Budget_Price_Excluding_Tax",
|
||||
"alias": "预算价不含税",
|
||||
"comment": "预算价不含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Budget_Price_Including_Tax",
|
||||
"alias": "预算价含税",
|
||||
"comment": "预算价含税",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Unit_Price_Excluding_Tax",
|
||||
"alias": "单价不含税",
|
||||
"comment": "单价不含税,设备不含税价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "GroupPrice",
|
||||
"alias": "分组合价",
|
||||
"comment": "分组合价",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Pump_Truck_Pouring",
|
||||
"alias": "泵车浇制",
|
||||
"comment": "泵车浇制,取值1为泵车浇制,取值0为非泵车浇制",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "On_Site_Preparation",
|
||||
"alias": "现场制备",
|
||||
"comment": "现场制备,取值1为现场制备,取值0为非现场制备",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Clear_Water_Concrete",
|
||||
"alias": "清水混凝土",
|
||||
"comment": "清水混凝土,取值1为清水混凝土,取值0为非清水混凝土",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Debugging_Fee_Calculation",
|
||||
"alias": "调试费计取",
|
||||
"comment": "调试费计取,取值计取、不计取",
|
||||
"type": "VARCHAR"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
@@ -1,101 +0,0 @@
|
||||
{
|
||||
"Table": [
|
||||
{
|
||||
"name": "TotalCalculateTable",
|
||||
"alias": "",
|
||||
"comment": "总算表也被称为“工程总费用”、“工程费用”。其中包含本地工程、辅助设施工程、编制基准期价差、设备购置费、其他费用、基本预备费、特殊费用、工程静态投资、动态费用、价差预备费、建设期贷款利息、工程动态投资、可抵扣增值税额。查询示例: SELECT Amount FROM TotalCalculateTable WHERE Name = 'findname'。",
|
||||
"fields": [
|
||||
{
|
||||
"name": "Id",
|
||||
"alias": "项目id,id,费用id",
|
||||
"comment": "费用项目id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "ParentId",
|
||||
"alias": "父级id,父id",
|
||||
"comment": "费用项目父级id",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Level",
|
||||
"alias": "层号,层级,层编号",
|
||||
"comment": "层级编号,从1开始",
|
||||
"type": "INT"
|
||||
},
|
||||
{
|
||||
"name": "Name",
|
||||
"alias": "名称,费用名,项目名",
|
||||
"comment": "费用名称,项目名称",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "SerialNumber",
|
||||
"alias": "序号",
|
||||
"comment": "工程费用序号",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Code",
|
||||
"alias": "编号,代号,代码",
|
||||
"comment": "费用代码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Rate",
|
||||
"alias": "费用利率,费率",
|
||||
"comment": "费率",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Amount",
|
||||
"alias": "金额,价格",
|
||||
"comment": "合计费",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "WBS_Code",
|
||||
"alias": "WBS编号,WBS编码",
|
||||
"comment": "费用编码",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Path",
|
||||
"alias": "路径,费用全路径",
|
||||
"comment": "费用名称全路径",
|
||||
"type": "VARCHAR"
|
||||
},
|
||||
{
|
||||
"name": "Amount_InstallationCost",
|
||||
"alias": "安装金额,金额_安装费,安装价格",
|
||||
"comment": "安装费金额",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Amount_EquipmentCost",
|
||||
"alias": "金额_设备费,设备金额,设备价格",
|
||||
"comment": "设备费金额",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Amount_OtherCost",
|
||||
"alias": "其他费用金额,金额_其他费,其他费用价格",
|
||||
"comment": "其他费金额",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Amount_Total",
|
||||
"alias": "总的金额,金额_占总计,总体金额",
|
||||
"comment": "合计费占总计",
|
||||
"type": "REAL"
|
||||
},
|
||||
{
|
||||
"name": "Amount_UnitInvestment",
|
||||
"alias": "金额_单位投资,合计投资金额",
|
||||
"comment": "合计费单位投资",
|
||||
"type": "REAL"
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
+5
-3
@@ -11,22 +11,24 @@ from fastapi.responses import RedirectResponse
|
||||
from app.api.routers.chat import chat_router
|
||||
from app.api.routers.upload import file_upload_router
|
||||
from app.api.routers.app import v1_router
|
||||
from app.settings import init_settings
|
||||
|
||||
from app.settings import init_settings,init_ProjectInfo
|
||||
from app.observability import init_observability
|
||||
from fastapi.staticfiles import StaticFiles
|
||||
from phoenix.trace import using_project
|
||||
|
||||
|
||||
logger = logging.getLogger("uvicorn")
|
||||
|
||||
|
||||
usPrj = using_project(os.getenv("PHOENIX_PROJECT_NAME"))
|
||||
usPrj.__enter__()
|
||||
|
||||
import nest_asyncio
|
||||
nest_asyncio.apply()
|
||||
|
||||
init_settings()
|
||||
init_observability()
|
||||
|
||||
init_ProjectInfo()
|
||||
app = FastAPI()
|
||||
|
||||
environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
from llama_index.llms.dashscope import DashScope
|
||||
from llama_index.core.base.llms.types import LLMMetadata
|
||||
|
||||
class DashScopeGenerationModels:
    """Model-name constants for the DashScope Qwen series.

    Every attribute is a plain ``str`` holding the model identifier. The
    constants are used as keys of the module-level metadata table, so each one
    must be a string that compares equal to the model name set on the LLM.
    """

    QWEN_TURBO = "qwen-turbo"
    QWEN_PLUS = "qwen-plus"
    QWEN_MAX = "qwen-max"
    QWEN_MAX_1201 = "qwen-max-1201"
    QWEN_MAX_LONGCONTEXT = "qwen-max-longcontext"
    # No trailing comma here: a trailing comma would turn the value into a
    # one-element tuple, which never matches a model-name string on lookup.
    QWEN2_MATH_72B_INSTRUCT = "qwen2-math-72b-instruct"
    QWEN2_72B = "qwen2-72b-instruct"
|
||||
|
||||
# Per-model metadata table keyed by the DashScope model-name constants.
# Each entry is its own dict holding the context window size, the default
# output-token limit, and the chat-model flag.
DASHSCOPE_MODEL_META = {
    DashScopeGenerationModels.QWEN_TURBO: dict(
        context_window=8 * 1024,
        num_output=8 * 1024,
        is_chat_model=True,
    ),
    DashScopeGenerationModels.QWEN_PLUS: dict(
        context_window=32 * 1024,
        num_output=32 * 1024,
        is_chat_model=True,
    ),
    DashScopeGenerationModels.QWEN_MAX: dict(
        context_window=8 * 1024,
        num_output=8 * 1024,
        is_chat_model=True,
    ),
    DashScopeGenerationModels.QWEN_MAX_1201: dict(
        context_window=8 * 1024,
        num_output=8 * 1024,
        is_chat_model=True,
    ),
    DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT: dict(
        context_window=30 * 1024,
        num_output=30 * 1024,
        is_chat_model=True,
    ),
    # NOTE(review): the two qwen2 entries declare a context window smaller
    # than their output limit; values are kept as in the original. Confirm
    # against the DashScope model documentation.
    DashScopeGenerationModels.QWEN2_MATH_72B_INSTRUCT: dict(
        context_window=2 * 1024,
        num_output=8 * 1024,
        is_chat_model=True,
    ),
    DashScopeGenerationModels.QWEN2_72B: dict(
        context_window=2 * 1024,
        num_output=8 * 1024,
        is_chat_model=True,
    ),
}
|
||||
|
||||
|
||||
class CustomDashScope(DashScope):
    """DashScope LLM whose metadata is resolved from ``DASHSCOPE_MODEL_META``."""

    @property
    def metadata(self) -> LLMMetadata:
        """Return the LLM metadata for ``self.model_name``.

        ``num_output`` is ``self.max_tokens`` when that is set (truthy);
        otherwise it is the default stored in ``DASHSCOPE_MODEL_META``.

        Raises:
            KeyError: if ``self.model_name`` has no entry in
                ``DASHSCOPE_MODEL_META``.
        """
        # Work on a copy of the table entry. The table is module-level state
        # shared by every instance, so writing max_tokens back into it would
        # leak one instance's limit into all later lookups for that model.
        model_meta = dict(DASHSCOPE_MODEL_META[self.model_name])
        if self.max_tokens:
            model_meta["num_output"] = self.max_tokens
        return LLMMetadata(model_name=self.model_name, **model_meta)
|
||||
|
||||
|
||||
Generated
+5323
-963
File diff suppressed because it is too large
Load Diff
+55
-30
@@ -10,44 +10,54 @@ readme = "README.md"
|
||||
generate = "app.engine.generate:generate_datasource"
|
||||
|
||||
[tool.poetry.dependencies]
|
||||
python = "^3.11,<3.12"
|
||||
fastapi = "^0.110.3"
|
||||
python-dotenv = "^1.0.0"
|
||||
python = "^3.11,<3.13"
|
||||
fastapi = "0.110.3"
|
||||
python-dotenv = "^1.0.1"
|
||||
aiostream = "^0.6.2"
|
||||
llama-index = "0.10.63"
|
||||
cachetools = "^5.3.3"
|
||||
cachetools = "^5.5.0"
|
||||
protobuf = "4.25.4"
|
||||
nltk = "^3.9.1"
|
||||
jieba = "^0.42.1"
|
||||
|
||||
#arize-phoenix = "^4.12.0"
|
||||
openinference-instrumentation-llama-index="2.2.3"
|
||||
llama-index-callbacks-arize-phoenix = "^0.1.4"
|
||||
llama-index-llms-dashscope = "^0.1.2"
|
||||
llama-index-embeddings-dashscope = "^0.1.4"
|
||||
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
|
||||
xinference = "^0.14.1"
|
||||
xinference-client = "^0.14.1"
|
||||
llama-index-llms-xinference = "^0.1.2"
|
||||
qdrant-client="^1.10.1"
|
||||
llama-index-vector-stores-qdrant = "^0.2.14"
|
||||
chroma="^0.2.0"
|
||||
llama-index-vector-stores-chroma = "^0.1.10"
|
||||
llama-index-readers-json = "^0.1.5"
|
||||
llama-index-retrievers-bm25 = "^0.2.2"
|
||||
llama-index-experimental = "^0.2.0"
|
||||
transformers = "^4.43.0"
|
||||
|
||||
duckduckgo_search = "^6.2.6"
|
||||
#arize-phoenix = "^4.12.0"
|
||||
openinference-instrumentation-llama-index="^3.0.2"
|
||||
llama-index = "^0.11.7"
|
||||
llama-index-core = "^0.11.7"
|
||||
llama-index-callbacks-arize-phoenix = "^0.2.1"
|
||||
llama-index-llms-dashscope = "^0.2.0"
|
||||
llama-index-embeddings-dashscope = "^0.2.1"
|
||||
#llama-index-postprocessor-dashscope-rerank = "^0.2.0"
|
||||
|
||||
llama-index-llms-ollama = "^0.3.1"
|
||||
llama-index-embeddings-ollama = "^0.3.0"
|
||||
|
||||
xinference = "^0.15.0"
|
||||
xinference-client = "^0.15.0"
|
||||
llama-index-llms-xinference = "^0.2.1"
|
||||
llama-index-embeddings-xinference = "^0.1.0"
|
||||
llama-index-postprocessor-xinference-rerank = "^0.1.0"
|
||||
qdrant-client="^1.11.0"
|
||||
llama-index-vector-stores-qdrant = "^0.3.0"
|
||||
chroma="^0.2.0"
|
||||
llama-index-vector-stores-chroma = "^0.2.0"
|
||||
llama-index-readers-json = "^0.2.0"
|
||||
llama-index-retrievers-bm25 = "^0.3.0"
|
||||
llama-index-experimental = "^0.3.0"
|
||||
|
||||
|
||||
duckduckgo_search = "^6.2.10"
|
||||
|
||||
[tool.poetry.dependencies.uvicorn]
|
||||
extras = [ "standard" ]
|
||||
version = "^0.23.2"
|
||||
version = "^0.30.6"
|
||||
|
||||
[tool.poetry.dependencies.llama-index-readers-database]
|
||||
version = "^0.1.3"
|
||||
version = "^0.2.0"
|
||||
|
||||
[tool.poetry.dependencies.pymysql]
|
||||
version = "^1.1.0"
|
||||
version = "^1.1.1"
|
||||
extras = [ "rsa" ]
|
||||
|
||||
#[tool.poetry.dependencies.psycopg2]
|
||||
@@ -60,15 +70,30 @@ extras = [ "rsa" ]
|
||||
version = "^0.8"
|
||||
|
||||
[tool.poetry.dependencies.e2b_code_interpreter]
|
||||
version = "0.0.7"
|
||||
version = "^0.0.7"
|
||||
|
||||
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "mirrors"
|
||||
url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
|
||||
priority = "default"
|
||||
name = "ali"
|
||||
url = "https://mirrors.aliyun.com/pypi/simple/"
|
||||
priority = "primary"
|
||||
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "tencent"
|
||||
url = "https://mirrors.cloud.tencent.com/pypi/simple/"
|
||||
priority = "primary"
|
||||
|
||||
|
||||
[[tool.poetry.source]]
|
||||
name = "tsinghua"
|
||||
url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/"
|
||||
priority = "primary"
|
||||
|
||||
|
||||
[build-system]
|
||||
requires = [ "poetry-core" ]
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
build-backend = "poetry.core.masonry.api"
|
||||
|
||||
tiktoken
|
||||
@@ -8,7 +8,8 @@ from llama_index.core import VectorStoreIndex, SQLDatabase
|
||||
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
|
||||
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
|
||||
from app.api.routers.chat import generate_filters
|
||||
from app.engine import get_index, makeDescriptionByEngine
|
||||
from app.engine import get_index
|
||||
from app.engine.engine import makeDescriptionByEngine
|
||||
from app.engine.loaders.db import CustomDatabaseReader
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.observability import init_observability
|
||||
|
||||
@@ -7,7 +7,8 @@ from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
|
||||
from sqlalchemy import create_engine
|
||||
|
||||
from app.api.routers.chat import generate_filters
|
||||
from app.engine import get_index, makeDescriptionByEngine
|
||||
from app.engine import get_index
|
||||
from app.engine.engine import makeDescriptionByEngine
|
||||
from app.engine.vectordb import get_vector_store
|
||||
from app.observability import init_observability
|
||||
from app.settings import init_settings
|
||||
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "人工费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "临时设施费的费率是多少?",
|
||||
"answer": "费率是6.3500000000"
|
||||
},
|
||||
{
|
||||
"question": "乙供装置性材料费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "直接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "甲供装置性材料费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "直接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "夜间施工增加费的费率是多少?",
|
||||
"answer": "费率是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "装置性材料费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "冬雨季施工增加费的费率是多少?",
|
||||
"answer": "费率是3.5700000000"
|
||||
},
|
||||
{
|
||||
"question": "材料费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "机械价差的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "规费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "直接工程费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "安全文明施工费的费率是多少?",
|
||||
"answer": "费率是3.5500000000"
|
||||
},
|
||||
{
|
||||
"question": "企业管理费的费率是多少?",
|
||||
"answer": "费率是35.7600000000"
|
||||
},
|
||||
{
|
||||
"question": "税金的费率是多少?",
|
||||
"answer": "费率是9.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "直接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "安全文明施工费的费率是多少?",
|
||||
"answer": "费率是3.5500000000"
|
||||
},
|
||||
{
|
||||
"question": "合计的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "税金的费率是多少?",
|
||||
"answer": "费率是9.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "安全文明施工费的费率是多少?",
|
||||
"answer": "费率是3.5500000000"
|
||||
},
|
||||
{
|
||||
"question": "直接工程费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "税金的费率是多少?",
|
||||
"answer": "费率是9.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "社会保险费的费率是多少?",
|
||||
"answer": "费率是15.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "间接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "合计的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "临时设施费的费率是多少?",
|
||||
"answer": "费率是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "利润的费率是多少?",
|
||||
"answer": "费率是5.2400000000"
|
||||
},
|
||||
{
|
||||
"question": "税金的费率是多少?",
|
||||
"answer": "费率是9.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "社会保险费的费率是多少?",
|
||||
"answer": "费率是15.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "直接工程费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "乙供设备不含税价的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "企业管理费的费率是多少?",
|
||||
"answer": "费率是17.1300000000"
|
||||
},
|
||||
{
|
||||
"question": "企业管理费的费率是多少?",
|
||||
"answer": "费率是35.7600000000"
|
||||
},
|
||||
{
|
||||
"question": "夜间施工增加费的费率是多少?",
|
||||
"answer": "费率是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "直接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "夜间施工增加费的费率是多少?",
|
||||
"answer": "费率是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "甲供设备含税价的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "施工机械使用费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "安全文明施工费的费率是多少?",
|
||||
"answer": "费率是3.5500000000"
|
||||
},
|
||||
{
|
||||
"question": "定额直接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "主材费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "直接费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "施工企业配合调试费的费率是多少?",
|
||||
"answer": "费率是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "施工机械使用费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "临时设施费的费率是多少?",
|
||||
"answer": "费率是6.3500000000"
|
||||
},
|
||||
{
|
||||
"question": "施工工具用具使用费的费率是多少?",
|
||||
"answer": "费率是3.8200000000"
|
||||
},
|
||||
{
|
||||
"question": "措施费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "材料价差的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "措施费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "前期工作管理费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "特种设备安全监测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "工程监理费的金额是多少?",
|
||||
"answer": "金额是131009.9200000000"
|
||||
},
|
||||
{
|
||||
"question": "水土保持方案编审费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "生产准备费的金额是多少?",
|
||||
"answer": "金额是472373669.4635599852"
|
||||
},
|
||||
{
|
||||
"question": "电力工程技术经济标准编制费的金额是多少?",
|
||||
"answer": "金额是84352440.9756360054"
|
||||
},
|
||||
{
|
||||
"question": "项目建设技术服务费的金额是多少?",
|
||||
"answer": "金额是16855957065.4302005768"
|
||||
},
|
||||
{
|
||||
"question": "工程保险费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "其他的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "施工图文件评审费的金额是多少?",
|
||||
"answer": "金额是24940.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "节能评估费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "桩基检测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "项目前期工作费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "其他的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "项目法人管理费的金额是多少?",
|
||||
"answer": "金额是986923559.4149370193"
|
||||
},
|
||||
{
|
||||
"question": "专业爆破服务费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "节能评估费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "用地预审费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "设备材料监造费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "环境监测及环境保护验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "环境监测及环境保护验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "设备材料监造费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "勘察费的金额是多少?",
|
||||
"answer": "金额是12122154260.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "项目法人管理费的金额是多少?",
|
||||
"answer": "金额是986923559.4149370193"
|
||||
},
|
||||
{
|
||||
"question": "社会稳定风险评估费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "勘察费的金额是多少?",
|
||||
"answer": "金额是12122154260.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "环境影响评价费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "水土保持方案编审费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "使用林地可行性研究费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "环境监测及环境保护验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "桩基检测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "设计费的金额是多少?",
|
||||
"answer": "金额是4042055949.4299998283"
|
||||
},
|
||||
{
|
||||
"question": "环境监测及环境保护验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "建设场地征用及清理费的金额是多少?",
|
||||
"answer": "金额是16831284.2287110016"
|
||||
},
|
||||
{
|
||||
"question": "施工图文件评审费的金额是多少?",
|
||||
"answer": "金额是24940.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "项目后评价费的金额是多少?",
|
||||
"answer": "金额是421762204.8781780005"
|
||||
},
|
||||
{
|
||||
"question": "水土保持方案编审费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "勘察设计费的金额是多少?",
|
||||
"answer": "金额是16164210209.4300003052"
|
||||
},
|
||||
{
|
||||
"question": "前期工作管理费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "节能评估费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "初步设计文件评审费的金额是多少?",
|
||||
"answer": "金额是18560.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "特种设备安全监测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "初步设计文件评审费的金额是多少?",
|
||||
"answer": "金额是18560.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "桩基检测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "矿产压覆评估费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "设计费的金额是多少?",
|
||||
"answer": "金额是4042055949.4299998283"
|
||||
},
|
||||
{
|
||||
"question": "水土保持方案编审费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "电力工程技术经济标准编制费的金额是多少?",
|
||||
"answer": "金额是84352440.9756360054"
|
||||
},
|
||||
{
|
||||
"question": "桩基检测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "矿产压覆评估费用的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "新增项目名称的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "预制基础的合价是多少?",
|
||||
"answer": "合价是40567.2639480000"
|
||||
},
|
||||
{
|
||||
"question": "绝缘子串及金具安装的合价是多少?",
|
||||
"answer": "合价是2897171.9878110001"
|
||||
},
|
||||
{
|
||||
"question": "杆塔工程材料工地运输的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "基础防护的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "新增项目名称的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "(1)拆除后能利用的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "地基处理的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "灌注桩基础的合价是多少?",
|
||||
"answer": "合价是43466660.0544390008"
|
||||
},
|
||||
{
|
||||
"question": "(1)拆除后能利用的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "悬垂绝缘子串及金具安装的合价是多少?",
|
||||
"answer": "合价是1251465.0340440001"
|
||||
},
|
||||
{
|
||||
"question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "附件安装工程的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "导地线跨越架设的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "辅助工程的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "新增项目名称的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "绝缘子串及金具安装的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "护坡、挡土墙及排洪沟砌筑的合价是多少?",
|
||||
"answer": "合价是709931.9013930000"
|
||||
},
|
||||
{
|
||||
"question": "锚杆基础的合价是多少?",
|
||||
"answer": "合价是15344967.9002950005"
|
||||
},
|
||||
{
|
||||
"question": "建筑工程的合价是多少?",
|
||||
"answer": "合价是25411.2790780000"
|
||||
},
|
||||
{
|
||||
"question": "辅助工程的合价是多少?",
|
||||
"answer": "合价是1046253.4135240000"
|
||||
},
|
||||
{
|
||||
"question": "导地线跨越架设的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "电缆工程的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "输、送电线路试运的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "基础土石方工程的合价是多少?",
|
||||
"answer": "合价是32872843180.7429008484"
|
||||
},
|
||||
{
|
||||
"question": "基础永久性围堰的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "基础永久性围堰的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "混凝土及钢筋混凝土结构的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "输、送电线路试运的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "混合结构的合价是多少?",
|
||||
"answer": "合价是16967.5193850000"
|
||||
},
|
||||
{
|
||||
"question": "杆塔组立的合价是多少?",
|
||||
"answer": "合价是2253906.0859830002"
|
||||
},
|
||||
{
|
||||
"question": "附件安装工程的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "接地工程材料工地运输的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "新增项目名称的合价是多少?",
|
||||
"answer": "合价是27148.0310160000"
|
||||
},
|
||||
{
|
||||
"question": "导地线架设的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "护坡、挡土墙及排洪沟的合价是多少?",
|
||||
"answer": "合价是709931.9013930000"
|
||||
},
|
||||
{
|
||||
"question": "(1)拆除后能利用的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "基础永久性围堰砌筑的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "(2)拆除后不能利用的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "安装工程的合价是多少?",
|
||||
"answer": "合价是65324.9496330000"
|
||||
},
|
||||
{
|
||||
"question": "尖峰、施工基面土石方工程的合价是多少?",
|
||||
"answer": "合价是325205.4178770000"
|
||||
},
|
||||
{
|
||||
"question": "架线工程的合价是多少?",
|
||||
"answer": "合价是4844399648.0778598785"
|
||||
},
|
||||
{
|
||||
"question": "杆塔组立的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "架线工程材料工地运输的合价是多少?",
|
||||
"answer": "合价是2088570123.2409000397"
|
||||
},
|
||||
{
|
||||
"question": "导地线架设的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "耐张绝缘子串及金具安装的合价是多少?",
|
||||
"answer": "合价是1645706.9537680000"
|
||||
},
|
||||
{
|
||||
"question": "架线工程材料工地运输的合价是多少?",
|
||||
"answer": "合价是2088570123.2409000397"
|
||||
},
|
||||
{
|
||||
"question": "其他基础的合价是多少?",
|
||||
"answer": "合价是3839666.7656879998"
|
||||
},
|
||||
{
|
||||
"question": "架线工程材料工地运输的合价是多少?",
|
||||
"answer": "合价是0E-10"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是440877984.9458540082"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是1086586.9018659999"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是51486.7898090000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是3321.8139230000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是78005.0340730000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是3535892767.0972299576"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是24045.2334060000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是336253.7482950000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是142270.1346780000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是61049.8665780000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是933061.7795919999"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是182949.5997350000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(余物清理)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是21220645.1637400016"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是933061.7795919999"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是2501470269.7231497765"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是51486.7898090000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是55265.9111100000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是442897633.6273120046"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是1057484.3306960000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是442897633.6273120046"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是21220645.1637400016"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(余物清理)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是336253.7482950000"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "的直接费是多少?",
|
||||
"answer": "直接费是61049.8665780000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(余物清理)(1)的直接费是多少?",
|
||||
"answer": "直接费是61049.8665780000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是24045.2334060000"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(余物清理)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是659466.5955000001"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表(拆除)的直接费是多少?",
|
||||
"answer": "直接费是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "线路取费表的直接费是多少?",
|
||||
"answer": "直接费是2501470269.7231497765"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "降阻剂_数量的属性值是多少?",
|
||||
"answer": "属性值是f"
|
||||
},
|
||||
{
|
||||
"question": "导线2_单位单价的属性值是多少?",
|
||||
"answer": "属性值是9"
|
||||
},
|
||||
{
|
||||
"question": "导线_单公里用量的属性值是多少?",
|
||||
"answer": "属性值是36"
|
||||
},
|
||||
{
|
||||
"question": "线路参数_导地线防震措施的属性值是多少?",
|
||||
"answer": "属性值是457"
|
||||
},
|
||||
{
|
||||
"question": "合成绝缘子_数量的属性值是多少?",
|
||||
"answer": "属性值是5"
|
||||
},
|
||||
{
|
||||
"question": "基础垫层的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "其中:基础护壁用量的属性值是多少?",
|
||||
"answer": "属性值是74394.212"
|
||||
},
|
||||
{
|
||||
"question": "铺石加混凝土的属性值是多少?",
|
||||
"answer": "属性值是0.0"
|
||||
},
|
||||
{
|
||||
"question": "导线用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是-795976.0855"
|
||||
},
|
||||
{
|
||||
"question": "导线单公里用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是-159195.2171"
|
||||
},
|
||||
{
|
||||
"question": "灰土垫层单公里用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是8.0"
|
||||
},
|
||||
{
|
||||
"question": "地线瓷绝缘子单公里用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是738.253"
|
||||
},
|
||||
{
|
||||
"question": "地形条件_高山的属性值是多少?",
|
||||
"answer": "属性值是7"
|
||||
},
|
||||
{
|
||||
"question": "流砂坑比例的属性值是多少?",
|
||||
"answer": "属性值是0.001"
|
||||
},
|
||||
{
|
||||
"question": "碎石_数量的属性值是多少?",
|
||||
"answer": "属性值是12"
|
||||
},
|
||||
{
|
||||
"question": "线路参数_导地线防震措施的属性值是多少?",
|
||||
"answer": "属性值是457"
|
||||
},
|
||||
{
|
||||
"question": "灰土垫层的属性值是多少?",
|
||||
"answer": "属性值是40.0"
|
||||
},
|
||||
{
|
||||
"question": "交叉跨越_弱电线路的属性值是多少?",
|
||||
"answer": "属性值是45"
|
||||
},
|
||||
{
|
||||
"question": "地线1_根数的属性值是多少?",
|
||||
"answer": "属性值是12"
|
||||
},
|
||||
{
|
||||
"question": "土质比例_岩石(人凿)的属性值是多少?",
|
||||
"answer": "属性值是49"
|
||||
},
|
||||
{
|
||||
"question": "耐张混凝土杆基数的属性值是多少?",
|
||||
"answer": "属性值是26.0"
|
||||
},
|
||||
{
|
||||
"question": "设计单位的属性值是多少?",
|
||||
"answer": "属性值是3"
|
||||
},
|
||||
{
|
||||
"question": "接地钢的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "间隔棒_单公里用量的属性值是多少?",
|
||||
"answer": "属性值是r"
|
||||
},
|
||||
{
|
||||
"question": "导线其中:跳线和导线弧垂单公里用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是159203.0171"
|
||||
},
|
||||
{
|
||||
"question": "桩基础的属性值是多少?",
|
||||
"answer": "属性值是310.0"
|
||||
},
|
||||
{
|
||||
"question": "降阻剂的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "可抵扣增值税(万元)的属性值是多少?",
|
||||
"answer": "属性值是2005241.808822"
|
||||
},
|
||||
{
|
||||
"question": "主要技术经济指标2的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "合成绝缘子_数量的属性值是多少?",
|
||||
"answer": "属性值是5"
|
||||
},
|
||||
{
|
||||
"question": "土质比例_水坑的属性值是多少?",
|
||||
"answer": "属性值是47"
|
||||
},
|
||||
{
|
||||
"question": "基础_插入式的属性值是多少?",
|
||||
"answer": "属性值是3"
|
||||
},
|
||||
{
|
||||
"question": "耐张角钢塔比例的属性值是多少?",
|
||||
"answer": "属性值是250%"
|
||||
},
|
||||
{
|
||||
"question": "地线的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "回路数的属性值是多少?",
|
||||
"answer": "属性值是三回"
|
||||
},
|
||||
{
|
||||
"question": "导线其中:跳线和导线弧垂用量的属性值是多少?",
|
||||
"answer": "属性值是796015.0855"
|
||||
},
|
||||
{
|
||||
"question": "OPGW用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是2904.737"
|
||||
},
|
||||
{
|
||||
"question": "现浇混凝土_单公里用量的属性值是多少?",
|
||||
"answer": "属性值是22"
|
||||
},
|
||||
{
|
||||
"question": "架线工程费用(万元)(含价差)的属性值是多少?",
|
||||
"answer": "属性值是3203726.0"
|
||||
},
|
||||
{
|
||||
"question": "耐张钢管塔比例的属性值是多少?",
|
||||
"answer": "属性值是300%"
|
||||
},
|
||||
{
|
||||
"question": "单公里土石方量_基面的属性值是多少?",
|
||||
"answer": "属性值是8*8"
|
||||
},
|
||||
{
|
||||
"question": "地线2的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "降阻剂的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "土质比例的属性值是多少?",
|
||||
"answer": "属性值是"
|
||||
},
|
||||
{
|
||||
"question": "地线1_单位单价的属性值是多少?",
|
||||
"answer": "属性值是113"
|
||||
},
|
||||
{
|
||||
"question": "绝缘子串型式_悬垂串的属性值是多少?",
|
||||
"answer": "属性值是48"
|
||||
},
|
||||
{
|
||||
"question": "基坑土石方量(西北)的属性值是多少?",
|
||||
"answer": "属性值是405403506.156"
|
||||
},
|
||||
{
|
||||
"question": "基坑坚土的属性值是多少?",
|
||||
"answer": "属性值是25585167.713"
|
||||
},
|
||||
{
|
||||
"question": "基坑普通土的属性值是多少?",
|
||||
"answer": "属性值是313873965.334"
|
||||
},
|
||||
{
|
||||
"question": "瓷绝缘子单公里用量(西北)的属性值是多少?",
|
||||
"answer": "属性值是201.0"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 水坑 坑深2.0m以内的编码是多少?",
|
||||
"answer": "编码是YX2-72"
|
||||
},
|
||||
{
|
||||
"question": "钢筋加工及制作的编码是多少?",
|
||||
"answer": "编码是YX3-43"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 线材 每件重400kg以内 运输的编码是多少?",
|
||||
"answer": "编码是YX1-132"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 钢管塔材 运输的编码是多少?",
|
||||
"answer": "编码是YX1-152"
|
||||
},
|
||||
{
|
||||
"question": "碎石的编码是多少?",
|
||||
"answer": "编码是C10020103"
|
||||
},
|
||||
{
|
||||
"question": "混凝土(保护帽)的编码是多少?",
|
||||
"answer": "编码是ZH1001"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 金具、绝缘子、零星钢材 运输的编码是多少?",
|
||||
"answer": "编码是YX1-144"
|
||||
},
|
||||
{
|
||||
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
|
||||
"answer": "编码是YX1-1"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 线材 每件重1000kg以内 运输的编码是多少?",
|
||||
"answer": "编码是YX1-136"
|
||||
},
|
||||
{
|
||||
"question": "混凝土搅拌及浇制 每基基础联系梁混凝土量20m³以内的编码是多少?",
|
||||
"answer": "编码是YX3-69"
|
||||
},
|
||||
{
|
||||
"question": "索道运输 循环式 塔材 荷载1t以内 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-185"
|
||||
},
|
||||
{
|
||||
"question": "人力运输 混凝土预制品 每件重100kg以内的编码是多少?",
|
||||
"answer": "编码是YX1-6"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 混凝土杆 每件重1500kg以上 运输的编码是多少?",
|
||||
"answer": "编码是YX1-118"
|
||||
},
|
||||
{
|
||||
"question": "碎石的编码是多少?",
|
||||
"answer": "编码是C10020103"
|
||||
},
|
||||
{
|
||||
"question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 泥水 坑深8.0m以上的编码是多少?",
|
||||
"answer": "编码是YX2-55"
|
||||
},
|
||||
{
|
||||
"question": "机械施工土方 场地平整的编码是多少?",
|
||||
"answer": "编码是GT1-1"
|
||||
},
|
||||
{
|
||||
"question": "汽车运输 混凝土预制品 每件重100kg以内 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-69"
|
||||
},
|
||||
{
|
||||
"question": "汽车运输 其他建筑安装材料 运输的编码是多少?",
|
||||
"answer": "编码是YX1-108"
|
||||
},
|
||||
{
|
||||
"question": "钻孔灌注桩基础 混凝土搅拌及浇制 孔深10m以内的编码是多少?",
|
||||
"answer": "编码是YX3-171"
|
||||
},
|
||||
{
|
||||
"question": "线路复测及分坑 直线双杆及拉线塔的编码是多少?",
|
||||
"answer": "编码是YX2-3"
|
||||
},
|
||||
{
|
||||
"question": "氧化锌避雷器安装 35kV的编码是多少?",
|
||||
"answer": "编码是YX7-32"
|
||||
},
|
||||
{
|
||||
"question": "混凝土(保护帽)的编码是多少?",
|
||||
"answer": "编码是ZH1002"
|
||||
},
|
||||
{
|
||||
"question": "汽车运输 其他建筑安装材料 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-107"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 混凝土杆 每件重500kg以内 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-109"
|
||||
},
|
||||
{
|
||||
"question": "混凝土(保护帽)的编码是多少?",
|
||||
"answer": "编码是ZH1001"
|
||||
},
|
||||
{
|
||||
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
|
||||
"answer": "编码是YX1-1"
|
||||
},
|
||||
{
|
||||
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
|
||||
"answer": "编码是YX1-1"
|
||||
},
|
||||
{
|
||||
"question": "普通硅酸盐水泥的编码是多少?",
|
||||
"answer": "编码是C09010102"
|
||||
},
|
||||
{
|
||||
"question": "拖拉机运输 钢管塔材 运输的编码是多少?",
|
||||
"answer": "编码是YX1-44"
|
||||
},
|
||||
{
|
||||
"question": "尖峰及施工基面挖方(或爆破) 普通土的编码是多少?",
|
||||
"answer": "编码是YX2-226"
|
||||
},
|
||||
{
|
||||
"question": "汽车运输 角钢塔材 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-103"
|
||||
},
|
||||
{
|
||||
"question": "接地槽挖方(或爆破)及回填 普通土的编码是多少?",
|
||||
"answer": "编码是YX2-213"
|
||||
},
|
||||
{
|
||||
"question": "水的编码是多少?",
|
||||
"answer": "编码是C21010101"
|
||||
},
|
||||
{
|
||||
"question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV 针式单联串(悬垂串)的编码是多少?",
|
||||
"answer": "编码是YX6-21"
|
||||
},
|
||||
{
|
||||
"question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV I型双联串(悬垂串)的编码是多少?",
|
||||
"answer": "编码是YX6-22"
|
||||
},
|
||||
{
|
||||
"question": "钻孔灌注桩基础 机械推钻成孔 砂砾石 孔深20m以内 孔径1.0m以内的编码是多少?",
|
||||
"answer": "编码是YX3-117"
|
||||
},
|
||||
{
|
||||
"question": "线路复测及分坑 直线自立塔的编码是多少?",
|
||||
"answer": "编码是YX2-6"
|
||||
},
|
||||
{
|
||||
"question": "钻孔灌注桩基础 凿桩头 桩径0.8m以上的编码是多少?",
|
||||
"answer": "编码是YX3-180"
|
||||
},
|
||||
{
|
||||
"question": "线路复测及分坑 耐张(转角)单杆的编码是多少?",
|
||||
"answer": "编码是YX2-2"
|
||||
},
|
||||
{
|
||||
"question": "中砂的编码是多少?",
|
||||
"answer": "编码是C10010101"
|
||||
},
|
||||
{
|
||||
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
|
||||
"answer": "编码是YX1-1"
|
||||
},
|
||||
{
|
||||
"question": "带电跨越电力线 被跨线电压等级 35kV的编码是多少?",
|
||||
"answer": "编码是YX5-186"
|
||||
},
|
||||
{
|
||||
"question": "人工挖土方 普土 深2m以内的编码是多少?",
|
||||
"answer": "编码是YT1-1"
|
||||
},
|
||||
{
|
||||
"question": "混凝土杆的编码是多少?",
|
||||
"answer": "编码是"
|
||||
},
|
||||
{
|
||||
"question": "接地模块安装的编码是多少?",
|
||||
"answer": "编码是YX3-213"
|
||||
},
|
||||
{
|
||||
"question": "拖拉机运输 线材 每件重400kg以内 运输的编码是多少?",
|
||||
"answer": "编码是YX1-34"
|
||||
},
|
||||
{
|
||||
"question": "拖拉机运输 其他建筑安装材料 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-45"
|
||||
},
|
||||
{
|
||||
"question": "普通硅酸盐水泥的编码是多少?",
|
||||
"answer": "编码是C09010102"
|
||||
},
|
||||
{
|
||||
"question": "船舶运输 线材 每件重4000kg以内 装卸的编码是多少?",
|
||||
"answer": "编码是YX1-139"
|
||||
},
|
||||
{
|
||||
"question": "水的编码是多少?",
|
||||
"answer": "编码是C21010101"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,202 @@
|
||||
[
|
||||
{
|
||||
"question": "架空输电线路本体工程的金额是多少?",
|
||||
"answer": "金额是55105688268.5176010132"
|
||||
},
|
||||
{
|
||||
"question": "价差预备费的金额是多少?",
|
||||
"answer": "金额是22731130869.6655998230"
|
||||
},
|
||||
{
|
||||
"question": "工程静态投资的金额是多少?",
|
||||
"answer": "金额是715035853336.3909912109"
|
||||
},
|
||||
{
|
||||
"question": "工程动态投资的金额是多少?",
|
||||
"answer": "金额是776282009093.5660400391"
|
||||
},
|
||||
{
|
||||
"question": "其中:工程建设检测费的金额是多少?",
|
||||
"answer": "金额是185575370.1463980079"
|
||||
},
|
||||
{
|
||||
"question": "工程静态投资的金额是多少?",
|
||||
"answer": "金额是715035853336.3909912109"
|
||||
},
|
||||
{
|
||||
"question": "建设期贷款利息的金额是多少?",
|
||||
"answer": "金额是38515024887.5095977783"
|
||||
},
|
||||
{
|
||||
"question": "特殊项目的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "动态费用的金额是多少?",
|
||||
"answer": "金额是61246155757.1752014160"
|
||||
},
|
||||
{
|
||||
"question": "动态费用的金额是多少?",
|
||||
"answer": "金额是61246155757.1752014160"
|
||||
},
|
||||
{
|
||||
"question": "小计的金额是多少?",
|
||||
"answer": "金额是458257942570.3129882812"
|
||||
},
|
||||
{
|
||||
"question": "其他费用的金额是多少?",
|
||||
"answer": "金额是210942912572.8689880371"
|
||||
},
|
||||
{
|
||||
"question": "基本预备费的金额是多少?",
|
||||
"answer": "金额是14020310849.7332000732"
|
||||
},
|
||||
{
|
||||
"question": "其中:水土保持监测及验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "其中:工程建设检测费的金额是多少?",
|
||||
"answer": "金额是185575370.1463980079"
|
||||
},
|
||||
{
|
||||
"question": "其中:特种设备安全监测费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "工程静态投资的金额是多少?",
|
||||
"answer": "金额是715035853336.3909912109"
|
||||
},
|
||||
{
|
||||
"question": "其中:水土保持监测及验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "架空输电线路本体工程的金额是多少?",
|
||||
"answer": "金额是55105688268.5176010132"
|
||||
},
|
||||
{
|
||||
"question": "基本预备费的金额是多少?",
|
||||
"answer": "金额是14020310849.7332000732"
|
||||
},
|
||||
{
|
||||
"question": "其中:水土保持监测及验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "小计的金额是多少?",
|
||||
"answer": "金额是458257942570.3129882812"
|
||||
},
|
||||
{
|
||||
"question": "编制基准期价差的金额是多少?",
|
||||
"answer": "金额是29246752707.1180000305"
|
||||
},
|
||||
{
|
||||
"question": "其中:水土保持监测及验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "小计的金额是多少?",
|
||||
"answer": "金额是458257942570.3129882812"
|
||||
},
|
||||
{
|
||||
"question": "其他费用的金额是多少?",
|
||||
"answer": "金额是210942912572.8689880371"
|
||||
},
|
||||
{
|
||||
"question": "特殊项目的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "编制基准期价差的金额是多少?",
|
||||
"answer": "金额是29246752707.1180000305"
|
||||
},
|
||||
{
|
||||
"question": "特殊项目的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "小计的金额是多少?",
|
||||
"answer": "金额是458257942570.3129882812"
|
||||
},
|
||||
{
|
||||
"question": "工程动态投资的金额是多少?",
|
||||
"answer": "金额是776282009093.5660400391"
|
||||
},
|
||||
{
|
||||
"question": "其中:建设场地征用及清理费的金额是多少?",
|
||||
"answer": "金额是16831284.2287110016"
|
||||
},
|
||||
{
|
||||
"question": "其中:可抵扣增值税额的金额是多少?",
|
||||
"answer": "金额是20069645492.2888984680"
|
||||
},
|
||||
{
|
||||
"question": "小计的金额是多少?",
|
||||
"answer": "金额是458257942570.3129882812"
|
||||
},
|
||||
{
|
||||
"question": "动态费用的金额是多少?",
|
||||
"answer": "金额是61246155757.1752014160"
|
||||
},
|
||||
{
|
||||
"question": "建设期贷款利息的金额是多少?",
|
||||
"answer": "金额是38515024887.5095977783"
|
||||
},
|
||||
{
|
||||
"question": "工程静态投资的金额是多少?",
|
||||
"answer": "金额是715035853336.3909912109"
|
||||
},
|
||||
{
|
||||
"question": "其中:建设场地征用及清理费的金额是多少?",
|
||||
"answer": "金额是16831284.2287110016"
|
||||
},
|
||||
{
|
||||
"question": "建设期贷款利息的金额是多少?",
|
||||
"answer": "金额是38515024887.5095977783"
|
||||
},
|
||||
{
|
||||
"question": "工程动态投资的金额是多少?",
|
||||
"answer": "金额是776282009093.5660400391"
|
||||
},
|
||||
{
|
||||
"question": "架空输电线路本体工程的金额是多少?",
|
||||
"answer": "金额是55105688268.5176010132"
|
||||
},
|
||||
{
|
||||
"question": "其中:工程建设检测费的金额是多少?",
|
||||
"answer": "金额是185575370.1463980079"
|
||||
},
|
||||
{
|
||||
"question": "其中:水土保持监测及验收费的金额是多少?",
|
||||
"answer": "金额是0E-10"
|
||||
},
|
||||
{
|
||||
"question": "工程动态投资的金额是多少?",
|
||||
"answer": "金额是776282009093.5660400391"
|
||||
},
|
||||
{
|
||||
"question": "其中:可抵扣增值税额的金额是多少?",
|
||||
"answer": "金额是20069645492.2888984680"
|
||||
},
|
||||
{
|
||||
"question": "价差预备费的金额是多少?",
|
||||
"answer": "金额是22731130869.6655998230"
|
||||
},
|
||||
{
|
||||
"question": "一般线路本体工程的金额是多少?",
|
||||
"answer": "金额是55105688268.5176010132"
|
||||
},
|
||||
{
|
||||
"question": "其中:工程建设检测费的金额是多少?",
|
||||
"answer": "金额是185575370.1463980079"
|
||||
},
|
||||
{
|
||||
"question": "基本预备费的金额是多少?",
|
||||
"answer": "金额是14020310849.7332000732"
|
||||
},
|
||||
{
|
||||
"question": "设备购置费的金额是多少?",
|
||||
"answer": "金额是2567934636.3574500084"
|
||||
}
|
||||
]
|
||||
@@ -1,20 +1,5 @@
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
from llama_index.core.evaluation import CorrectnessEvaluator
|
||||
from app.engine import get_chat_engine
|
||||
from app.engine.index import get_index
|
||||
from app.observability import init_observability
|
||||
from app.settings import init_settings
|
||||
|
||||
init_settings()
|
||||
init_observability()
|
||||
|
||||
index = get_index()
|
||||
|
||||
|
||||
import os
|
||||
import json
|
||||
from dotenv import load_dotenv
|
||||
import asyncio
|
||||
import nest_asyncio
|
||||
nest_asyncio.apply()
|
||||
@@ -70,14 +55,31 @@ DEFAULT_EVAL_TEMPLATE = ChatPromptTemplate(
|
||||
]
|
||||
)
|
||||
|
||||
from app.api.routers.models import ChatData, Message
|
||||
from llama_index.core.chat_engine.types import BaseChatEngine, NodeWithScore
|
||||
from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters
|
||||
from llama_index.core.evaluation import CorrectnessEvaluator
|
||||
from app.engine import get_chat_engine
|
||||
from app.api.routers.chat import generate_filters
|
||||
from app.engine.index import get_index
|
||||
from app.observability import init_observability
|
||||
from app.settings import init_settings
|
||||
|
||||
|
||||
load_dotenv()
|
||||
|
||||
|
||||
init_settings()
|
||||
init_observability()
|
||||
|
||||
index = get_index()
|
||||
|
||||
# 初始化聊天引擎和评估器
|
||||
chat_engine = get_chat_engine()
|
||||
corr_evaluator_qwen = CorrectnessEvaluator()
|
||||
|
||||
# 加载本地问题回答文件
|
||||
script_dir = os.path.dirname(os.path.abspath(__file__))
|
||||
file_path = os.path.join(script_dir, 'questions_and_answers.json')
|
||||
file_path = 'D:/LLM_model/text2sql/zjdataai-app-test/backend/unit_test/test.json'
|
||||
output_file_path = file_path.replace('.json', '_test.json')
|
||||
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
@@ -86,13 +88,8 @@ with open(file_path, 'r', encoding='utf-8') as f:
|
||||
# 异步函数用于评估查询
|
||||
async def evaluate_query(question, answer, index, output_file):
|
||||
response = await chat_engine.astream_chat(question)
|
||||
|
||||
# 检查sources是否为空
|
||||
if response.sources:
|
||||
content_str = str(response.sources[0])
|
||||
else:
|
||||
content_str = "<无回答>"
|
||||
|
||||
content_str = str(response.sources[0])
|
||||
|
||||
result = corr_evaluator_qwen.evaluate(
|
||||
query=question,
|
||||
response=content_str,
|
||||
@@ -104,13 +101,13 @@ async def evaluate_query(question, answer, index, output_file):
|
||||
"问题": question,
|
||||
"答案": answer,
|
||||
"回答": result.response,
|
||||
"得分(1~5)": result.score,
|
||||
"得分(0~5)": result.score,
|
||||
"评价": result.feedback
|
||||
}
|
||||
|
||||
with open(output_file, 'a', encoding='utf-8') as f:
|
||||
f.write(json.dumps(result_dict, ensure_ascii=False, indent=4))
|
||||
f.write(',\n')
|
||||
f.write(',')
|
||||
|
||||
# 主异步函数
|
||||
async def main():
|
||||
|
||||
@@ -1,55 +0,0 @@
|
||||
# Question-generation prompt for plain attribute lookups (the "attribute" column
# of the data). The `{num_questions_per_chunk}` placeholder is left unformatted
# here so the caller can fill in how many question/answer pairs to generate.
Attribute_Prompt = "".join(
    [
        # Role and task.
        "你是一个电力造价工程相关的项目经理,现在给你一些上下文信息,你需要根据现有的上下文信息,",
        "来生成{num_questions_per_chunk}个电力造价工程相关的问题和对应的回答,",
        "现在需要你针对数据中属性一列进行提问和回答。",
        # Few-shot examples, each written as 'question,answer'.
        "问题和回答的示例应该是这种类型的,示例:",
        "'工程总投资(万元),工程总投资(万元)是77469835.590045万元',",
        "'尖峰及施工基面土石方量,尖峰及施工基面土石方量是8377.6',",
        "'截止阀的编码,截止阀的编码是F01010203',",
        # Output-format and quality constraints.
        "你生成的回答必须严格按照示例中的格式('问题, 回答'),不允许有丝毫的变动。",
        "问题和回答应该在一个单引号内。",
        "这种类似的问题和答案,生成的问题和答案必须一一对应,要符合文件里的内容,",
        "不要生成一些无关的问题,不要生成一些重复的问题,",
        "不要生成一些过于简单的问题,不要生成一些过于复杂的问题。",
    ]
)
|
||||
|
||||
|
||||
# Question-generation prompt for amount / combined-price lookups. The
# `{num_questions_per_chunk}` placeholder is left unformatted here so the
# caller can fill in how many question/answer pairs to generate.
Amount_Prompt = (
    "你是一个电力造价工程相关的项目经理,现在给你一些上下文信息,"
    "你需要根据现有的上下文信息,来生成{num_questions_per_chunk}个电力造价工程相关的问题和对应的回答,"
    "现在需要你针对上下文信息中的金额或者合价进行提问和回答。"
    # The example list ends with a comma (as in Attribute_Prompt) so that it
    # does not run straight into the next sentence once the adjacent literals
    # are concatenated.
    "问题和回答的示例应该是这种类型的,示例:'项目建设技术服务费的金额,项目建设技术服务费的金额是16855957065.4302','项目后评价费的费率,项目后评价费的费率是0.5','架空输电线路本体工程的金额,架空输电线路本体工程的金额是55105688268.5176','工程静态投资的金额,工程静态投资的金额是715035853336.391',"
    "你生成的回答必须严格按照示例中的格式('问题, 回答'),不允许有丝毫的变动。问题和回答应该在一个单引号内。"
    "这种类似的问题和答案,生成的问题和答案必须一一对应,要符合文件里的内容,不要生成一些无关的问题,不要生成一些重复的问题,"
    "不要生成一些过于简单的问题,不要生成一些过于复杂的问题。"
)
|
||||
|
||||
|
||||
|
||||
# Question-generation prompt for unit-conversion questions. The
# `{num_questions_per_chunk}` placeholder is left unformatted here so the
# caller can fill in how many question/answer pairs to generate.
Units_Prompt = (
    "你是一个电力造价工程相关的项目经理,现在给你一些上下文信息,"
    "你需要根据现有的上下文信息,来生成{num_questions_per_chunk}个电力造价工程相关的问题和对应的回答,"
    "现在需要你针对上下文信息来进行单位转化问题提问和回答。"
    # The example list ends with a comma (as in Attribute_Prompt) so that it
    # does not run straight into the next sentence once the adjacent literals
    # are concatenated.
    "问题和回答的示例应该是这种类型的,示例:'工程总投资(万元)结果用元表示,工程总投资(万元)是774698355900.45元','本体工程(元)结果用万元表示,本体工程(元)是5490494.261046万元',"
    "你生成的回答必须严格按照示例中的格式('问题, 回答'),不允许有丝毫的变动。问题和回答应该在一个单引号内。"
    "这种类似的问题和答案,生成的问题和答案必须一一对应,要符合文件里的内容,不要生成一些无关的问题,不要生成一些重复的问题,"
    "不要生成一些过于简单的问题,不要生成一些过于复杂的问题。"
)
|
||||
|
||||
# Question-generation prompt for items that share a name and must be told
# apart by an extra qualifier (the examples qualify by discipline type). The
# `{num_questions_per_chunk}` placeholder is left unformatted here so the
# caller can fill in how many question/answer pairs to generate.
Name_Prompt = (
    "你是一个电力造价工程相关的项目经理,现在给你一些上下文信息,"
    "你需要根据现有的上下文信息,来生成{num_questions_per_chunk}个电力造价工程相关的问题和对应的回答,"
    "现在需要你针对上下文信息中的重名问题进行提问和回答。"
    # Each example answer repeats the subject of its own question; the second
    # example previously answered about a different item than it asked about.
    # The list ends with a comma so it does not run straight into the next
    # sentence once the adjacent literals are concatenated.
    "问题和回答的示例应该是这种类型的,示例:'专业类型为线路的杆塔工程项目划分的合价,专业类型为线路的杆塔工程项目划分的合价是220969744.905856','专业类型为线路清理的杆塔工程项目划分的合价,专业类型为线路清理的杆塔工程项目划分的合价是0',"
    "你生成的回答必须严格按照示例中的格式('问题, 回答'),不允许有丝毫的变动。问题和回答应该在一个单引号内。"
    "这种类似的问题和答案,生成的问题和答案必须一一对应,要符合文件里的内容,不要生成一些无关的问题,不要生成一些重复的问题,"
    "不要生成一些过于简单的问题,不要生成一些过于复杂的问题。"
)
|
||||
|
||||
|
||||
# Question-generation prompt for "overall amount" lookups. The
# `{num_questions_per_chunk}` placeholder is left unformatted here so the
# caller can fill in how many question/answer pairs to generate.
All_Amount_Prompt = (
    "你是一个电力造价工程相关的项目经理,现在给你一些上下文信息,"
    "你需要根据现有的上下文信息,来生成{num_questions_per_chunk}个电力造价工程相关的问题和对应的回答,"
    "现在需要你针对上下文信息中的总体金额进行提问和回答。"
    # The example list ends with a comma (as in Attribute_Prompt) so that it
    # does not run straight into the next sentence once the adjacent literals
    # are concatenated.
    "问题和回答的示例应该是这种类型的,示例:'架空输电线路本体工程的总体金额,架空输电线路本体工程的总体金额是7.706703','工程静态投资的总体金额,工程静态投资的总体金额是100',"
    "你生成的回答必须严格按照示例中的格式('问题, 回答'),不允许有丝毫的变动。问题和回答应该在一个单引号内。"
    "这种类似的问题和答案,生成的问题和答案必须一一对应,要符合文件里的内容,不要生成一些无关的问题,不要生成一些重复的问题,"
    "不要生成一些过于简单的问题,不要生成一些过于复杂的问题。"
)
|
||||
|
||||
|
||||
@@ -1,144 +0,0 @@
|
||||
|
||||
from dotenv import load_dotenv
|
||||
load_dotenv()
|
||||
|
||||
import json
|
||||
import sys
|
||||
|
||||
|
||||
from app.observability import init_observability
|
||||
from app.settings import init_settings
|
||||
|
||||
import nest_asyncio
|
||||
nest_asyncio.apply()
|
||||
|
||||
from llama_index.core.node_parser import SentenceSplitter
|
||||
from llama_index.core import SimpleDirectoryReader
|
||||
from llama_index.core.evaluation import DatasetGenerator
|
||||
|
||||
import prompts
|
||||
|
||||
init_settings()
|
||||
init_observability()
|
||||
|
||||
# Load every document (i.e. every table) from the test-data directory.
# NOTE(review): the path is a hard-coded absolute Windows path, and
# select_document() indexes this list using the positions in table_names —
# confirm the load order matches those positions on the target machine.
documents = SimpleDirectoryReader("D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test").load_data()
|
||||
|
||||
# Table name -> index used to pick that table out of the loaded documents.
# The index is simply each name's position in the tuple below.
table_names = {
    name: position
    for position, name in enumerate(
        (
            "工程信息表",
            "其他费用表",
            "取费表",
            "项目划分表",
            "项目划分_费用预览表",
            "总算表",
            "工程量表",
        )
    )
}
|
||||
|
||||
# Maps each Chinese prompt-category label to the name of the prompt attribute
# that select_prompt() looks up on the `prompts` module.
prompt_mapping = {
    "普通属性": "Attribute_Prompt",
    "金额查询": "Amount_Prompt",
    "单位换算": "Units_Prompt",
    "重名项目划分": "Name_Prompt",
    "总体金额查询": "All_Amount_Prompt"
}
|
||||
|
||||
# Maps each table name to the prompt categories (keys of prompt_mapping) that
# apply to it.
# NOTE(review): "项目划分表" and "项目划分_费用预览表" appear in table_names but
# have no entry here — confirm that omission is intentional.
table_prompt_mapping = {
    "工程信息表": ["普通属性", "单位换算"],
    "其他费用表": ["金额查询", "单位换算"],
    "取费表": ["金额查询"],
    "总算表": ["金额查询", "总体金额查询"],
    "工程量表": ["普通属性", "重名项目划分"]
}
|
||||
|
||||
# Pick one specific table out of the loaded documents by its name.
def select_document(documents, table_name):
    """Return a one-element list holding the document for ``table_name``.

    The table's position is looked up in the module-level ``table_names``
    mapping and used to index ``documents``.

    Raises:
        ValueError: if ``table_name`` is not a key of ``table_names``.
    """
    if table_name in table_names:
        position = table_names[table_name]
        # Wrap the selected table in a list so callers receive a list.
        return [documents[position]]
    raise ValueError(f"未找到名为 '{table_name}' 的表格")
|
||||
|
||||
# Resolve a prompt category label to its prompt template.
def select_prompt(prompt_category):
    """Return the prompt registered for ``prompt_category``.

    The category label is translated to an attribute name through the
    module-level ``prompt_mapping`` and that attribute is then read from the
    imported ``prompts`` module.

    Args:
        prompt_category: One of the category labels used as keys in
            ``prompt_mapping``.

    Returns:
        Whatever object ``prompts`` exposes under the mapped attribute name.

    Raises:
        ValueError: if the category is unknown, or if ``prompts`` has no
            attribute with the mapped name (the original ``AttributeError``
            is attached as the cause).
    """
    prompt_name = prompt_mapping.get(prompt_category)
    if not prompt_name:
        raise ValueError(f"未找到名为 '{prompt_category}' 的提示词")
    try:
        return getattr(prompts, prompt_name)
    except AttributeError as err:
        # Chain explicitly so the traceback shows the lookup failure as the
        # direct cause rather than as an incidental exception.
        raise ValueError(f"未找到提示词 '{prompt_name}' 对应的函数") from err
|
||||
|
||||
# Generate questions and answers for the given document(s).
def generate_questions_from_document(document, quest_prompt, num_questions):
    """Generate question/answer pairs from ``document`` using ``quest_prompt``.

    A ``DatasetGenerator`` is built from the documents and asked for
    ``num_questions`` items. Each returned item is split at its first ASCII
    comma into a question and an answer; items without a comma are reported
    on stdout and skipped.

    Returns:
        A list of ``{"question": ..., "answer": ...}`` dicts with both parts
        stripped of surrounding whitespace.
    """
    generator = DatasetGenerator.from_documents(
        documents=document,
        question_gen_query=quest_prompt,
        num_questions_per_chunk=num_questions,
    )
    raw_items = generator.generate_questions_from_nodes(num_questions)
    print(raw_items)

    pairs = []
    for item in raw_items:
        head, comma, tail = item.partition(",")
        if not comma:
            # No separator found, so the item cannot be split into a pair.
            print(f"无法处理的问题和答案: {item}")
            continue
        pairs.append({"question": head.strip(), "answer": tail.strip()})
    return pairs
|
||||
|
||||
# Main routine: generates questions for several tables and prompt categories
# and merges all results into a single JSON file.
def main(documents, table_names_input, prompt_categories_input, num_questions_per_prompt,
         output_file="combined_test.json"):
    """Generate Q&A pairs for the selected tables/prompts and save them as JSON.

    Args:
        documents: Loaded documents, indexed by ``select_document``.
        table_names_input: ``"all"`` to use every table in
            ``table_prompt_mapping``, or a comma-separated list of table names
            (optionally wrapped in ``[]``).
        prompt_categories_input: ``"all"`` to use every category mapped to a
            table, or a comma-separated list of category labels (optionally
            wrapped in ``[]``).
        num_questions_per_prompt: Number of questions requested per prompt.
        output_file: Path of the JSON file to write; defaults to the file name
            that was previously hard-coded.

    Raises:
        ValueError: propagated from ``select_document`` / ``select_prompt``
            for unknown table names or prompt names.
    """
    if table_names_input == "all":
        selected_tables = list(table_prompt_mapping.keys())
    else:
        selected_tables = _split_list_argument(table_names_input)

    all_results = {}

    for table_name in selected_tables:
        table_name = table_name.strip()
        # Still validates the table name (raises ValueError when unknown).
        document = select_document(documents, table_name)

        # A table may be known to select_document yet have no prompt categories
        # configured; skip it instead of failing with KeyError.
        allowed_prompts = table_prompt_mapping.get(table_name, [])
        if not allowed_prompts:
            print(f"跳过表格 '{table_name}',因为没有为该表配置任何提示词类别")
            continue

        if prompt_categories_input == "all":
            selected_prompts = allowed_prompts
        else:
            selected_prompts = _split_list_argument(prompt_categories_input)

        for prompt_category in selected_prompts:
            if prompt_category not in allowed_prompts:
                print(f"跳过表格 '{table_name}' 的提示词 '{prompt_category}',因为该表中不包含该类别的信息")
                continue

            quest_prompt = select_prompt(prompt_category).format(
                num_questions_per_chunk=num_questions_per_prompt
            )
            qa_pairs = generate_questions_from_document(
                document, quest_prompt, num_questions_per_prompt
            )

            label = f"test:{table_name}_{prompt_category}"
            all_results[label] = qa_pairs

    with open(output_file, "w", encoding="utf-8") as f:
        json.dump(all_results, f, ensure_ascii=False, indent=4)

    print(f"All questions and answers have been saved to '{output_file}'")


def _split_list_argument(raw):
    """Split a ``[a, b]`` / ``a,b`` style argument into stripped items."""
    return [item.strip() for item in raw.strip('[]').split(',')]
|
||||
|
||||
# Command-line entry point: read the three expected arguments and run main().
if __name__ == "__main__":
    if len(sys.argv) == 4:
        table_names_input, prompt_categories_input, num_questions_per_prompt = sys.argv[1:]
        # The question count arrives as text and is converted to an integer.
        num_questions_per_prompt = int(num_questions_per_prompt)

        main(documents, table_names_input, prompt_categories_input, num_questions_per_prompt)
    else:
        print("Usage: python script.py <table_names_input> <prompt_categories_input> <num_questions_per_prompt>")
|
||||
@@ -0,0 +1,10 @@
|
||||
[
|
||||
{
|
||||
"question": "人工费的费率是多少?",
|
||||
"answer": "费率是100.0000000000"
|
||||
},
|
||||
{
|
||||
"question": "临时设施费的费率是多少?",
|
||||
"answer": "费率是6.3500000000"
|
||||
}
|
||||
]
|
||||
@@ -0,0 +1,18 @@
|
||||
{
|
||||
"编号": 1,
|
||||
"问题": "人工费的费率是多少?",
|
||||
"答案": "费率是100.0000000000",
|
||||
"实际回答": "人工费的费率是100.0。",
|
||||
"得分": 4.5,
|
||||
"评价": "生成的答案与参考答案一致,只是表达方式略有不同,但完全正确且相关。"
|
||||
}
|
||||
********************
|
||||
{
|
||||
"编号": 2,
|
||||
"问题": "临时设施费的费率是多少?",
|
||||
"答案": "费率是6.3500000000",
|
||||
"实际回答": "在新的上下文中,临时设施费的计算应当基于取费定额人工费和取费定额机械费,但当前费率被设置为0.0,这意味着需要重新确认或调整费率。在没有具体费率的情况下,不能直接给出确定的费率值。如果需要计算临时设施费,应根据实际的取费定额人工费和取费定额机械费进行计算。参考之前的6.35%的费率可能是一个起点,但请注意,这需要根据项目的具体情况和最新的费用标准进行调整。",
|
||||
"得分": 1.0,
|
||||
"评价": "生成的答案与用户查询不相关,用户询问的是临时设施费的费率,而生成的答案提供的是一个关于如何计算临时设施费的解释,且提到了一个不相关的0.0费率,这与用户的问题不符。同时,即使提到了6.35%的费率,也没有明确指出这就是用户想要的答案,反而强调了需要根据项目具体情况调整,这增加了用户的困惑。"
|
||||
}
|
||||
********************
|
||||
@@ -0,0 +1,43 @@
|
||||
from typing import Dict, List
|
||||
|
||||
class ClsRegister:
    """Class-level registry of objects grouped by catalog and keyed by name.

    All state lives in the ``clsLst`` class attribute, so every caller shares
    the same registry.
    """

    # catalog -> {name -> registered object}
    clsLst: Dict[str, Dict[str, object]] = {}

    @classmethod
    def add(cls, catalog, name, obj) -> None:
        """Register ``obj`` under ``name`` in ``catalog``.

        The catalog is created on first use; an existing entry with the same
        name is overwritten.
        """
        cls.clsLst.setdefault(catalog, {})[name] = obj

    @classmethod
    def get(cls, catalog, name, fuzzy: bool = False) -> object:
        """Look up a registered object.

        Args:
            catalog: Catalog to search.
            name: Name to match against the registered keys.
            fuzzy: When true, return the first entry (in registration order)
                whose key is contained in ``name``; otherwise require an exact
                key match.

        Returns:
            The registered object, or ``None`` when the catalog is unknown or
            nothing matches.
        """
        registry = cls.clsLst.get(catalog)
        if registry is None:
            return None
        if not fuzzy:
            return registry.get(name)
        for key, value in registry.items():
            # Fuzzy mode: the registered key only has to occur inside `name`.
            if key in name:
                return value
        return None

    @classmethod
    def getClsList(cls, catalog) -> List[object]:
        """Return all objects registered in ``catalog`` in registration order.

        An unknown catalog yields an empty list.
        """
        return list(cls.clsLst.get(catalog, {}).values())
|
||||
|
||||
|
||||
def register(catalog, name):
    """Build a decorator that records its target in ``ClsRegister``.

    The decorated object is added under ``name`` in ``catalog`` and then
    returned unchanged, so the decorator does not alter the object itself.
    """
    def _record(target):
        ClsRegister.add(catalog, name, target)
        # Hand the original object back so the decorated name stays usable.
        return target

    return _record
|
||||
Reference in New Issue
Block a user