4 Commits

Author SHA1 Message Date
ly 72ddf46fc7 Merge pull request '增加新的前端子模块' (#4) from dev into main
Reviewed-on: #4
2024-08-29 10:51:50 +08:00
ly f57c0c84ef Merge pull request 'dev' (#3) from dev into main
Reviewed-on: #3
2024-08-29 10:13:10 +08:00
ly 9ee24627c2 Merge pull request 'dev' (#2) from dev into main
Reviewed-on: #2
2024-08-23 09:37:06 +08:00
ly 88761a5d10 Merge pull request 'dev' (#1) from dev into main
Reviewed-on: #1
2024-08-22 09:41:13 +08:00
63 changed files with 3008 additions and 358368 deletions
+23 -54
View File
@@ -4,59 +4,34 @@ SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zj
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
SQLITE_DATABASE_URL=sqlite:///./source.db
# The number of similar embeddings to return when retrieving documents.
TOP_K=10
#--------------------------
# 是否启用混合检索
HYBRID_ENABLED = true
# 混合检索阈值
HYBRID_ALPHA = 0.6
# 是否启用检索重排功能
RERANK_ENABLED=true
#---------- rerank- Xinference ----------------
#RERANK_PROVIDER=xinference
#RERANK_MODEL=bge-reranker-v2-m3
#RERANK_BASE_URL=http://10.1.16.39:9995
#RERANK_TOP_N=5
#RERANK_THRESHOLD=0.3
#---------- rerank- ollama ----------------
RERANK_PROVIDER=ollama
RERANK_MODEL= /models/bge-reranker-base
RERANK_TOP_N=5
RERANK_THRESHOLD=0.3
#---------- model - Xinference ----------------
#MODEL_PROVIDER=xinference
#OPENAI_API_KEY=xinference
#BASE_URL=http://172.20.0.145:9995
#MODEL=Qwen2-72B-Instruct-GPTQ-Int8
## Temperature for sampling from the model.
#LLM_TEMPERATURE=0.1
#---------- model - dashscope ----------------
DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
# The provider for the AI models to use.
MODEL_PROVIDER=dashscope
DASHSCOPE_API_KEY=sk-221d2d202e104618a56002ce2e7dc0d0
MODEL=qwen2-math-72b-instruct
#---------- embedding - Xinference ----------------
#EMBEDDING_PROVIDER=xinference
#EMBEDDING_MODEL=bge-m3
#EMBEDDING_BASE_URL=http://10.1.16.39:9995
#EMBEDDING_DIM=1024
#---------- embedding - dashscope ----------------
EMBEDDING_PROVIDER=dashscope
EMBEDDING_MODEL=text-embedding-v1
# The name of LLM model to use.
MODEL=qwen-max
# 是否启用检索重排功能
ENABLE_RERANK=true
# Name of the embedding model to use.
EMBEDDING_MODEL=text-embedding-v2
# Dimension of the embedding model to use.
EMBEDDING_DIM=1024
# The questions to help users get started (multi-line).
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
# The OpenAI API key to use.
# OPENAI_API_KEY=
# Temperature for sampling from the model.
# LLM_TEMPERATURE=
# Maximum number of tokens to generate.
# LLM_MAX_TOKENS=
# The number of similar embeddings to return when retrieving documents.
TOP_K=5
# The time in milliseconds to wait for the stream to return a response.
STREAM_TIMEOUT=60000
@@ -78,6 +53,7 @@ VECTOR_STORE_PATH=./storage_vector
BM_RETRIEVER_PATH =./storage_bm
PHOENIX_API_KEY=123456
PHOENIX_URL=http://localhost:6006/v1/traces
PHOENIX_PROJECT_NAME=ly_zjapp
@@ -104,10 +80,3 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath
- You can install any pip package (if it exists) by running a cell with pip install.
"
PRJTOJSON_URL = 'http://10.1.6.60:8092'
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
CHAT_UPLOAD_FILECACHE = "./output/uploaded"
JIEBA_DATA=./nltk_data
NLTK_DATA=./nltk_data
+13 -23
View File
@@ -1,8 +1,3 @@
JIEBA_DATA=./nltk_data
NLTK_DATA=./nltk_data
SQLITE_DATABASE_URL=sqlite:///./source.db
DATA_SOURCE_CACHE=./restapi
# The Llama Cloud API key.
# LLAMA_CLOUD_API_KEY=
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
@@ -19,28 +14,27 @@ HYBRID_ALPHA = 0.6
#--------------------------
# 是否启用检索重排功能
RERANK_ENABLED=true
#---------- rerank- Xinference ----------------
RERANK_PROVIDER=xinference
# Rerank model
RERANK_MODEL=bge-reranker-v2-m3
RERANK_BASE_URL=http://10.1.16.39:9995
RERANK_TOP_N=5
RERANK_THRESHOLD=0.3
#---------- model - Xinference ----------------
MODEL_PROVIDER=xinference # The provider for the AI models to use.
OPENAI_API_KEY=xinference # The OpenAI API key to use.
#---------- Xinference ----------------
# The provider for the AI models to use.
MODEL_PROVIDER=xinference
# The OpenAI API key to use.
OPENAI_API_KEY=xinference
BASE_URL=http://10.1.0.142:9995
MODEL=Qwen2-72B-Instruct-GPTQ-Int8
LLM_TEMPERATURE=0.1 # Temperature for sampling from the model.
#LLM_MAX_TOKENS= # Maximum number of tokens to generate.
#---------- embedding - Xinference ----------------
EMBEDDING_PROVIDER=xinference
# Temperature for sampling from the model.
LLM_TEMPERATURE=0.1
# Maximum number of tokens to generate.
#LLM_MAX_TOKENS=
# Name of the embedding model to use.
EMBEDDING_MODEL=bge-m3
EMBEDDING_BASE_URL=http://10.1.16.39:9995
EMBEDDING_DIM=1024 # Dimension of the embedding model to use.
# Dimension of the embedding model to use.
EMBEDDING_DIM=1024
##---------- OpenAI ----------------
## The provider for the AI models to use.
@@ -117,7 +111,3 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath
- You can install any pip package (if it exists) by running a cell with pip install.
"
PRJTOJSON_URL = 'http://10.1.6.60:8092'
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
CHAT_UPLOAD_FILECACHE = "./output/uploaded"
+244 -373
View File
@@ -1,4 +1,3 @@
import asyncio
import json
import logging
@@ -6,224 +5,97 @@ import time
from typing import Dict, List, Any, Optional, AsyncGenerator
from aiostream import stream
from fastapi import APIRouter, Request,HTTPException
from fastapi import APIRouter, Request
from fastapi.responses import StreamingResponse
from llama_index.core import BaseCallbackHandler
from llama_index.core.base.llms.types import ChatMessage
from llama_index.core.callbacks import CBEventType
from llama_index.core.chat_engine.types import StreamingAgentChatResponse
from llama_index.core.tools import ToolOutput
from llama_index.core.schema import NodeWithScore
from pydantic import BaseModel
from app.api.routers.request.base import userMng, conversations,message,ProjectInfo,feedback
from app.api.routers.request.baseConfig import *
from app.api.routers.request.base import userMng, conversations,message,parameter
from app.api.routers.request.models import ChatRequestData,ChatFileUploadRequest
from app.engine import get_chat_engine
import uuid
from app.api.routers.services.fileServices import PrjFileLoadService,ChatFileService
from app.api.routers.services.suggestion import NextQuestionSuggestion
import time
from llama_index.core.settings import Settings
logger = logging.getLogger("uvicorn")
api_router = r = APIRouter()
v1_router = v = APIRouter()
gEvent_handler = None
# Display titles (Chinese) for callback event names.
# Keys are event-type member names (the handler passes ``event_type.name`` as
# the payload 'title'); values are the user-facing titles put into the
# node_started / node_finished event data.
CH_Event_map = {
    # Ingestion-side events
    'CHUNKING': '文本切片',
    'NODE_PARSING': '节点解析',
    'EMBEDDING': '生成向量',
    # Query-side events
    'LLM': '知识问答',
    'QUERY': '查询',
    'RETRIEVE': '检索',
    'SYNTHESIZE': '答案合成',
    'TREE': '总结',
    'SUB_QUESTION': '问题分解',
    'TEMPLATING': '生成提示词模板',
    'FUNCTION_CALL': '函数调用',
    'RERANKING': '节点重排',
    # Control / error events
    'EXCEPTION': '执行异常',
    'AGENT_STEP': '单步执行',
}
class ChatCallbackEvent(BaseModel):
event_type: ChatEventType
event_type: CBEventType
payload: Optional[Dict[str, Any]] = None
event_id: str = ""
def get_common_param(self)-> dict:
def get_retrieval_message(self) -> dict | None:
if self.payload:
nodes = self.payload.get("nodes")
if nodes:
msg = f"根据查询检索到 {len(nodes)} 源文件"
else:
msg = f"查询检索中: '{self.payload.get('query_str')}'"
return {
'event': self.event_type.value,
'conversation_id':self.payload.get("conversation_id"),
'message_id': self.payload.get("message_id"),
'created_at': int(time.time()),
'task_id': self.payload.get("task_id")
"type": "events",
"data": {"title": msg},
}
else:
return None
def get_WorkflowStart_param(self) -> dict:
params = self.get_common_param()
params.update({
'workflow_run_id':self.payload.get('workflow_run_id'),
'data':{
"id": self.payload.get('workflow_run_id'),
"workflow_id": self.payload.get('workflow_id'),
"sequence_number": 1709,
"inputs": {
"sys.query": f"开始查询 {self.payload.get('query')}",
"sys.files": [],
"sys.conversation_id": self.payload.get('conversation_id'),
"sys.user_id": self.payload.get('use_id')
def get_tool_message(self) -> dict | None:
func_call_args = self.payload.get("function_call")
if func_call_args is not None and "tool" in self.payload:
tool = self.payload.get("tool")
return {
"type": "events",
"data": {
"title": f"调用工具 {tool.name} ,参数: {func_call_args}",
},
"created_at": int(time.time())
}
})
return params
def get_WorkflowFinished_param(self) -> dict:
params = self.get_common_param()
params.update({
'workflow_run_id':self.payload.get('workflow_run_id'),
'data':{
"id": self.payload.get('workflow_run_id'),
"workflow_id": self.payload.get('workflow_id'),
"sequence_number": 1709,
"status": "succeeded",
"outputs": {
"answer": self.payload.get('response')
},
"error": '',
"elapsed_time": 36.03764106379822,
"total_tokens": 11707,
"total_steps": 10,
"created_by": {
"id": str(uuid.uuid4()),
"user": self.payload.get('use_id')
},
"created_at": int(time.time()),
"finished_at": int(time.time()),
"files": []
}
})
return params
def get_NodeStart_param(self) -> dict:
    """Build the ``node_started`` event body from ``self.payload``.

    Starts from the dict returned by ``self.get_common_param()`` and adds
    the workflow run id plus a ``data`` section describing the node.

    Returns:
        dict: the event body, ready to be JSON-serialised.
    """
    params = self.get_common_param()
    node_id = self.payload.get('nodeid')
    title_key = self.payload.get('title')
    params.update({
        'workflow_run_id': self.payload.get('workflow_run_id'),
        'data': {
            "id": node_id,
            "node_id": node_id,
            "node_type": "http-request",
            # Fall back to the raw event name instead of raising KeyError
            # when the event type has no translated title.
            "title": CH_Event_map.get(title_key, title_key),
            "index": self.payload.get('index'),
            "predecessor_node_id": self.payload.get('predecessor_node_id'),
            "inputs": '',
            # Was a hard-coded epoch constant; report the actual time.
            "created_at": int(time.time()),
            "extras": {}
        }
    })
    return params
def get_NodeFinished_param(self) -> dict:
    """Build the ``node_finished`` event body from ``self.payload``.

    Starts from the dict returned by ``self.get_common_param()`` and adds
    the workflow run id plus a ``data`` section describing the finished
    node. The node is always reported with status "succeeded".

    Returns:
        dict: the event body, ready to be JSON-serialised.
    """
    params = self.get_common_param()
    node_id = self.payload.get('nodeid')
    title_key = self.payload.get('title')
    # Was a hard-coded epoch constant. The payload carries no start time,
    # so the same "now" value is used for both timestamps.
    now = int(time.time())
    params.update({
        'workflow_run_id': self.payload.get('workflow_run_id'),
        'data': {
            "id": node_id,
            "node_id": node_id,
            "node_type": "http-request",
            # Fall back to the raw event name instead of raising KeyError
            # when the event type has no translated title.
            "title": CH_Event_map.get(title_key, title_key),
            "index": self.payload.get('index'),
            "predecessor_node_id": self.payload.get('predecessor_node_id'),
            "inputs": '',
            "process_data": '',
            "outputs": '',
            "status": "succeeded",
            "error": '',
            # NOTE(review): placeholder value, not a measured duration.
            "elapsed_time": 0.10402441816404462,
            "execution_metadata": '',
            "created_at": now,
            "finished_at": now,
            "files": []
        }
    })
    return params
def get_Message_param(self) -> dict:
params = self.get_common_param()
params.update({
'id':self.payload.get('message_id'),
'answer':self.payload.get('answer')
})
return params
def get_MessageEnd_param(self) -> dict:
    """Build the ``message_end`` event body from ``self.payload``.

    Starts from the dict returned by ``self.get_common_param()`` and adds
    the message id plus a ``metadata`` section holding one
    ``retriever_resources`` entry per item in the payload's
    ``source_node`` list (empty when the key is missing or ``None``).

    Each source item is read through ``.node.metadata``, ``.node_id``,
    ``.score`` and ``.text``.

    Returns:
        dict: the event body, ready to be JSON-serialised.
    """
    params = self.get_common_param()
    source_nodes = self.payload.get('source_node') or []

    retriever_resources = []
    for position, source_node in enumerate(source_nodes):
        metadata = source_node.node.metadata
        content = source_node.text
        retriever_resources.append({
            "position": position,
            "dataset_id": metadata.get("pipeline_id"),
            "dataset_name": metadata.get("file_name"),
            "document_id": source_node.node_id,
            "document_name": metadata.get("file_name"),
            "data_source_type": "upload_file",
            "segment_id": source_node.node_id,
            "retriever_from": "workflow",
            "score": source_node.score,
            "hit_count": 1,
            # Was a constant 632 for every segment; report the length of
            # the segment text actually returned.
            "word_count": len(content),
            "segment_position": position,
            "index_node_hash": "",
            "content": content
        })

    params.update({
        'id': self.payload.get('message_id'),
        'metadata': {
            "retriever_resources": retriever_resources,
            # NOTE(review): the usage figures below are fixed placeholders,
            # not values measured for this request.
            "usage": {
                "prompt_tokens": 4972,
                "prompt_unit_price": "0.0",
                "prompt_price_unit": "0.0",
                "prompt_price": "0.0",
                "completion_tokens": 332,
                "completion_unit_price": "0.0",
                "completion_price_unit": "0.0",
                "completion_price": "0.0",
                "total_tokens": 5304,
                "total_price": "0.0",
                "currency": "USD",
                "latency": 4.897703120019287
            }
        }
    })
    return params
def to_response(self)-> dict|None:
def _is_output_serializable(self, output: Any) -> bool:
try:
match self.event_type.value:
case "workflow_started":
return self.get_WorkflowStart_param()
case "workflow_finished":
return self.get_WorkflowFinished_param()
case "node_started":
return self.get_NodeStart_param()
case 'node_finished':
return self.get_NodeFinished_param()
case 'message':
return self.get_Message_param()
case 'message_end':
return self.get_MessageEnd_param()
json.dumps(output)
return True
except TypeError:
return False
def get_agent_tool_response(self) -> dict | None:
response = self.payload.get("response")
if response is not None:
sources = response.sources
for source in sources:
# Return the tool response here to include the toolCall information
if isinstance(source, ToolOutput):
if self._is_output_serializable(source.raw_output):
output = source.raw_output
else:
output = source.content
return {
"type": "tools",
"data": {
"toolOutput": {
"output": output,
"isError": source.is_error,
},
"toolCall": {
"id": None, # There is no tool id in the ToolOutput
"name": source.tool_name,
"input": source.raw_input,
},
},
}
def to_response(self):
try:
match self.event_type:
case "retrieve":
return self.get_retrieval_message()
case "function_call":
return self.get_tool_message()
case "agent_step":
return self.get_agent_tool_response()
case _:
return None
except Exception as e:
@@ -234,7 +106,9 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
_aqueue: asyncio.Queue
is_done: bool = False
def __init__(self):
def __init__(
self,
):
"""Initialize the base callback handler."""
ignored_events = [
# CBEventType.CHUNKING,
@@ -245,19 +119,6 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
]
super().__init__(ignored_events, ignored_events)
self._aqueue = asyncio.Queue()
self._response: StreamingAgentChatResponse = None
self._ids:Dict[str,Any] = {}
self._chatData:ChatRequestData = None
self._nodeStack:List[str] = []
self._firstEventID:str = None
def setInitParams(self,ids:dict,data:ChatRequestData):
self._ids = ids
self._chatData = data
self._firstEventID = None
def setResponse(self,response: StreamingAgentChatResponse):
self._response = response
def on_event_start(
self,
@@ -266,26 +127,11 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
event_id: str = "",
**kwargs: Any,
) -> str:
if self._firstEventID is None:
self._firstEventID = event_id
self.start()
logger.info("event_start:{} type:{} payload:{}\n".format(event_id, event_type, payload))
self._nodeStack.append(event_id)
nindex = len(self._nodeStack) - 1
args:Dict[str,Any] = self._ids
args.update(
{
'nodeid':event_id,
'title':event_type.name,
'index':nindex + 1,
'predecessor_node_id': self._nodeStack[nindex - 1] if nindex > 0 else ''
}
)
nd_event = ChatCallbackEvent(event_type = ChatEventType.NODE_START,payload = args)
if nd_event.to_response() is not None:
self._aqueue.put_nowait(nd_event)
event = ChatCallbackEvent(event_id=event_id, event_type=event_type, payload=payload)
if event.to_response() is not None:
self._aqueue.put_nowait(event)
def on_event_end(
self,
@@ -295,27 +141,9 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
**kwargs: Any,
) -> None:
logger.info("event_end:{} type:{} payload:{}\n".format(event_id, event_type, payload))
#self.response = payload.get("response","")
args:Dict[str,Any] = self._ids
nodeID = self._nodeStack[-1]
if nodeID == event_id:
nindex = len(self._nodeStack) - 1
args.update(
{
'nodeid':event_id,
'title':event_type.name,
'index':nindex + 1,
'predecessor_node_id':self._nodeStack[nindex - 1] if nindex > 0 else ''
}
)
nd_event = ChatCallbackEvent(event_type = ChatEventType.NODE_FINISHED,payload = args)
if nd_event.to_response() is not None:
self._aqueue.put_nowait(nd_event)
self._nodeStack.pop()
if self._firstEventID is not None and self._firstEventID == event_id:
self.finished()
event = ChatCallbackEvent(event_id=event_id, event_type=event_type, payload=payload)
if event.to_response() is not None:
self._aqueue.put_nowait(event)
def start_trace(self, trace_id: Optional[str] = None) -> None:
"""No-op."""
@@ -336,51 +164,6 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
except asyncio.TimeoutError:
pass
# Build the WORKFLOW_START callback event for the current chat request.
# The payload is the handler's id dict extended with the requesting user,
# the query text and the conversation id taken from self._chatData.
def makeWorkflow_startEvent(self)->ChatCallbackEvent:
# NOTE: `args` is the same dict object as self._ids (no copy is made), so the
# update() below also stores 'use_id', 'query' and 'conversation_id' in
# self._ids for every later event built from it.
args:Dict[str,Any] = self._ids
args.update(
{
'use_id': self._chatData.user,
'query': self._chatData.query,
'conversation_id': self._chatData.conversation_id
}
)
return ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_START,payload = args)
# Build the WORKFLOW_FINISHED callback event for the current chat request.
# The payload is the handler's id dict extended with an empty 'response'
# and the conversation id taken from self._chatData.
def makeWorkflow_finishedEvent(self)->ChatCallbackEvent:
# NOTE: `args` is the same dict object as self._ids (no copy is made), so the
# update() below mutates self._ids. This method does not set 'use_id' itself;
# any 'use_id' in the payload comes from keys already present in self._ids.
args:Dict[str,Any] = self._ids
args.update(
{
'response': '',
'conversation_id': self._chatData.conversation_id
}
)
return ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_FINISHED,payload = args)
def makeMessage_EndEvent(self)->ChatCallbackEvent:
args:Dict[str,Any] = self._ids
if self._response is not None:
args.update({
'source_node': self._response.source_nodes
})
msgEnt_event = ChatCallbackEvent(event_type = ChatEventType.MESSAGE_END,payload = args)
return msgEnt_event
def start(self):
#添加工作流开始事件
wf_event = self.makeWorkflow_startEvent()
if wf_event.to_response() is not None:
self._aqueue.put_nowait(wf_event)
def finished(self):
wf_event = self.makeWorkflow_finishedEvent()
if wf_event.to_response() is not None:
self._aqueue.put_nowait(wf_event)
msgEnt_event = self.makeMessage_EndEvent()
if msgEnt_event.to_response() is not None:
self._aqueue.put_nowait(msgEnt_event)
class IDManager:
def createID(self):
return {
@@ -390,26 +173,95 @@ class IDManager:
"workflow_id": str(uuid.uuid4())
}
class DifyChatResponseEvent(BaseModel):
    """Common fields shared by every streamed chat response event.

    Subclasses add event-specific fields and set ``event`` themselves
    before delegating to this constructor.
    """

    event: str
    conversation_id: str
    message_id: str
    # Static fallback only; __init__ stamps the real creation time.
    created_at: int = 0
    task_id: str

    def __init__(self, **args):
        """Create the event, defaulting ``created_at`` to the current time.

        A class-level default of ``int(time.time())`` is evaluated once,
        when the class is defined, so every event would carry the same
        import-time timestamp. The default is computed per instance here;
        an explicitly passed ``created_at`` is kept unchanged.
        """
        args.setdefault('created_at', int(time.time()))
        super().__init__(**args)
class Workflow_started_DifyChatResponseEvent(DifyChatResponseEvent):
workflow_run_id:str
data:Dict[str,Any]
def __init__(self,**args):
args['data'] = {
"id": args['workflow_run_id'],
"workflow_id": args['workflow_id'],
"sequence_number": 1709,
"inputs": {
"sys.query": args['query'],
"sys.files": [],
"sys.conversation_id": args['conversation_id'],
"sys.user_id": args['use_id']
},
"created_at": int(time.time())
}
args['event'] = 'workflow_started'
super().__init__(**args)
class Workflow_finished_DifyChatResponseEvent(DifyChatResponseEvent):
workflow_run_id:str
data:Dict[str,Any]
def __init__(self,**args):
args['event'] = 'workflow_finished'
args['data'] = {
"id": args['workflow_run_id'],
"workflow_id": args['workflow_id'],
"sequence_number": 1709,
"status": "succeeded",
"outputs": {
"answer": args['response']
},
"error": '',
"elapsed_time": 36.03764106379822,
"total_tokens": 11707,
"total_steps": 10,
"created_by": {
"id": str(uuid.uuid4()),
"user": args['use_id']
},
"created_at": int(time.time()),
"finished_at": int(time.time()),
"files": []
}
super().__init__(**args)
class Message_DifyChatResponseEvent(DifyChatResponseEvent):
id:str
answer:str
def __init__(self,**args):
args['id'] = args['message_id']
args['event'] = 'message'
super().__init__(**args)
class MessageEnd_DifyChatResponseEvent(DifyChatResponseEvent):
id:str
metadata:Dict[str,Any] = {}
def __init__(self,**args):
args['id'] = args['message_id']
args['event'] = 'message_end'
super().__init__(**args)
class ChatStreamResponse(StreamingResponse):
TEXT_PREFIX = "data: "
DATA_PREFIX = "data: "
ids:Dict[str,Any] = {}
data:ChatRequestData = None
@classmethod
def convert_Message(cls, token: str):
params = cls.ids
params.update({
'answer':token,
'conversation_id':cls.data.conversation_id
})
event = ChatCallbackEvent(event_type = ChatEventType.MESSAGE,payload = params)
data_str = json.dumps(event.to_response())
def convert_text(cls, token: str):
# Escape newlines and double quotes to avoid breaking the stream
#token = json.dumps(token)
#return f"data: {{"event": "message", "conversation_id": "80d85523-de92-4b9d-aca0-c48a5eacb068", "message_id": "16a06b1b-a89b-49c0-bc15-123bd999f6d6", "created_at": 1724406492, "task_id": "802f3064-030d-42ac-a882-0e1293712d04", "id": "16a06b1b-a89b-49c0-bc15-123bd999f6d6", "answer": "{token}"}}"
return "\n"
@classmethod
def convert_data(cls, data: dict):
data_str = json.dumps(data)
return f"{cls.DATA_PREFIX}{data_str}\n\n"
@classmethod
def convert_Event(cls, data: dict):
data_str = json.dumps(data)
def convert_event(cls, event: DifyChatResponseEvent):
data_str = json.dumps(event.dict())
return f"{cls.DATA_PREFIX}{data_str}\n\n"
def __init__(
@@ -417,11 +269,8 @@ class ChatStreamResponse(StreamingResponse):
request: Request,
event_handler: ChatEventCallbackHandler,
response: StreamingAgentChatResponse,
data: ChatRequestData,
ids:Dict[str,Any]
data: ChatRequestData
):
ChatStreamResponse.ids = ids
ChatStreamResponse.data = data
content = ChatStreamResponse.content_generator(
request, event_handler, response, data
)
@@ -435,27 +284,41 @@ class ChatStreamResponse(StreamingResponse):
response: StreamingAgentChatResponse,
data: ChatRequestData
):
ids = IDManager().createID()
# Yield the text response
async def _chat_response_generator():
final_response = ""
async for token in response.async_response_gen():
final_response += token
yield ChatStreamResponse.convert_Message(token)
args = ids
args['answer'] = token
args['conversation_id'] = data.conversation_id
event = Message_DifyChatResponseEvent(**args)
yield ChatStreamResponse.convert_event(event)
#yield ChatStreamResponse.convert_text(token)
# 存储消息历史
message().add(user_id=data.user,conversation_id=data.conversation_id,query=data.query,answer=final_response)
# the text_generator is the leading stream, once it's finished, also finish the event stream
event_handler.is_done = True
event_handler.setResponse(response)
# 发送工作流结束事件
args = ids
args['response'] = final_response
args['conversation_id'] = data.conversation_id
wf_event = Workflow_finished_DifyChatResponseEvent(**args)
yield ChatStreamResponse.convert_event(wf_event)
msgEnt_event = MessageEnd_DifyChatResponseEvent(**ids)
yield ChatStreamResponse.convert_event(msgEnt_event)
# Yield the events from the event handler
async def _event_generator():
async for event in event_handler.async_event_gen():
event_response = event.to_response()
if event_response is not None:
yield ChatStreamResponse.convert_Event(event_response)
yield ChatStreamResponse.convert_text("")
combine = stream.merge(_chat_response_generator(), _event_generator())
is_stream_started = False
@@ -464,42 +327,52 @@ class ChatStreamResponse(StreamingResponse):
if not is_stream_started:
is_stream_started = True
# 发送工作流开始事件
args = ids
args['use_id'] = data.user
args['query'] = data.query
args['conversation_id'] = data.conversation_id
wf_event = Workflow_started_DifyChatResponseEvent(**args)
yield ChatStreamResponse.convert_event(wf_event)
# Stream a blank message to start the stream
# 发送一个空消息事件
#yield ChatStreamResponse.convert_text("")
yield output
if await request.is_disconnected():
break
@v.post("/chat-messages")
async def post_chatmessages(request: Request, data: ChatRequestData):
global gEvent_handler
async def post_conversations(request: Request, data: ChatRequestData):
userMng.findNoExistCreate(data.user)
data.conversation_id = data.conversation_id if data.conversation_id else str(uuid.uuid4())
conversaObj = conversations()
conversationinfo = conversaObj.get(data.conversation_id)
if conversationinfo is None:
conversationinfo = conversaObj.add(data.conversation_id, data.user, "新建会话",inputs= data.inputs)
conversationinfo = conversaObj.add(data.conversation_id, data.user, "新建会话")
# 生成聊天参数
last_message_content = ChatMessage.from_str(data.query)
filters = None
params = data.inputs or {}
# 启动聊天事件监听
ids = IDManager().createID()
if gEvent_handler is None:
gEvent_handler = ChatEventCallbackHandler()
Settings.llm.callback_manager.handlers.append(gEvent_handler)
if gEvent_handler is not None:
gEvent_handler.setInitParams(ids = ids,data = data)
# 获取聊天引擎对象
chat_engine = get_chat_engine(filters=filters, params=params)
# 启动聊天事件监听
event_handler = ChatEventCallbackHandler()
chat_engine.callback_manager.handlers.append(event_handler) # type: ignore
# 执行异步聊天
response = await chat_engine.astream_chat(data.query)
# 返回异步消息回应
return ChatStreamResponse(request, gEvent_handler, response, data,ids)
return ChatStreamResponse(request, event_handler, response, data)
@v.get("/messages")
async def query_messages(user:str, conversation_id:str):
@@ -515,9 +388,8 @@ async def query_messages(user:str, conversation_id:str):
for record in records:
res = record.dict()
feeds = feedback().query(res['id'])
res["message_files"] = []
res["feedback"] = {'rating':feeds['rating'] } if feeds != None else ''
res["feedback"] = ''
res["retriever_resources"] = []
res["created_at"] = 1723444905
res["agent_thoughts"] = []
@@ -566,51 +438,50 @@ async def query_conversations(user:str, first_id:str = None, limit:str = None, p
@v.get("/parameters")
async def query_parameters(user:str):
prjObj = ProjectInfo()
return BaseConfig().ParamterCfg(projectInfo = prjObj.projectNames())
@v.post("/messages/{message_id}/feedbacks")
async def post_feedbacks(request: Request,message_id:str,params:Dict[str,Any]):
    """Store or clear the feedback rating for one message.

    Args:
        request: the incoming HTTP request (unused here).
        message_id: id of the message being rated.
        params: request body; its ``rating`` entry is the rating to store.
            A ``None`` or missing rating removes any stored feedback.
    """
    # .get() so a body without 'rating' is treated like an explicit null
    # instead of raising KeyError and surfacing as a server error.
    rating = params.get('rating')
    if rating is None:
        feedback().delete(message_id)
        return

    results = message().query(message_id)
    if results:
        # Feedback is stored together with the rated question and answer.
        result = results[0]
        feedback().add(message_id=message_id,query=result['query'],
                       answer=result['answer'],rating=rating)
@v.post("/files/upload")
def upload_file(request: ChatFileUploadRequest):
try:
logger.info("Processing file")
resluts = ChatFileService.process_file(request.base64)
return {
'id':resluts.get('id'),
'name': resluts.get('name'),
'size': resluts.get('size'),
'extension':resluts.get('extension'),
'mime_type':resluts.get('mime_type'),
'created_by':str(uuid.uuid4()),
'created_at':int(time.time())
params = parameter().get(user)
if len(params) == 0:
params = {
"opening_statement": "您好,我是配网D3造价软件小助手,您可以问我有关配网造价软件的相关问题!",
"suggested_questions": [],
"suggested_questions_after_answer": {
"enabled": False
},
"speech_to_text": {
"enabled": False
},
"text_to_speech": {
"enabled": False,
"language": "",
"voice": ""
},
"retriever_resource": {
"enabled": True
},
"annotation_reply": {
"enabled": False
},
"more_like_this": {
"enabled": False
},
"user_input_form": [],
"sensitive_word_avoidance": {
"enabled": False
},
"file_upload": {
"image": {
"enabled": False,
"number_limits": 3,
"transfer_methods": [
"remote_url"
]
}
except Exception as e:
logger.error(f"Error processing file: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Error processing file")
@v.post("/project")
def upload_file(request: ChatFileUploadRequest):
try:
logger.info("Processing file")
return PrjFileLoadService.process_file(request.base64)
except Exception as e:
logger.error(f"Error processing file: {e}", exc_info=True)
raise HTTPException(status_code=500, detail="Error processing file")
@v.post("/messages/{message_id}/suggested")
async def post_suggested(request: Request,message_id:str,user:str):
questions = await NextQuestionSuggestion.suggest_next_questions(message_id)
return {
"result": "success",
"data":questions
},
"system_parameters": {
"image_file_size_limit": "10"
}
}
return params
@r.post("")
def upload_file(request: ChatFileUploadRequest) -> List[str]:
pass
+4 -68
View File
@@ -2,7 +2,7 @@ from datetime import datetime
import uuid
from app.api.routers.request.baseConfig import BaseConfig
from app.api.routers.request.dbOrm import DBManager
from typing import List
dbManage = DBManager()
class conversations:
@@ -24,13 +24,12 @@ class conversations:
return records[0]
return None
def add(self,id:str, user_id:str, name:str,inputs:dict):
template = BaseConfig().ConversationCfg()
def add(self,id:str, user_id:str, name:str):
template = BaseConfig.ConversationCfg
template['id'] = id
template['user_id'] = user_id
template['name'] = name
template['created_at'] = 1724399038
template['inputs'] = inputs
dbManage.addRecord(self._tableName,template)
def delete(self,id:str):
@@ -112,7 +111,7 @@ class message:
return datas
def add(self,user_id:str,conversation_id:str,query:str,answer:str):
template = BaseConfig.MessageCfg()
template = BaseConfig.MessageCfg
template['id'] = str(uuid.uuid4())
template['user_id'] = user_id
template['conversation_id'] = conversation_id
@@ -123,67 +122,4 @@ class message:
def delete(self,user_id:str):
dbManage.delete(self._tableName,user_id = user_id)
def query(self,id:str):
results = []
condition = {'id':id}
records = dbManage.query(self._tableName,**condition)
for record in records:
results.append(record.dict())
return results
class feedback:
def __init__(self) -> None:
self._tableName = 'feedbacks'
dbManage.createTable(self._tableName)
def add(self,message_id:str,query:str,answer:str,rating:str):
record = {
'message_id': message_id,
'query': query,
'answer': answer,
'rating': rating,
}
dbManage.addRecord(self._tableName,record)
def delete(self,message_id:str):
cond = {'message_id':message_id}
dbManage.delete(self._tableName,**cond)
def query(self,message_id:str):
cond = {'message_id':message_id}
records = dbManage.query(self._tableName,**cond)
if len(records) > 0:
return records[0].dict()
return None
class ProjectInfo:
    """Access helper for the 'projectInfos' table (project name <-> flag).

    All storage goes through the module-level ``dbManage`` object.
    """

    def __init__(self) -> None:
        self._tableName = 'projectInfos'
        # Called on every construction, before any read or write below.
        dbManage.createTable(self._tableName)

    def add(self,name:str,flag:str):
        """Insert a project record unless one with the same flag exists."""
        existing = dbManage.query(self._tableName,prjFlag = flag)
        if not existing:
            record = {
                'prjectName': name,
                'prjFlag': flag
            }
            dbManage.addRecord(self._tableName,record)

    def projectNames(self)->List[str]:
        """Return the non-empty project names of all stored records."""
        names = []
        for record in dbManage.query(self._tableName):
            name = record.dict().get('prjectName')
            # Skip missing (None) names as well as empty strings, so the
            # returned list only ever contains usable names.
            if name:
                names.append(name)
        return names

    def prjFalg(self,name:str):
        """Return the flag of the first record named ``name``, or ''."""
        for record in dbManage.query(self._tableName):
            data = record.dict()
            if data.get('prjectName') == name:
                return data['prjFlag']
        return ''
+11 -52
View File
@@ -1,16 +1,8 @@
from pydantic import BaseModel
import os
from enum import Enum
class BaseConfig(BaseModel):
projectInfo:str = os.getenv("PROJECT_TITLE","会话提示消息")
def ParamterCfg(self,**args):
prjItems = args.get('projectInfo')
questions = os.getenv("CONVERSATION_STARTERS", "dev")
return{
"opening_statement": self.projectInfo,
"suggested_questions": questions.split('\n'),
class BaseConfig:
ParamterCfg = {
"opening_statement": "您好,我是配网D3造价软件小助手,您可以问我有关配网造价软件的相关问题!",
"suggested_questions": [],
"suggested_questions_after_answer": {
"enabled": False
},
@@ -31,18 +23,7 @@ class BaseConfig(BaseModel):
"more_like_this": {
"enabled": False
},
"user_input_form": [
{
"select": {
"variable": "projectname",
"label": "\u5de5\u7a0b\u540d\u79f0",
"type": "select",
"max_length": 48,
"required": True,
"options": prjItems
}
}
],
"user_input_form": [],
"sensitive_word_avoidance": {
"enabled": False
},
@@ -56,35 +37,22 @@ class BaseConfig(BaseModel):
}
},
"system_parameters": {
"image_file_size_limit": "10",
"language": "",
"voice": "",
},
"retriever_resource": {
"enabled": True
},
"annotation_reply": {
"enabled": False
},
"more_like_this": {
"enabled": False
},
"image_file_size_limit": "10"
}
}
def ConversationCfg(self):
return{
ConversationCfg = {
"id": "",
'user_id':'',
"name": "",
"inputs": {},
"status": "normal",
"introduction": self.projectInfo,
"introduction": ParamterCfg['opening_statement'],
"created_at":''
}
@classmethod
def MessageCfg(cls):
return {
MessageCfg = {
"id": "",
'user_id':'',
"conversation_id": "",
@@ -92,12 +60,3 @@ class BaseConfig(BaseModel):
"query": "",
"answer": ""
}
class ChatEventType(str, Enum):
    """Names of the events emitted on the chat response stream.

    Members are also ``str`` instances, so each one compares equal to its
    wire value.
    """

    # Workflow lifecycle
    WORKFLOW_START = "workflow_started"
    WORKFLOW_FINISHED = "workflow_finished"
    # Per-node lifecycle
    NODE_START = "node_started"
    NODE_FINISHED = "node_finished"
    # Answer text and end-of-answer marker
    MESSAGE = "message"
    MESSAGE_END = "message_end"
+10 -42
View File
@@ -2,7 +2,7 @@ import os
from typing import Dict, List, Any
from pydantic import BaseModel
from sqlalchemy import create_engine, Column, String, Integer, JSON,Float
from sqlalchemy import create_engine, Column, String, Integer, JSON
from sqlalchemy.engine.reflection import Inspector
from sqlalchemy.orm import sessionmaker, declarative_base
@@ -24,6 +24,10 @@ class ConversationOrm(Base):
if 'name' in data:
self.name = data['name']
class UserOrm(Base):
__tablename__ = "user"
@@ -47,21 +51,6 @@ class MessagesOrm(Base):
query = Column(String)
answer = Column(String)
class FeedBackOrm(Base):
__tablename__ = "feedbacks"
message_id = Column(String,primary_key=True)
query = Column(String)
answer = Column(String)
rating = Column(String)
class ProjectInfoOrm(Base):
__tablename__ = "projectInfos"
prjFlag = Column(String,primary_key=True)
prjectName = Column(String)
#数据结构
class ConversationModel(BaseModel):
id: str
@@ -72,6 +61,7 @@ class ConversationModel(BaseModel):
created_at: int
class Config:
#orm_mode = True
from_attributes=True
@classmethod
@@ -83,6 +73,7 @@ class UserModel(BaseModel):
createtime: str
class Config:
#orm_mode = True
from_attributes=True
@classmethod
@@ -95,6 +86,7 @@ class ParametersModel(BaseModel):
value : Dict[str, Any]
class Config:
#orm_mode = True
from_attributes=True
@classmethod
@@ -109,36 +101,13 @@ class MessagesModel(BaseModel):
answer : str
class Config:
#orm_mode = True
from_attributes=True
@classmethod
def orm(cls):
return MessagesOrm
class FeedBackModel(BaseModel):
message_id :str
query :str
answer :str
rating :str
class Config:
from_attributes=True
@classmethod
def orm(cls):
return FeedBackOrm
class ProjectInfoModel(BaseModel):
prjectName:str
prjFlag:str
class Config:
from_attributes=True
@classmethod
def orm(cls):
return ProjectInfoOrm
class DBManager:
def __init__(self) -> None:
DATABASE_URL = os.getenv("SQLITE_DATABASE_URL")
@@ -178,8 +147,7 @@ class DBManager:
return
records = session.query(ormCls).filter_by(**filter).all()
if records is not None:
for record in records:
session.delete(record)
session.delete(records)
session.commit()
def update(self,tableName:str,data:Dict[str,Any],**filter):
+1 -3
View File
@@ -1,7 +1,7 @@
from typing import Dict, Any
from pydantic import BaseModel
from typing import Optional
class ChatRequestData(BaseModel):
inputs: Dict[str,Any]
@@ -13,5 +13,3 @@ class ChatRequestData(BaseModel):
class ChatFileUploadRequest(BaseModel):
base64: str
@@ -1,134 +0,0 @@
import base64,os,mimetypes,requests,tempfile
from typing import List,Dict,Any
from uuid import uuid4
from app.settings import init_settings
from app.engine.loaders import get_document_Types, get_documents,getFileCacahePath
from app.engine.vectordb import get_vector_store
from app.engine.generate import get_doc_store,run_pipeline,persist_storage
from llama_index.core.schema import Document
from pathlib import Path
from llama_index.core.readers.file.base import (
_try_loading_included_file_formats as get_file_loaders_map,
)
from llama_index.readers.file import FlatReader
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core import VectorStoreIndex
from app.engine.index import get_index
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
class PrjFileLoadService:
    """Convert an uploaded project file via the PRJTOJSON service and index the result."""

    @staticmethod
    def store_and_parse_file(file_data):
        """Send ``file_data`` to the conversion service and unpack the returned archive.

        The file is posted to ``<PRJTOJSON_URL>/prj_convert_clt2json``; the text of
        that response is then posted to ``<PRJTOJSON_URL>/file_download`` and the
        returned body is extracted as a zip archive into
        ``<file cache path>/Projects/<uuid>``.

        Returns:
            ``'Projects_<uuid>'`` on success, or ``None`` when the service is not
            configured, either response is unusable, or extraction fails.
        """
        import io
        import logging
        import zipfile

        log = logging.getLogger(__name__)

        base_url = os.getenv('PRJTOJSON_URL')
        if not base_url:
            # Previously this surfaced as an opaque TypeError on `None + str`.
            log.error("PRJTOJSON_URL is not configured; cannot convert project file")
            return None

        convert_resp = requests.post(
            url=base_url + '/prj_convert_clt2json',
            files={'file': file_data},
        )
        if not convert_resp.ok or not convert_resp.text:
            return None

        download_resp = requests.post(
            url=base_url + '/file_download',
            data=convert_resp.text,
        )
        # `.content` is bytes, so it must be tested for emptiness rather than
        # compared against the str ''.
        if not download_resp.ok or not download_resp.content:
            return None

        try:
            prjID = str(uuid4())
            target_dir = os.path.join(getFileCacahePath(), 'Projects', prjID)
            os.makedirs(target_dir)
            # Read the archive straight from memory: no temp file to leak and no
            # platform-specific path separator.
            with zipfile.ZipFile(io.BytesIO(download_resp.content), 'r') as archive:
                for member in archive.infolist():
                    # Entries without the UTF-8 flag (bit 11) were decoded by
                    # zipfile as cp437; re-decode those as GBK. Flagged entries
                    # are already correct and would fail the cp437 round trip.
                    if not member.flag_bits & 0x800:
                        member.filename = member.filename.encode('cp437').decode('gbk')
                    archive.extract(member, target_dir)
            return f'Projects_{prjID}'
        except Exception:
            # Best-effort contract: callers treat None as "conversion failed".
            log.exception("Failed to unpack converted project archive")
            return None

    @staticmethod
    def process_file(base64_content: str) -> "str | None":
        """Convert the uploaded project, then build and persist its vector data.

        Returns:
            The project flag produced by ``store_and_parse_file``, or ``None``
            when conversion failed.
        """
        prjFlag = PrjFileLoadService.store_and_parse_file(base64_content)
        if prjFlag is None:
            return None

        # Generate embeddings and persist them locally.
        documents = get_documents(prjFlag)
        for doc in documents:
            doc.metadata["private"] = "false"
        docstore = get_doc_store(prjFlag)
        vector_store = get_vector_store(prjFlag)
        _ = run_pipeline(docstore, vector_store, documents)
        persist_storage(docstore, vector_store)
        return prjFlag
class ChatFileService:
    """Store a chat-uploaded file, parse it into documents and add it to the index."""

    PRIVATE_STORE_PATH = os.getenv('CHAT_UPLOAD_FILECACHE','output/uploaded')
    # Metadata of the most recently processed upload (mime_type, extension, id,
    # file_name, size). Shared at class level, so it is overwritten per upload.
    resluts:Dict[str,Any] = {}

    @staticmethod
    def process_file(base64_content: str) -> dict:
        """Decode a data-URL upload, index its content and return its metadata.

        Args:
            base64_content: A ``data:<mime>;base64,<payload>`` string.

        Returns:
            A snapshot (copy) of the upload metadata collected in ``resluts``.

        Raises:
            ValueError: If the data URL is malformed or the file type has no loader.
        """
        file_data, extension = ChatFileService.preprocess_base64_file(base64_content)
        documents = ChatFileService.store_and_parse_file(file_data, extension)

        # Run the ingestion pipeline once (it used to run twice on the same
        # documents, doubling the work for an identical result).
        nodes = IngestionPipeline().run(documents=documents)

        current_index = get_index()
        if current_index is None:
            current_index = VectorStoreIndex(nodes=nodes)
        else:
            current_index.insert_nodes(nodes=nodes)
        current_index.storage_context.persist(
            persist_dir=os.environ.get("STORAGE_DIR", "storage")
        )
        # Return a copy so a later upload cannot mutate this caller's result.
        return dict(ChatFileService.resluts)

    @staticmethod
    def preprocess_base64_file(base64_content: str) -> tuple:
        """Split a data URL into decoded bytes and a guessed file extension.

        Returns:
            ``(file_bytes, extension)``; ``extension`` is whatever
            ``mimetypes.guess_extension`` yields for the MIME type (may be ``None``).

        Raises:
            ValueError: If the string has no ``,`` separator or no ``:`` before
                the MIME type, or the payload is not valid base64.
        """
        header, comma, data = base64_content.partition(",")
        _scheme, colon, mime_type = header.split(";")[0].partition(":")
        if not comma or not colon:
            raise ValueError(
                "Invalid data URL: expected 'data:<mime-type>;base64,<payload>'"
            )
        extension = mimetypes.guess_extension(mime_type)
        ChatFileService.resluts['mime_type'] = mime_type
        ChatFileService.resluts['extension'] = extension
        return base64.b64decode(data), extension

    @staticmethod
    def store_and_parse_file(file_data, extension) -> "List[Document]":
        """Write the upload under ``PRIVATE_STORE_PATH`` and load it as documents.

        Raises:
            ValueError: If no reader is registered for ``extension``.
        """
        # Resolve the reader first so unsupported uploads are rejected before
        # anything is written to disk.
        reader_cls = ChatFileService.default_file_loaders_map().get(extension)
        if reader_cls is None:
            raise ValueError(f"File extension {extension} is not supported")

        os.makedirs(ChatFileService.PRIVATE_STORE_PATH, exist_ok=True)
        fileID = uuid4().hex
        file_name = f"{fileID}{extension}"
        file_path = Path(os.path.join(ChatFileService.PRIVATE_STORE_PATH, file_name))
        ChatFileService.resluts['id'] = fileID
        ChatFileService.resluts['file_name'] = file_name

        with open(file_path, "wb") as f:
            f.write(file_data)
        ChatFileService.resluts['size'] = os.path.getsize(file_path)

        documents = reader_cls().load_data(file_path)
        for doc in documents:
            doc.metadata["file_name"] = file_name
            doc.metadata["private"] = "true"  # marks the document as user-private
        return documents

    @staticmethod
    def default_file_loaders_map():
        """Return llama-index's default extension→reader map, with ``.txt`` added."""
        default_loaders = get_file_loaders_map()
        default_loaders[".txt"] = FlatReader
        return default_loaders
@@ -1,43 +0,0 @@
from typing import List
from app.api.routers.request.base import message
from llama_index.core.prompts import PromptTemplate
from llama_index.core.settings import Settings
from pydantic import BaseModel
# Prompt asking the LLM for follow-up questions. Both placeholders use
# str.format-style braces so PromptTemplate can substitute them; the previous
# "$number_of_questions" was sent to the model literally.
NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
    "你是一个乐于助人的助手!你的任务是对用户可能会问的下一个问题给出建议。 "
    "\n这是对话历史记录"
    "\n---------------------\n{conversation}\n---------------------"
    "考虑到对话历史记录,仅限于现在知识库已有内容, 请给我 {number_of_questions} 个你接下来可能会问题的问题!"
)
# Default number of follow-up questions to request.
N_QUESTION_TO_GENERATE = 3


class NextQuestions(BaseModel):
    """A list of questions that user might ask next"""

    questions: List[str]


class NextQuestionSuggestion:
    """Generate follow-up question suggestions from a stored chat message."""

    @staticmethod
    async def suggest_next_questions(
        message_id: str,
        number_of_questions: int = N_QUESTION_TO_GENERATE,
    ) -> List[str]:
        """Suggest questions the user might ask after the given message.

        Args:
            message_id: Id passed to ``message().query`` to look up the exchange.
            number_of_questions: How many suggestions to request from the LLM.

        Returns:
            The suggested questions, or an empty list when the lookup returns
            no records.
        """
        results = message().query(message_id)
        if not results:
            return []

        last_user_message = results[0]['query']
        last_assistant_message = results[0]['answer']
        conversation: str = f"{last_user_message}\n{last_assistant_message}"

        output: NextQuestions = await Settings.llm.astructured_predict(
            NextQuestions,
            prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
            conversation=conversation,
            # The keyword must match the prompt placeholder name
            # (it was misspelled "nun_questions").
            number_of_questions=number_of_questions,
        )
        return output.questions
+14 -24
View File
@@ -6,25 +6,16 @@ from llama_index.core.tools.query_engine import QueryEngineTool
from app.engine.engine import create_query_engine, create_summary_query_engine
from app.engine.index import get_index
from app.engine.prompt import ReActChatFormatter_messages, tree_summary_query_engine_tool_messages, \
query_engine_tool_messages, summary_query_tool_messages
#from app.engine.loaders.db import makeDescriptionByEngine
from app.engine.tools import ToolFactory
from app.api.routers.request.base import ProjectInfo
from llama_index.core.response_synthesizers import ResponseMode
def getPrjFalg(params:dict=None)->str:
prjFlag = ''
if params is not None:
prjFlag = ProjectInfo().prjFalg(params.get('projectname'))
return prjFlag
def get_chat_engine(filters=None, params:dict=None):
def get_chat_engine(filters=None, params=None):
system_prompt = os.getenv("SYSTEM_PROMPT")
top_k = int(os.getenv("TOP_K", "3"))
use_reranker = os.getenv("RERANK_ENABLED")
tools = []
# 创建SQL查询工具
# sql_query_engine = create_summary_query_engine(index)
# sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
@@ -34,31 +25,31 @@ def get_chat_engine(filters=None, params:dict=None):
#tools.append(sql_query_tool)
# Add query tool if index exists
index = get_index(getPrjFalg(params))
index = get_index()
if index is not None:
summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
description=summary_query_tool_messages,
description="适用于任何需要进行全面总结、概括的要求。",
)
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "COMPACT")
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。",
)
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = ResponseMode.TREE_SUMMARIZE)
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
description=query_engine_tool_messages)
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = ResponseMode.TREE_SUMMARIZE)
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "TREE_SUMMARIZE")
query_engine_tool_1 = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool_1",
description=tree_summary_query_engine_tool_messages)
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后,且在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用“zj_query_tool_1”工具。",
)
tools.append(summary_query_tool)
tools.append(query_engine_tool)
tools.append(query_engine_tool_1)
tools.append(summary_query_tool)
# Add additional tools
tools += ToolFactory.from_env()
react_chat_formatter = ReActChatFormatter.from_defaults(ReActChatFormatter_messages)
prefix_messages = ("""您的设计旨在帮助完成各种任务,从回答问题到提供其他类型分析的摘要。\n\n##工具\n\n你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n这可能需要将任务分解为子任务,并使用不同的工具来完成每个子任务。\n\n你可以访问以下工具:\n{tool_desc}\n\n\n##输出格式\n\n请用与问题相同的语言回答,并使用以下格式:\n\n \nThought: 用户当前的语言是:(user's language)。我需要使用工具来帮助我回答问题。\nAction: 如果使用工具,则为工具名称(one of {tool_names})。\nAction Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}}\n \n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n如果使用此格式,用户将以下面的格式进行回应:\n\n \nObservation: 工具响应\n \n\n你应该继续重复上述格式,直到你有足够的信息来回答问题而无需使用更多工具。此时,你必须使用以下两种格式之一进行回答:\n\n \nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\nAnswer: [你的答案(与用户问题相同的语言)]\n \n\n \nThought: 我无法使用提供的工具回答问题。\nAnswer: [你的答案(与用户问题相同的语言)]\n \n\n##如果从工具中得到的回应是Empty Response,那么只需要回答“我不知道”,不需要额外回答别的内容。## 当前对话\n\n以下是当前对话,由人类和助手的消息交替组成。\n""")
react_chat_formatter = ReActChatFormatter.from_defaults(prefix_messages)
agentrunner = AgentRunner.from_llm(
llm=Settings.llm,
tools=tools,
@@ -67,7 +58,6 @@ def get_chat_engine(filters=None, params:dict=None):
verbose=True,
)
return agentrunner
# create the function calling worker for reasoning
# worker = FunctionCallingAgentWorker.from_tools(
# tools, verbose=True
+5 -31
View File
@@ -7,28 +7,10 @@ from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.response_synthesizers import ResponseMode
from llama_index.readers.database import DatabaseReader
from sqlalchemy import create_engine
from util.register import *
from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
from app.engine.retriever.HybridRetriever import HybridRetriever
from app.engine.response.treeSummResponse import CustomTreeResponse
from llama_index.core.settings import Settings
ModelPlateCategory = '模型平台'
def get_node_postprocessors():
rerank_enabled = os.getenv("RERANK_ENABLED").title()
if rerank_enabled is None or rerank_enabled == 'False':
return []
Rerank_provider = os.getenv("RERANK_PROVIDER")
modelPaltCls = ClsRegister.get(ModelPlateCategory,Rerank_provider)
postprocess = None
if modelPaltCls is not None:
modelPalt = modelPaltCls()
postprocess = modelPalt.rerank()
else:
raise ValueError(f"Invalid rerank provider: {Rerank_provider}")
return postprocess
from app.settings import get_node_postprocessors
def makeDescriptionByEngine(sql_database:SQLDatabase):
reader = DatabaseReader(sql_database)
@@ -67,14 +49,6 @@ def get_Retriever(index,**kwargs):
return retriever
def get_synthesizer():
return CustomTreeResponse(
llm=Settings.llm,
summary_template=summary_template,
use_async=True,
streaming=False,
)
sql_database = None
sql_obj_index = None
@@ -107,7 +81,7 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None
summary_query_engine = summary_index.as_query_engine(
response_mode=ResponseMode.TREE_SUMMARIZE,
use_async=True,
streaming=False,
streaming=True,
)
return summary_query_engine
@@ -128,8 +102,8 @@ def create_query_engine(index, top_k=3, use_reranker=False, filters=None, respon
simple_template = simple_template,
node_postprocessors=postprocess,
use_async=True,
streaming=False,
response_mode = response_mode
streaming=True,
ResponseMode = response_mode
)
return query_engine
+14 -19
View File
@@ -5,12 +5,12 @@ load_dotenv()
import logging
import os
from app.engine.loaders import get_document_Types, get_documents
from app.engine.loaders import get_documents
from app.engine.vectordb import get_vector_store
from app.settings import init_settings
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter,MarkdownNodeParser
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.settings import Settings
from llama_index.core.storage import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
@@ -21,13 +21,12 @@ logger = logging.getLogger()
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
def get_doc_store(docType:str):
def get_doc_store():
# If the storage directory is there, load the document store from it.
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
storeDir = os.path.join(STORAGE_DIR,docType)
if os.path.exists(storeDir):
return SimpleDocumentStore.from_persist_dir(storeDir)
if os.path.exists(STORAGE_DIR):
return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
else:
return SimpleDocumentStore()
@@ -35,11 +34,10 @@ def get_doc_store(docType:str):
def run_pipeline(docstore, vector_store, documents):
pipeline = IngestionPipeline(
transformations=[
#SentenceSplitter(
#chunk_size=Settings.chunk_size,
#chunk_overlap=Settings.chunk_overlap,
#),
#MarkdownNodeParser(),
SentenceSplitter(
chunk_size=Settings.chunk_size,
chunk_overlap=Settings.chunk_overlap,
),
Settings.embed_model,
],
docstore=docstore,
@@ -63,9 +61,8 @@ def persist_storage(docstore, vector_store):
def persist_BMRetriever(vector_store):
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
nodes = vector_store.get_nodes([])
top_k = min(int(os.getenv("TOP_K", "3")),len(nodes))
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes = nodes)
top_k = int(os.getenv("TOP_K", "3"))
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes=vector_store.get_nodes([]))
bmRetriver.persist(STORAGE_DIR)
@@ -74,14 +71,12 @@ def generate_datasource():
logger.info("Generate index for the provided data")
# Get the stores and documents or create new ones
docTypes = get_document_Types()
for docType in docTypes:
documents = get_documents(docType)
documents = get_documents()
# Set private=false to mark the document as public (required for filtering)
for doc in documents:
doc.metadata["private"] = "false"
docstore = get_doc_store(docType)
vector_store = get_vector_store(docType)
docstore = get_doc_store()
vector_store = get_vector_store()
# Run the ingestion pipeline
_ = run_pipeline(docstore, vector_store, documents)
+13 -6
View File
@@ -1,15 +1,22 @@
import logging
from llama_index.core.indices import VectorStoreIndex
from app.engine.vectordb import get_vector_store
from app.engine.loaders import get_document_Types
from typing import Dict,Any
logger = logging.getLogger("uvicorn")
def get_index(prjFlag:str):
if prjFlag is None or prjFlag == '':
raise ValueError('无效的工程标识')
index = None
def get_index(params=None):
global index
if index is None:
logger.info("Connecting vector store...")
store = get_vector_store(prjFlag)
store = get_vector_store()
# Load the index from the vector store
# If you are using a vector store that doesn't store text,
# you must load the index from both the vector store and the document store
index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished load index from vector store.")
return index
+4 -73
View File
@@ -3,86 +3,17 @@ import yaml
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
from app.engine.loaders.file import getProjectName
import os
logger = logging.getLogger(__name__)
def load_configs():
with open("config/loaders.yaml",encoding='utf-8') as f:
with open("config/loaders.yaml",encoding='UTF-8') as f:
configs = yaml.safe_load(f)
return configs
def path_difference(path1:str, path2:str):
import os
path1 = os.path.abspath(path1)
path2 = os.path.abspath(path2)
path1_parts = path1.split(os.path.sep)
path2_parts = path2.split(os.path.sep)
for i, part in enumerate(path1_parts):
if part != path2_parts[i]:
break
else:
i += 1
pathKey = ''
for j in range(i,len(path2_parts)):
pathKey+=path2_parts[j] + '_'
return pathKey[0:-1]
def getFileCacahePath():
rootPath = 'data'
configs = load_configs()
if configs is not None and len(configs.items()) > 0:
for loader_type, loader_config in configs.items():
if loader_type == "file":
rootPath = FileLoaderConfig(**loader_config).data_dir
break
return rootPath
def get_document_Types():
rootPath = getFileCacahePath()
types = []
dirStack = [rootPath]
while len(dirStack) > 0:
curDir = dirStack.pop()
dirs = [os.path.join(curDir, d) for d in os.listdir(curDir) if os.path.isdir(os.path.join(curDir, d))]
if len(dirs) > 0:
for dir in dirs:
dirStack.append(dir)
else:
types.append(path_difference(rootPath,curDir))
return types
def getProjectInfos():
config = load_configs()
if config is None or len(config.items()) == 0:
return None
prjDir = None
for loader_type, loader_config in config.items():
if loader_config.get('enable', True):
loader_config = loader_config or []
config = FileLoaderConfig(**loader_config)
prjDir = config.data_dir
break
if prjDir is None:
return None
prjInfos = []
prjFlags = get_document_Types()
for prjFlag in prjFlags:
fileDir = os.path.join(config.data_dir,prjFlag.replace('_','\\'))
prjInfo = {}
prjInfo['flag'] = prjFlag
prjInfo['name'] = getProjectName(fileDir)
prjInfos.append(prjInfo)
return prjInfos
def get_documents(docType:str):
def get_documents():
documents = []
config = load_configs()
@@ -98,7 +29,7 @@ def get_documents(docType:str):
loader_config = loader_config or []
match loader_type:
case "file":
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
document = get_file_documents(FileLoaderConfig(**loader_config))
case "web":
document = get_web_documents(WebLoaderConfig(**loader_config))
case "db":
+6 -39
View File
@@ -6,9 +6,6 @@ from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.json import JSONReader
from llama_parse import LlamaParse
from pydantic import BaseModel, validator
from app.engine.loaders.markdownReader import ChunkMarkdownReader
from app.engine.loaders.projectJson import ProjectJson
logger = logging.getLogger(__name__)
@@ -23,6 +20,7 @@ class FileLoaderConfig(BaseModel):
raise ValueError(f"Directory '{v}' does not exist")
return v
def llama_parse_parser():
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
raise ValueError(
@@ -37,6 +35,7 @@ def llama_parse_parser():
)
return parser
def llama_parse_extractor() -> Dict[str, LlamaParse]:
from llama_parse.utils import SUPPORTED_FILE_TYPES
@@ -44,13 +43,10 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
def llama_local_extractor() -> Dict[str, BaseReader]:
parser = {
".json" : JSONReader(clean_json=False,levels_back=0),
".md" : ChunkMarkdownReader(),
}
return parser
return {".json" : JSONReader(clean_json=False,levels_back=0)}
def get_file_documents(config: FileLoaderConfig,childPath: str):
def get_file_documents(config: FileLoaderConfig):
from llama_index.core.readers import SimpleDirectoryReader
try:
@@ -67,7 +63,7 @@ def get_file_documents(config: FileLoaderConfig,childPath: str):
file_extractor = llama_local_extractor()
reader = SimpleDirectoryReader(
os.path.join(config.data_dir,childPath.replace('_','\\')),
config.data_dir,
recursive=True,
filename_as_id=True,
raise_on_error=True,
@@ -90,32 +86,3 @@ def get_file_documents(config: FileLoaderConfig,childPath: str):
else:
# Raise the error if it is not the case of empty data dir
raise e
def prjFileSuffix(dir:str):
entries = os.listdir(dir)
file_names = [entry for entry in entries if os.path.isfile(os.path.join(dir, entry))]
if len(file_names) > 0:
return os.path.splitext(file_names[0])[1]
return ''
def getProjectName(dir:str):
suffix = prjFileSuffix(dir)
if suffix== '.json':
prjJson = ProjectJson(dir)
prjJson.parse()
tb = prjJson.table('工程属性')
records = tb.records()
for record in records:
name = record.value('名称')
if name == '工程名称':
return record.value('')
elif suffix == '.md':
md_files = [f for f in os.listdir(dir) if f.endswith('.md')]
for md_file in md_files:
prjPath = os.path.join(dir, md_file)
basename = os.path.splitext(md_file)[0]
if basename =='工程属性':
rd = ChunkMarkdownReader()
rd.load_data(prjPath)
return rd.findValue("名称=='工程名称'",'')
return ''
-64
View File
@@ -1,64 +0,0 @@
from app.engine.loaders.projectJson import *
class MarkDown:
    """Render one parsed JSON table as a Markdown document and write it to disk."""

    # Bookkeeping fields that are never exported as table columns.
    _IGNORED_FIELDS = ('_id', 'nodeType', 'relTbId')

    def __init__(self, table: "JsonTable", path: str) -> None:
        """Remember the source table and the output file path."""
        self._table = table
        self._path = path

    def build(self):
        """Collect columns and rows from the table and write the Markdown file.

        Newlines inside cell values are replaced by spaces so each record stays
        on one table row. Non-string values are stringified; ``None`` becomes an
        empty cell.
        """
        columns: list = []
        colComments: list = []
        for name, fld in self._table.fields().items():
            if name in self._IGNORED_FIELDS:
                continue
            columns.append(fld.value('name'))
            colComments.append(fld.value('alias'))

        rowdatas = []
        for record in self._table.records():
            cells = []
            for col in columns:
                raw = record.value(col)
                text = "" if raw is None else str(raw)
                cells.append(text.replace('\n', " "))
            rowdatas.append(cells)

        content = self.convert(
            self._table.name(), self._table.comment(), columns, colComments, rowdatas
        )
        with open(self._path, 'w', encoding='utf-8') as file:
            file.write(content)

    def convert(self,tableName:str,tableComment:str,columns:list,colComments:list,rowdatas:list):
        """Build the Markdown text: title, field list, blank line, then the table.

        Args:
            tableName: Used as the level-1 heading.
            tableComment: Optional remark printed under the heading when non-empty.
            columns: Column names, in output order.
            colComments: Per-column remark, parallel to ``columns``; empty entries
                are skipped.
            rowdatas: One list of cell values per row.

        Returns:
            The complete Markdown document as a string.
        """
        title_lines = ["# " + tableName]
        # The remark line depends on the comment itself (it was wrongly guarded
        # by the table name, emitting an empty remark line).
        if tableComment:
            title_lines.append(f"备注:{tableComment}")
        for column, comment in zip(columns, colComments):
            title_lines.append(f"- 字段名称:{column}")
            if comment != '':
                title_lines.append(f" - 备注:{comment}")
        strTitle = "".join(line + '\n' for line in title_lines)

        table_lines = [
            # Header row.
            "|" + "|".join(columns) + "|",
            # Separator row.
            "|" + "|".join('---' for _ in columns) + "|",
        ]
        # Data rows.
        table_lines.extend("|" + "|".join(str(cell) for cell in row) + "|" for row in rowdatas)
        markdown_table = "".join(line + "\n" for line in table_lines)

        return strTitle + "\n" + markdown_table
# Module-level driver: parse every JSON table of a project directory and write
# one Markdown file per table. This executes at import time.
# NOTE(review): both the project directory ('') and the output path (f'') are
# empty strings here — presumably placeholders to fill in before running; confirm.
prjSon = ProjectJson('')
prjSon.parse()
tables = prjSon.tables()
for name,table in tables.items():
    mdObj = MarkDown(table,f'')
    mdObj.build()
@@ -1,89 +0,0 @@
from llama_index.readers.file.markdown import MarkdownReader
from typing import Any, Dict, List, Optional, Tuple
import re
from llama_index.core.utils import get_tokenizer
class ChunkMarkdownReader(MarkdownReader):
    """MarkdownReader that splits a Markdown table document into token-bounded chunks.

    Overrides ``markdown_to_tups`` to emit ``(header, body)`` tuples whose size is
    limited by ``chunkSize`` tokens, and records the table's column header and
    rows so ``findValue`` can look up a cell afterwards.
    """

    def __init__(
        self,
        *args: Any,
        chunkSize:int = 2048,
        **kwargs: Any,
    ) -> None:
        """Store the chunk size and tokenizer, then initialise the base reader.

        Args:
            chunkSize: Token budget used when deciding to start a new chunk.
        """
        self._chunkSize = chunkSize
        self._tokenizer = get_tokenizer()
        # First buffered line at the moment the "|---" separator row is seen.
        self._colheader = ''
        # Lines recorded once both the title and the table header are known.
        self._rows = []
        super().__init__(*args,**kwargs)

    def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
        """Split ``markdown_text`` into ``(key, text)`` tuples bounded by token count.

        The key of the first tuple is title + table header; later tuples use the
        table header only. Before returning, ``#`` is stripped from keys and
        ``<...>`` spans are removed from values.

        Raises:
            ValueError: If the token count already exceeds ``chunkSize`` when the
                title block ends.
        """
        markdown_tups: List[Tuple[Optional[str], str]] = []
        lines = markdown_text.split("\n")

        strTitle = ''
        tokensNum:int = 0
        current_lines = []
        strheader:str = ''
        headerSize:int = 0
        for line in lines:
            tokensNum += self._token_size(line)
            # Budget exceeded: flush the buffered lines as one chunk and restart
            # the counter from the header's token size.
            if tokensNum > self._chunkSize and len(current_lines) > 0:
                if len(markdown_tups) == 0:
                    markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
                else:
                    markdown_tups.append((strheader , "\n".join(current_lines)))
                tokensNum = headerSize
                current_lines.clear()
            current_lines.append(line)
            # Only lines seen after both title and header are kept as data rows.
            if strTitle!='' and strheader!='':
                self._rows.append(line)
            # NOTE(review): after split("\n") a line can never equal '\n', so only
            # the '\r' comparison can match (blank line in CRLF text). With
            # LF-only input the title is never captured — confirm intent.
            if line == '\n' or line == '\r':
                if tokensNum > self._chunkSize:
                    raise ValueError('标题Token数大于chunkSize大小')
                strTitle = "\n".join(current_lines)
                #headerSize = headerSize + self._token_size(strTitle)
                current_lines.clear()
            # The separator row closes the table header: everything buffered so
            # far (column row + separator) becomes the header.
            if line.startswith("|---"):
                self._colheader = current_lines[0]
                strheader = "\n".join(current_lines)
                headerSize= headerSize + self._token_size(strheader)
                current_lines.clear()

        # Flush whatever is left after the last line.
        if len(current_lines) > 0:
            if len(markdown_tups) == 0:
                markdown_tups.append((strTitle + strheader , "\n".join(current_lines)))
            else:
                markdown_tups.append((strheader , "\n".join(current_lines)))

        return [
            (
                key if key is None else re.sub(r"#", "", key).strip(),
                re.sub(r"<.*?>", "", value),
            )
            for key, value in markdown_tups
        ]

    def _token_size(self, text: str) -> int:
        """Return the number of tokens the tokenizer produces for ``text``."""
        return len(self._tokenizer(text))

    def findValue(self,expression:str,Field:str):
        """Return column ``Field`` of the first recorded row matching ``expression``.

        Each row is split on ``|`` and zipped with the column names into a dict,
        which is used as the globals for evaluating ``expression``.

        Returns:
            The matching row's ``Field`` value, or ``''`` when no row matches.
        """
        cols = self._colheader.split('|')
        cols = [item for item in cols if item]
        for row in self._rows:
            rowtrs = row.split('|')
            # NOTE(review): `item!='\r' or item!='\n'` is always true, so only the
            # truthiness test filters here; `and` may have been intended.
            rowdatas = [item for item in rowtrs if item and (item!='\r' or item!='\n')]
            if len(rowdatas) == 0:
                continue
            gData = {}
            for cName,rValue in zip(cols,rowdatas):
                gData[cName] = rValue
            # NOTE(review): eval() runs arbitrary Python; `expression` must never
            # come from untrusted input.
            if eval(expression,gData):
                return gData[Field]
        return ''
-94
View File
@@ -1,94 +0,0 @@
from typing import Dict,List,Any
import json,os
class Record:
    """Read-only wrapper around one record (row) dictionary of a JSON table."""

    def __init__(self, datas: Dict[str, Any]) -> None:
        """Keep a reference to the raw record mapping."""
        self._datas: Dict[str, Any] = datas

    def value(self, key: str):
        """Return the stored value for ``key``, or ``''`` when the key is absent."""
        return self._datas.get(key, '')
class Field:
    """Read-only wrapper around one field (column definition) dictionary."""

    def __init__(self, datas: Dict[str, Any]) -> None:
        """Keep a reference to the raw field-definition mapping."""
        self._datas: Dict[str, Any] = datas

    def value(self, key: str):
        """Return the attribute stored under ``key``, or ``''`` when it is absent."""
        return self._datas.get(key, '')
class JsonTable:
    """One table loaded from a project JSON file (``{"table": {...}}``)."""

    def __init__(self, filePth: str) -> None:
        """Remember the file path; nothing is read until ``parse`` is called."""
        self._filePth = filePth
        self._fields: Dict[str, "Field"] = {}
        self._records: List["Record"] = []
        # File name without directory or extension.
        self._fileName = os.path.splitext(os.path.basename(filePth))[0]
        self._name = ''
        self._comment = ''

    def parse(self):
        """Load fields and records from the JSON file.

        A missing or null ``table``, ``fields`` or ``records`` entry is treated
        as empty instead of raising, and a missing ``name``/``comment`` becomes ``''``.
        """
        with open(self._filePth, 'r', encoding='utf-8') as file:
            jsObj = json.load(file)

        data: dict = jsObj.get('table') or {}
        self._name = data.get('name') or ''
        self._comment = data.get('comment') or ''

        for jsField in data.get('fields') or []:
            field = Field(jsField)
            self._fields[field.value('name')] = field

        for jsRecord in data.get('records') or []:
            self._records.append(Record(jsRecord))

    def records(self):
        """Return the parsed records, in file order."""
        return self._records

    def fields(self):
        """Return the parsed fields keyed by their ``name`` attribute."""
        return self._fields

    def name(self):
        """Return the table name, which is the file's base name (not the JSON ``name``)."""
        return self._fileName

    def comment(self):
        """Return the table comment read from the JSON (``''`` if none)."""
        return self._comment
class ProjectJson:
    """Collection of the JSON tables found directly inside one project directory."""

    def __init__(self, dir: str) -> None:
        """Remember the project directory; tables are loaded by ``parse``."""
        self._dir = dir
        self._tables: Dict[str, "JsonTable"] = {}

    def parse(self):
        """Parse every ``*.json`` file in the directory into a ``JsonTable``.

        Tables are keyed by file name without extension.
        """
        for entry in os.listdir(self._dir):
            if not entry.endswith('.json'):
                continue
            loaded = JsonTable(os.path.join(self._dir, entry))
            loaded.parse()
            key, _ext = os.path.splitext(entry)
            self._tables[key] = loaded

    def table(self, tableName: str):
        """Return the table stored under ``tableName`` (``KeyError`` if unknown)."""
        return self._tables[tableName]

    def tables(self):
        """Return the mapping of all parsed tables."""
        return self._tables
def getProjectName(dir:str):
    """Return the project name, taken as the last component of a directory path.

    Both ``\\`` and ``/`` are accepted as separators and trailing separators are
    ignored. When the path has no usable component the placeholder
    "未知工程名称" is returned.

    The former lookup of the name in the '工程属性' table sat after an
    unconditional return and could never run, so it has been removed.
    """
    components = [part for part in dir.replace('\\', '/').split('/') if part]
    if components:
        return components[-1]
    return "未知工程名称"
+49 -73
View File
@@ -2,31 +2,39 @@ from llama_index.core import PromptTemplate
text_qa_template_str = (
"# 角色\n"
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
"\n"
"# 任务描述:\n"
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符\n"
"如果答案与事实不符,直接给出答案,不要做解释。\n"
"\n"
"# 回答规则:\n"
"- 请使用与文档材料相同的语言回答问题。\n"
"- 评估文档是否含有足够信息回答问题。无关时不要回答\n"
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释\n"
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁。\n"
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答。\n"
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”\n"
"\n"
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复。\n"
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
"如同直接从文件中提取的内容。\n"
"知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n"
"例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称\n"
"## 技能\n"
"### 技能 1: 数据查询与提供\n"
"- 准确回答所有关于电力工程造价的相关问题。\n"
"- 提供具体数据,如成本估算、材料清单、劳动力需求等\n"
"- 确保提供的信息严格基于工程文档中的记录\n"
"### 技能 2: 技术性解释\n"
"- 解释造价工程中的技术术语和概念\n"
"- 为复杂的工程细节提供清晰易懂的说明。\n"
"## 约束\n"
"- 仅回答与电力工程造价文件相关的具体问题。\n"
"- 不进行任何超出文件内容的猜测或假设。\n"
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
"以下为上下文信息\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"鉴于来自多个来源的文档片段而非先知识,回答查询\n"
"请根据上下文信息而非先知识回答我的问题或回复我的指令。前面的上下文信息可能有用,也可能没用,你需要从我给出的上下文信息中选出与我的问题最相关的那些,来为你的回答提供依据。回答一定要忠于原文,简洁但不丢信息,不要胡乱编造。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。我的问题或指令是什么语种,你就用什么语种回复\n"
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
"Query: {query_str}\n"
"Answer: "
"问题:{query_str}\n"
"你的回复: "
)
text_qa_template = PromptTemplate(text_qa_template_str)
refine_template_str = (
@@ -50,26 +58,31 @@ refine_template = PromptTemplate(refine_template_str)
summary_template_str = (
"# 角色\n"
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
"\n"
"# 任务描述:\n"
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符。\n"
"如果答案与事实不符,直接给出答案,不要做解释。\n"
"\n"
"# 回答规则:\n"
"- 请使用与文档材料相同的语言回答问题\n"
"- 评估文档是否含有足够信息回答问题。无关时不要回答。\n"
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释\n"
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁\n"
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答\n"
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”。\n"
"\n"
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复\n"
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
"如同直接从文件中提取的内容。\n"
"## 技能\n"
"### 技能 1: 数据查询与提供\n"
"- 准确回答所有关于电力工程造价的相关问题。\n"
"- 提供具体数据,如成本估算、材料清单、劳动力需求等。\n"
"- 确保提供的信息严格基于工程文档中的记录\n"
"### 技能 2: 技术性解释\n"
"- 解释造价工程中的技术术语和概念\n"
"- 为复杂的工程细节提供清晰易懂的说明\n"
"## 约束\n"
"- 仅回答与电力工程造价文件相关的具体问题\n"
"- 不进行任何超出文件内容的猜测或假设。\n"
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
"来自多个来源的上下文信息如下。\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"鉴于来自多个来源的文档片段而非先验知识,回答查询。\n"
"鉴于来自多个来源的信息而非先验知识, "
"回答查询。\n"
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
"Query: {query_str}\n"
"Answer: "
@@ -80,40 +93,3 @@ simple_template_str = (
"{query_str}"
)
simple_template = PromptTemplate(simple_template_str)
ReActChatFormatter_messages = (
"您的设计旨在帮助完成各种任务,从回答问题到提供其他类型分析的摘要。\n\n"
"##工具\n\n"
"你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n"
"这可能需要将任务分解为子任务,并使用不同的工具来完成每个子任务。\n\n"
"你可以访问以下工具:\n"
"{tool_desc}\n\n\n"
"##输出格式\n\n"
"请用与问题相同的语言回答,并使用以下格式:\n\n"
"'''\n"
"Thought: 用户当前的语言是:(user's language)。我需要使用工具来帮助我回答问题。\n"
"Action: 如果使用工具,则为工具名称(one of {tool_names})。\n"
"Action Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}}\n"
"'''\n\n"
"请始终以Thought开始。\n\n"
"切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n"
"请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n"
"如果使用此格式,用户将以下面的格式进行回应:\n\n"
"'''\n"
"Observation: 工具响应\n"
"'''\n\n"
"你应该继续重复上述格式,直到你有足够的信息来回答问题而无需使用更多工具。此时,你必须使用以下两种格式之一进行回答:\n\n"
"'''\nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\n"
"Answer: [你的答案(与用户问题相同的语言)]\n"
"'''\n\n"
"'''\n"
"Thought: 我无法使用提供的工具回答问题。\n"
"Answer: [你的答案(与用户问题相同的语言)]\n"
"'''\n\n##如果从工具中得到的回应是Empty Response,那么只需要回答“我不知道”,不需要额外回答别的内容。## 当前对话\n\n"
"以下是当前对话,由人类和助手的消息交替组成。\n"
)
summary_query_tool_messages = "适用于任何需要进行全面总结、概括的要求。"
query_engine_tool_messages = "适用于回答任何问题。"
tree_summary_query_engine_tool_messages = "在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用本工具。"
-70
View File
@@ -1,70 +0,0 @@
from typing import Any, List, Optional
from llama_index.core.postprocessor import SentenceTransformerRerank
from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle
from llama_index.core.callbacks import CBEventType, EventPayload
from llama_index.core.bridge.pydantic import PrivateAttr
class OllamaRerank(SentenceTransformerRerank):
    """Cross-encoder reranker that additionally drops low-scoring nodes.

    Nodes are re-scored with the parent's cross-encoder (``self._model``),
    nodes whose new score is below ``score_threshold`` are discarded, and the
    best ``top_n`` of the remainder are returned.
    """

    _score_threshold: float = PrivateAttr()

    def __init__(
        self,
        top_n: int = 2,
        model: str = "cross-encoder/stsb-distilroberta-base",
        device: Optional[str] = None,
        keep_retrieval_score: Optional[bool] = False,
        score_threshold: float = 0.3,
    ):
        """Create the reranker.

        Args:
            top_n: Maximum number of nodes returned after reranking.
            model: Cross-encoder model name or local path.
            device: Device passed through to the parent class.
            keep_retrieval_score: If true, the original retrieval score is
                saved in node metadata under ``"retrieval_score"``.
            score_threshold: Nodes scoring below this value are dropped.
        """
        super().__init__(
            top_n=top_n,
            model=model,
            device=device,
            keep_retrieval_score=keep_retrieval_score,
        )
        # Assigned after the base initialiser so the private-attribute storage
        # exists regardless of the pydantic major version in use.
        self._score_threshold = score_threshold

    @classmethod
    def class_name(cls) -> str:
        return "OllamaRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Re-score ``nodes`` against the query, filter, sort and truncate.

        Args:
            nodes: Retrieved nodes; their ``score`` is overwritten in place
                with the cross-encoder score.
            query_bundle: The query to score against (required).

        Returns:
            At most ``top_n`` nodes with score >= ``score_threshold``, best
            first.

        Raises:
            ValueError: If ``query_bundle`` is missing.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        query_and_nodes = [
            (
                query_bundle.query_str,
                node.node.get_content(metadata_mode=MetadataMode.EMBED),
            )
            for node in nodes
        ]

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self.model,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: self.top_n,
            },
        ) as event:
            scores = self._model.predict(query_and_nodes)

            assert len(scores) == len(nodes)

            for node, score in zip(nodes, scores):
                if self.keep_retrieval_score:
                    node.node.metadata["retrieval_score"] = node.score
                node.score = score

            # Build a filtered list instead of calling ``nodes.remove`` in a
            # loop: linear time, and the caller's list is no longer shrunk as
            # a side effect.
            kept = [
                node for node in nodes if not node.score < self._score_threshold
            ]

            new_nodes = sorted(kept, key=lambda x: -x.score if x.score else 0)[
                : self.top_n
            ]
            event.on_end(payload={EventPayload.NODES: new_nodes})

        return new_nodes
@@ -1,234 +0,0 @@
from llama_index.core.response_synthesizers.tree_summarize import TreeSummarize
from typing import Any, Optional, Sequence,List
import asyncio
from llama_index.core.callbacks.base import CallbackManager
from llama_index.core.indices.prompt_helper import PromptHelper
from llama_index.core.prompts import BasePromptTemplate
from llama_index.core.service_context import ServiceContext
from llama_index.core.service_context_elements.llm_predictor import LLMPredictorType
from llama_index.core.types import BaseModel,RESPONSE_TEXT_TYPE
from llama_index.core.async_utils import run_async_tasks
from llama_index.core.utils import get_tokenizer
from llama_index.core.prompts.prompt_utils import get_empty_prompt_txt
class CustomTreeResponse(TreeSummarize):
    """Tree-summarize synthesizer that repacks whole chunks by token budget.

    ``get_response`` / ``aget_response`` group the incoming text chunks with
    :meth:`repack`, ask the LLM once per group, and recurse on the partial
    answers until a single group remains.
    """

    def __init__(
        self,
        llm: Optional[LLMPredictorType] = None,
        callback_manager: Optional[CallbackManager] = None,
        prompt_helper: Optional[PromptHelper] = None,
        summary_template: Optional[BasePromptTemplate] = None,
        output_cls: Optional[BaseModel] = None,
        streaming: bool = False,
        use_async: bool = False,
        verbose: bool = False,
        service_context: Optional[ServiceContext] = None,
    ) -> None:
        """Store a tokenizer and forward every argument to the parent."""
        # Tokenizer used by ``_token_size`` to measure prompt and chunk sizes.
        self._tokenizer = get_tokenizer()
        super().__init__(
            llm,
            callback_manager,
            prompt_helper,
            summary_template,
            output_cls,
            streaming,
            use_async,
            verbose,
            service_context,
        )

    async def aget_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Get tree summarize response (async).

        NOTE(review): ``repack`` returns ``[]`` for empty ``text_chunks``, in
        which case this method recurses with an empty list again -- callers
        should pass at least one chunk.
        """
        summary_template = self._summary_template.partial_format(query_str=query_str)
        text_chunks = self.repack(text_chunks=text_chunks)

        if self._verbose:
            print(f"{len(text_chunks)} text chunks after repacking")

        # give final response if there is only one chunk
        if len(text_chunks) == 1:
            response: RESPONSE_TEXT_TYPE
            if self._streaming:
                response = await self._llm.astream(
                    summary_template, context_str=text_chunks[0], **response_kwargs
                )
            else:
                if self._output_cls is None:
                    response = await self._llm.apredict(
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )
                else:
                    response = await self._llm.astructured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )

            # return pydantic object if output_cls is specified
            return response

        else:
            # summarize each chunk
            if self._output_cls is None:
                tasks = [
                    self._llm.apredict(
                        summary_template,
                        context_str=text_chunk,
                        **response_kwargs,
                    )
                    for text_chunk in text_chunks
                ]
            else:
                tasks = [
                    self._llm.astructured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunk,
                        **response_kwargs,
                    )
                    for text_chunk in text_chunks
                ]

            summary_responses = await asyncio.gather(*tasks)
            if self._output_cls is not None:
                # Structured outputs are serialised so they can be re-fed as text.
                summaries = [summary.json() for summary in summary_responses]
            else:
                summaries = summary_responses

            # recursively summarize the summaries
            return await self.aget_response(
                query_str=query_str,
                text_chunks=summaries,
                **response_kwargs,
            )

    def get_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Get tree summarize response.

        NOTE(review): ``repack`` returns ``[]`` for empty ``text_chunks``, in
        which case this method recurses with an empty list again -- callers
        should pass at least one chunk.
        """
        summary_template = self._summary_template.partial_format(query_str=query_str)
        text_chunks = self.repack(text_chunks=text_chunks)

        if self._verbose:
            print(f"{len(text_chunks)} text chunks after repacking")

        # give final response if there is only one chunk
        if len(text_chunks) == 1:
            response: RESPONSE_TEXT_TYPE
            if self._streaming:
                response = self._llm.stream(
                    summary_template, context_str=text_chunks[0], **response_kwargs
                )
            else:
                if self._output_cls is None:
                    response = self._llm.predict(
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )
                else:
                    response = self._llm.structured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )

            return response

        else:
            # summarize each chunk
            if self._use_async:
                if self._output_cls is None:
                    tasks = [
                        self._llm.apredict(
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                else:
                    tasks = [
                        self._llm.astructured_predict(
                            self._output_cls,
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]

                summary_responses = run_async_tasks(tasks)

                if self._output_cls is not None:
                    summaries = [summary.json() for summary in summary_responses]
                else:
                    summaries = summary_responses
            else:
                if self._output_cls is None:
                    summaries = [
                        self._llm.predict(
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                else:
                    summaries = [
                        self._llm.structured_predict(
                            self._output_cls,
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                    # Only structured outputs have ``.json()``; plain
                    # predictions are already strings.
                    summaries = [summary.json() for summary in summaries]

            # recursively summarize the summaries
            return self.get_response(
                query_str=query_str, text_chunks=summaries, **response_kwargs
            )

    def repack(self, text_chunks: Sequence[str]) -> List[str]:
        """Greedily merge whole chunks into groups that fit the context.

        Chunks are never split. Consecutive chunks are accumulated until
        adding the next one would exceed the available token budget; the
        accumulated group is then merged into one string and a new group
        starts with that next chunk.

        Args:
            text_chunks: Chunks to group, in order.

        Returns:
            One merged string per group (empty list for empty input).

        Raises:
            ValueError: If a single chunk alone exceeds the available budget.
        """
        prompt_str = get_empty_prompt_txt(self._summary_template)
        num_prompt_tokens = self._token_size(prompt_str)
        available_size = self._get_available_context_size(num_prompt_tokens)

        results: List[str] = []
        pending: List[str] = []
        pending_size = 0
        for text_chunk in text_chunks:
            chunk_size = self._token_size(text_chunk)
            if chunk_size > available_size:
                raise ValueError("文本块大小大于可用上下文大小")
            if pending and pending_size + chunk_size > available_size:
                results.append(self._merge_chunks(pending))
                pending = []
                pending_size = 0
            pending.append(text_chunk)
            # The running total must include the chunk that opens a new group;
            # resetting it to zero after a flush would under-count that group
            # and let it grow past the budget.
            pending_size += chunk_size

        if pending:
            results.append(self._merge_chunks(pending))
        return results

    def _get_available_context_size(self, num_prompt_tokens: int) -> int:
        """Return context_window - prompt tokens - reserved output tokens.

        Raises:
            ValueError: If the result is negative.
        """
        llm_metadata = self._llm.metadata
        context_size_tokens = (
            llm_metadata.context_window - num_prompt_tokens - llm_metadata.num_output
        )
        if context_size_tokens < 0:
            raise ValueError(
                f"Calculated available context size {context_size_tokens} was"
                " not non-negative."
            )
        return context_size_tokens

    def _token_size(self, text: str) -> int:
        """Return the number of tokens ``self._tokenizer`` yields for ``text``."""
        return len(self._tokenizer(text))

    def _merge_chunks(self, ava_chunks: list):
        """Join the stripped, non-empty chunks with a blank line between them."""
        return "\n\n".join([c.strip() for c in ava_chunks if c.strip()])
+1 -5
View File
@@ -1,4 +1,3 @@
import os
from typing import Any, Dict, List, Union, Callable, NamedTuple
from bm25s.tokenization import *
@@ -9,12 +8,9 @@ except ImportError:
def tqdm(iterable, *args, **kwargs):
return iterable
import jieba
jiebapath = os.environ.get("JIEBA_DATA", "")
jieba.set_dictionary(os.path.join(jiebapath, 'dict.txt')) #设置字典
jieba.initialize() #初始化jeiba
def chinese_tokenizer(text: str) -> List[str]:
    """Segment ``text`` with jieba and drop Chinese stop words.

    Args:
        text: Text to tokenize.

    Returns:
        The jieba tokens of ``text`` that are not in NLTK's ``'chinese'``
        stop-word list, in their original order.
    """
    import jieba
    from nltk.corpus import stopwords

    # Fetch the stop-word list once and use a set: the list was previously
    # re-fetched and scanned linearly for every single token.
    stop_words = frozenset(stopwords.words('chinese'))
    return [token for token in jieba.lcut(text) if token not in stop_words]
@@ -24,15 +24,13 @@ class HybridRetriever(BaseRetriever):
self._vecRetriever = vector_index.as_retriever(
similarity_top_k=similarity_top_k,filters = filters
)
self._bm25Retriever = None
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
if os.path.exists(STORAGE_DIR) and len(os.listdir(STORAGE_DIR)) > 0:
self._bm25Retriever = CHBM25Retriever.from_persist_dir(STORAGE_DIR)
else:
nodes = self._vector_index.vector_store.get_nodes(None)
similarity_top_k = min(len(nodes),similarity_top_k)
self._bm25Retriever = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=nodes)
self._bm25Retriever.persist(STORAGE_DIR)
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=self._vector_index.vector_store.get_nodes(None))
bmRetriver.persist(STORAGE_DIR)
self._alpha = alpha
@@ -45,16 +43,6 @@ class HybridRetriever(BaseRetriever):
for node in bmNodes:
bmDic[node.node_id] = node
vecScores = [node_with_score.score for node_with_score in vecNodes]
bmSores = [node_with_score.score for node_with_score in bmNodes]
vec_min_score = min(vecScores) if len(vecScores) > 0 else 0
vec_max_score = max(vecScores) if len(vecScores) > 0 else 0
bm_min_score = min(bmSores) if len(bmSores) > 0 else 0
bm_max_score = max(bmSores) if len(bmSores) > 0 else 0
result_tups = []
for i in range(len(vecNodes)):
node = vecNodes[i]
@@ -64,11 +52,7 @@ class HybridRetriever(BaseRetriever):
bmDic.pop(node.node_id)
else:
bmScore = 0.0
bmScore = self.normal_score(bmScore,bm_min_score,bm_max_score)
vecScore = self.normal_score(node.score,vec_min_score,vec_max_score)
full_similarity = (self._alpha * vecScore) + (
full_similarity = (self._alpha * node.score) + (
(1 - self._alpha) * bmScore
)
result_tups.append((full_similarity, node))
@@ -81,9 +65,3 @@ class HybridRetriever(BaseRetriever):
for full_score, node in result_tups:
node.score = full_score
return [n for _, n in result_tups][:self._out_top_k]
def normal_score(self, score, min, max):
    """Min-max scale ``score`` relative to the ``min``/``max`` bounds.

    When both bounds are equal the range is degenerate; the result is then
    1.0 for a positive score and 0.0 otherwise.
    """
    if min != max:
        return (score - min) / (max - min)
    if score > 0:
        return 1.0
    return 0.0
+8 -9
View File
@@ -5,13 +5,12 @@ from qdrant_client import qdrant_client
qclient = None
def get_qdrant_vector_store(docType:str):
collection_name = docType
def get_qdrant_vector_store():
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
vector_store_path = os.getenv("VECTOR_STORE_PATH")
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
vector_store_path =os.path.join(vector_store_path,docType)
if not vector_store_path or not host:
raise ValueError(
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
@@ -33,9 +32,9 @@ def get_qdrant_vector_store(docType:str):
vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
return vector_store
def get_chroma_vector_store(docType:str):
collection_name = docType
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),docType)
def get_chroma_vector_store():
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
vector_store_path = os.getenv("VECTOR_STORE_PATH")
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
if vector_store_path:
@@ -56,16 +55,16 @@ def get_chroma_vector_store(docType:str):
)
return store
def get_vector_store(docType:str):
def get_vector_store():
store_type=os.getenv("VECTOR_STORE_TYPE")
store = None
match store_type:
case "chroma":
store = get_chroma_vector_store(docType)
store = get_chroma_vector_store()
case "qdrant":
store = get_qdrant_vector_store(docType)
store = get_qdrant_vector_store()
case _:
raise ValueError(f"Invalid vector store type: {store_type}")
+113 -213
View File
@@ -1,114 +1,19 @@
import os
from typing import Dict
from abc import abstractmethod
from llama_index.core.constants import DEFAULT_TEMPERATURE
from llama_index.core.settings import Settings
from llama_index.embeddings.xinference import XinferenceEmbedding
from llama_index.llms.xinference import Xinference
#from llama_index.embeddings.xinference import XinferenceEmbedding
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
from llama_index.postprocessor.xinference_rerank import XinferenceRerank
from app.engine.loaders import getProjectInfos
from app.api.routers.request.base import ProjectInfo
from modelProvide.customDashScope import CustomDashScope
from util.register import *
from llama_index.core.callbacks import CallbackManager
from app.xinference.base import XinferenceEmbedding, XinferenceRerank
ModelPlateCategory = '模型平台'
def get_node_postprocessors():
rerank_enabled = os.getenv("RERANK_ENABLED").title()
if rerank_enabled is None or rerank_enabled == 'False':
return []
def init_settings():
    """Configure the global ``Settings`` from environment variables.

    ``MODEL_PROVIDER`` and ``EMBEDDING_PROVIDER`` are looked up in the
    ``ClsRegister`` under ``ModelPlateCategory``; the registered platform
    classes supply the LLM and the embedding model. ``CHUNK_SIZE`` and
    ``CHUNK_OVERLAP`` default to 1024 and 20.

    Raises:
        ValueError: If either provider has no registered platform class.
    """
    model_provider = os.getenv("MODEL_PROVIDER")
    llm_platform_cls = ClsRegister.get(ModelPlateCategory, model_provider)
    if llm_platform_cls is None:
        raise ValueError(f"Invalid model provider: {model_provider}")
    Settings.llm = llm_platform_cls().model()

    embedding_provider = os.getenv("EMBEDDING_PROVIDER")
    embedding_platform_cls = ClsRegister.get(ModelPlateCategory, embedding_provider)
    # Validate the class that was just looked up. Testing the LLM platform
    # instance here could never fail, so an unknown embedding provider would
    # crash with "NoneType is not callable" instead of this error.
    if embedding_platform_cls is None:
        raise ValueError(f"Invalid embedding provider: {embedding_provider}")
    Settings.embed_model = embedding_platform_cls().embedding()

    Settings.llm.callback_manager = CallbackManager()
    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
class ModelPlatform:
    """Interface of a model platform: LLM, embedding model and reranker.

    The class does not derive from ``ABC``, so the ``abstractmethod`` markers
    are not enforced at instantiation; each default implementation returns
    ``None``.
    """

    @abstractmethod
    def model(self):
        """Return the platform's LLM (base implementation: ``None``)."""

    @abstractmethod
    def embedding(self):
        """Return the platform's embedding model (base implementation: ``None``)."""

    @abstractmethod
    def rerank(self):
        """Return the platform's rerank post-processors (base implementation: ``None``)."""
@register(ModelPlateCategory,'ollama')
class OllamaPlatform(ModelPlatform):
    """Model platform backed by Ollama (LLM) and a local cross-encoder reranker."""

    def model(self):
        """Build the Ollama LLM from ``OLLAMA_BASE_URL``, ``MODEL`` and
        ``OLLAMA_REQUEST_TIMEOUT``.

        Returns:
            The configured ``Ollama`` instance. It is also stored in
            ``Settings.llm`` as before; returning it matters because
            ``init_settings`` assigns this method's result to ``Settings.llm``
            and a ``None`` result would overwrite the model.
        """
        from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama

        base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
        request_timeout = float(
            os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
        )
        llm = Ollama(
            base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
        )
        Settings.llm = llm
        return llm

    def embedding(self):
        """Ollama embeddings are currently disabled; returns ``None``."""
        #from llama_index.embeddings.ollama import OllamaEmbedding
        # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
        # Settings.embed_model = OllamaEmbedding(
        #     base_url=base_url,
        #     model_name=os.getenv("EMBEDDING_MODEL"),
        # )
        pass

    def rerank(self):
        """Build the rerank post-processor list.

        Reads ``RERANK_MODEL`` (appended to the current working directory),
        ``RERANK_TOP_N`` (default 5) and ``RERANK_THRESHOLD`` (default 0.3).

        Returns:
            A one-element list holding the configured ``OllamaRerank``.
        """
        from app.engine.rerank.ollamRerank import OllamaRerank

        modelpath = os.getcwd() + os.getenv('RERANK_MODEL')
        # Environment values are strings; convert explicitly instead of
        # relying on downstream coercion.
        top_n = int(os.getenv('RERANK_TOP_N', 5))
        threshold = float(os.getenv('RERANK_THRESHOLD', 0.3))
        rerank = OllamaRerank(
            model=modelpath,
            top_n=top_n,
            device="cpu",
            score_threshold=threshold,
        )
        return [rerank]
@register(ModelPlateCategory,'xinference')
class XinferencePlatform(ModelPlatform):
def model(self):
base_url = os.getenv("BASE_URL")
model = os.getenv("MODEL")
max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None
temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
return Xinference(model, base_url, temperature, max_tokens)
def embedding(self):
base_url = os.getenv("BASE_URL")
embedding_base_url = os.getenv("EMBEDDING_BASE_URL")
embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url
embed_model_name = os.getenv("EMBEDDING_MODEL")
dimensions = os.getenv("EMBEDDING_DIM")
dimensions = int(dimensions) if dimensions is not None else None
return XinferenceEmbedding(embed_model_name, embedding_base_url)
def rerank(self):
rerank_model = os.getenv("RERANK_MODEL")
rerank_url = os.getenv("RERANK_BASE_URL")
rerank_top_n = os.getenv("RERANK_TOP_N")
@@ -118,10 +23,73 @@ class XinferencePlatform(ModelPlatform):
postprocess = [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)]
return postprocess
@register(ModelPlateCategory,'openai')
class OpenAIPlatform(ModelPlatform):
def model(self):
def init_settings():
    """Initialise the global ``Settings`` for the provider in ``MODEL_PROVIDER``.

    Dispatches to the matching ``init_*`` function, then applies
    ``CHUNK_SIZE`` (default 1024) and ``CHUNK_OVERLAP`` (default 20).

    Raises:
        ValueError: If ``MODEL_PROVIDER`` names no supported provider.
    """
    model_provider = os.getenv("MODEL_PROVIDER")
    if model_provider == "openai":
        init_openai()
    elif model_provider == "dashscope":
        init_dashscope()
    elif model_provider == "groq":
        init_groq()
    elif model_provider == "ollama":
        init_ollama()
    elif model_provider == "anthropic":
        init_anthropic()
    elif model_provider == "gemini":
        init_gemini()
    elif model_provider == "mistral":
        init_mistral()
    elif model_provider == "azure-openai":
        init_azure_openai()
    elif model_provider == "t-systems":
        # Imported lazily: only needed for this provider.
        from .llmhub import init_llmhub

        init_llmhub()
    elif model_provider == "xinference":
        init_xinference()
    else:
        raise ValueError(f"Invalid model provider: {model_provider}")

    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
def init_ollama():
    """Placeholder for the Ollama provider; currently does nothing.

    The implementation below is commented out, so selecting this provider
    leaves ``Settings.llm`` and ``Settings.embed_model`` untouched.
    """
    # from llama_index.embeddings.ollama import OllamaEmbedding
    # from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama
    #
    # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
    # request_timeout = float(
    #     os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
    # )
    # Settings.embed_model = OllamaEmbedding(
    #     base_url=base_url,
    #     model_name=os.getenv("EMBEDDING_MODEL"),
    # )
    # Settings.llm = Ollama(
    #     base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
    # )
    pass
def init_xinference():
    """Configure ``Settings.llm`` and ``Settings.embed_model`` for Xinference.

    Environment variables read: ``BASE_URL``, ``MODEL``, ``LLM_MAX_TOKENS``
    (optional), ``LLM_TEMPERATURE`` (defaults to ``DEFAULT_XINFERENCE_TEMP``),
    ``EMBEDDING_BASE_URL`` (falls back to ``BASE_URL`` when unset or empty),
    ``EMBEDDING_MODEL`` and ``EMBEDDING_DIM`` (optional).
    """
    base_url = os.getenv("BASE_URL")
    model = os.getenv("MODEL")

    raw_max_tokens = os.getenv("LLM_MAX_TOKENS")
    max_tokens = None if raw_max_tokens is None else int(raw_max_tokens)
    temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
    Settings.llm = Xinference(model, base_url, temperature, max_tokens)

    embedding_base_url = os.getenv("EMBEDDING_BASE_URL")
    if embedding_base_url is None or embedding_base_url == "":
        # No dedicated embedding endpoint: reuse the LLM endpoint.
        embedding_base_url = base_url
    embed_model_name = os.getenv("EMBEDDING_MODEL")

    raw_dimensions = os.getenv("EMBEDDING_DIM")
    dimensions = None if raw_dimensions is None else int(raw_dimensions)
    Settings.embed_model = XinferenceEmbedding(
        embed_model_name, embedding_base_url, dimensions=dimensions
    )
def init_openai():
from llama_index.core.constants import DEFAULT_TEMPERATURE
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.openai import OpenAI
max_tokens = os.getenv("LLM_MAX_TOKENS")
@@ -130,41 +98,39 @@ class OpenAIPlatform(ModelPlatform):
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
"max_tokens": int(max_tokens) if max_tokens is not None else None,
}
return OpenAI(**config)
Settings.llm = OpenAI(**config)
def embedding(self):
from llama_index.embeddings.openai import OpenAIEmbedding
dimensions = os.getenv("EMBEDDING_DIM")
config = {
"model": os.getenv("EMBEDDING_MODEL"),
"dimensions": int(dimensions) if dimensions is not None else None,
}
return OpenAIEmbedding(**config)
Settings.embed_model = OpenAIEmbedding(**config)
def rerank(self):
pass
def init_dashscope():
from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels
from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeBatchTextEmbeddingModels,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels
@register(ModelPlateCategory,'dashscope')
class DashscopePlatform(ModelPlatform):
def model(self):
apikey = os.getenv('DASHSCOPE_API_KEY')
modelName = os.getenv('MODEL')
return CustomDashScope(model_name=modelName,api_key = apikey)
max_tokens = os.getenv("LLM_MAX_TOKENS")
config = {
"model": os.getenv("MODEL"),
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
"max_tokens": int(max_tokens) if max_tokens is not None else None,
}
Settings.llm = llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX)
def embedding(self):
from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels
api_key = os.getenv('DASHSCOPE_API_KEY')
modelName = os.getenv('EMBEDDING_MODEL')
return DashScopeEmbedding(model_name=modelName,
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY,api_key = api_key)
dimensions = os.getenv("EMBEDDING_DIM")
config = {
"model": os.getenv("EMBEDDING_MODEL"),
"dimensions": int(dimensions) if dimensions is not None else None,
}
Settings.embed_model = DashScopeEmbedding(model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY)
def rerank(self):
pass
@register(ModelPlateCategory,'azure-openai')
class AzureOpenaiPlatform(ModelPlatform):
def model(self):
def init_azure_openai():
# from llama_index.core.constants import DEFAULT_TEMPERATURE
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
# from llama_index.llms.azure_openai import AzureOpenAI
#
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
@@ -180,32 +146,15 @@ class AzureOpenaiPlatform(ModelPlatform):
# or os.getenv("OPENAI_API_VERSION"),
# }
#
# return AzureOpenAI(
# Settings.llm = AzureOpenAI(
# model=os.getenv("MODEL"),
# max_tokens=int(max_tokens) if max_tokens is not None else None,
# temperature=float(temperature),
# deployment_name=llm_deployment,
# **azure_config,
# )
pass
def embedding(self):
# from llama_index.core.constants import DEFAULT_TEMPERATURE
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
#
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
# embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
# max_tokens = os.getenv("LLM_MAX_TOKENS")
# temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
# dimensions = os.getenv("EMBEDDING_DIM")
#
# azure_config = {
# "api_key": os.environ["AZURE_OPENAI_KEY"],
# "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
# "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
# or os.getenv("OPENAI_API_VERSION"),
# }
# return AzureOpenAIEmbedding(
# Settings.embed_model = AzureOpenAIEmbedding(
# model=os.getenv("EMBEDDING_MODEL"),
# dimensions=int(dimensions) if dimensions is not None else None,
# deployment_name=embedding_deployment,
@@ -213,17 +162,11 @@ class AzureOpenaiPlatform(ModelPlatform):
# )
pass
def rerank(self):
pass
@register(ModelPlateCategory,'fastembed')
class FastembedPlatform(ModelPlatform):
@abstractmethod
def model(self):
pass
@abstractmethod
def embedding(self):
def init_fastembed():
"""
Use Qdrant Fastembed as the local embedding provider.
"""
# from llama_index.embeddings.fastembed import FastEmbedEmbedding
#
# embed_model_map: Dict[str, str] = {
@@ -239,14 +182,8 @@ class FastembedPlatform(ModelPlatform):
# )
pass
@abstractmethod
def rerank(self):
pass
@register(ModelPlateCategory,'groq')
class GroqPlatform(ModelPlatform):
@abstractmethod
def model(self):
def init_groq():
# from llama_index.llms.groq import Groq
#
# model_map: Dict[str, str] = {
@@ -260,17 +197,8 @@ class GroqPlatform(ModelPlatform):
# init_fastembed()
pass
@abstractmethod
def embedding(self):
pass
@abstractmethod
def rerank(self):
pass
@register(ModelPlateCategory,'anthropic')
class AnthropicPlatform(ModelPlatform):
def model(self):
def init_anthropic():
# from llama_index.llms.anthropic import Anthropic
#
# model_map: Dict[str, str] = {
@@ -286,50 +214,22 @@ class AnthropicPlatform(ModelPlatform):
# init_fastembed()
pass
def embedding(self):
pass
def rerank(self):
pass
@register(ModelPlateCategory,'gemini')
class GeminiPlatform(ModelPlatform):
def model(self):
# from llama_index.llms.gemini import Gemini
# model_name = f"models/{os.getenv('MODEL')}"
# return Gemini(model=model_name)
pass
def embedding(self):
def init_gemini():
# from llama_index.embeddings.gemini import GeminiEmbedding
# from llama_index.llms.gemini import Gemini
#
# model_name = f"models/{os.getenv('MODEL')}"
# embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
# return GeminiEmbedding(model_name=embed_model_name)
#
# Settings.llm = Gemini(model=model_name)
# Settings.embed_model = GeminiEmbedding(model_name=embed_model_name)
pass
def rerank(self):
pass
@register(ModelPlateCategory,'mistral')
class MistralPlatform(ModelPlatform):
def model(self):
# from llama_index.llms.mistralai import MistralAI
# return MistralAI(model=os.getenv("MODEL"))
pass
def embedding(self):
def init_mistral():
# from llama_index.embeddings.mistralai import MistralAIEmbedding
# return MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
# from llama_index.llms.mistralai import MistralAI
#
# Settings.llm = MistralAI(model=os.getenv("MODEL"))
# Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
pass
def rerank(self):
pass
def init_ProjectInfo():
    """Register every project returned by ``getProjectInfos`` on a ``ProjectInfo``.

    Each record's ``'name'`` and ``'flag'`` entries are passed to
    ``ProjectInfo.add``. The ``ProjectInfo`` object is not returned.
    """
    registry = ProjectInfo()
    for record in getProjectInfos():
        registry.add(record['name'], record['flag'])
View File
+272
View File
@@ -0,0 +1,272 @@
"""Xinference embeddings file."""
import logging
from enum import Enum
from http import HTTPStatus
from typing import Any, Dict, List, Optional, Union, Tuple
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.callbacks import CBEventType, EventPayload
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle
from pydantic import Field
logger = logging.getLogger(__name__)
EMBED_MAX_INPUT_LENGTH = 2048
EMBED_MAX_BATCH_SIZE = 1
class XinferenceEmbedding(BaseEmbedding):
    """Text-embedding model served by a Xinference endpoint."""

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )

    # Runtime handles are private attributes, not validated model fields.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
        dimensions: Optional[int] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        max_retries: int = 10,
        **kwargs: Any,
    ) -> None:
        """Connect to ``endpoint`` and bind the model ``model_uid``.

        Args:
            model_uid: UID of the embedding model running in Xinference.
            endpoint: Base URL of the Xinference server.
            embed_batch_size: Accepted for interface compatibility.
                NOTE(review): as before, the effective batch size is the
                model's ``replica`` value from its description, not this
                argument -- confirm that this is intended.
            dimensions: Embedding dimensions; when ``None`` the value from the
                model description is used.
            additional_kwargs, api_key, api_base, api_version, max_retries,
            **kwargs: Forwarded unchanged to the base class.

        Raises:
            ImportError: If the ``xinference`` client is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description, replica, described_dimensions = self.load_model(
            model_uid, endpoint
        )
        if dimensions is None:
            dimensions = described_dimensions

        super().__init__(
            embed_batch_size=replica,
            dimensions=dimensions,
            model_name=model_uid,
            # Required field; it was never supplied before.
            model_description=model_description,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            **kwargs,
        )
        # Private attributes are set after the base initialiser so their
        # storage exists regardless of the pydantic major version.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    def load_model(
        self, model_uid: str, endpoint: str
    ) -> Tuple[Any, Dict[str, Any], int, int]:
        """Fetch the model handle and its description from Xinference.

        Returns:
            ``(generator, model_description, replica, dimensions)`` where the
            last two are read from the model description.

        Raises:
            ImportError: If the ``xinference`` client is not installed.
            RuntimeError: If the handle or description is unavailable.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            )

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``.get`` so an unknown uid reaches the RuntimeError below instead of
        # escaping as a bare KeyError.
        model_description = client.list_models().get(model_uid)
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )

        replica = model_description['replica']
        dimensions = model_description['dimensions']
        return generator, model_description, replica, dimensions

    @classmethod
    def class_name(cls) -> str:
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """Embed ``text`` synchronously via the Xinference model handle."""
        assert self._generator is not None
        response = self._generator.create_embedding(input=text)
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` synchronously (same path as text embedding)."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Embed ``query``; delegates to the synchronous implementation.

        NOTE(review): this blocks the event loop for the duration of the
        HTTP call.
        """
        return self._get_query_embedding(query)
class XinferenceRerank(BaseNodePostprocessor):
    """Node post-processor that reranks with a Xinference rerank model."""

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )

    # Runtime handles are private attributes, not validated model fields.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    model: str = Field(description="Dashscope rerank model name.")
    # Both limits are optional: ``None`` disables the respective filter.
    top_n: Optional[int] = Field(default=None, description="Top N nodes to return.")
    threshold: Optional[float] = Field(
        default=None, description="threshold nodes to return."
    )

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        top_n: int = None,
        threshold: float = None,
        return_documents: bool = False
    ):
        """Connect to ``endpoint`` and bind the rerank model ``model_uid``.

        Args:
            model_uid: UID of the rerank model running in Xinference.
            endpoint: Base URL of the Xinference server.
            top_n: Keep at most this many nodes; ``None`` keeps all.
            threshold: Drop nodes scoring below this; ``None`` keeps all.
            return_documents: Accepted for interface compatibility; not used
                by this class.

        Raises:
            ImportError: If the ``xinference`` client is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description = self.load_model(model_uid, endpoint)
        super().__init__(
            top_n=top_n,
            model=model_uid,
            threshold=threshold,
            # Required field; it was never supplied before.
            model_description=model_description,
        )
        # Set on the instance (not on throw-away locals) and after the base
        # initialiser so private-attribute storage exists.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    @classmethod
    def class_name(cls) -> str:
        return "XinferenceRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rerank ``nodes`` against the query, then filter and truncate.

        Returns:
            New ``NodeWithScore`` objects carrying the rerank score, limited
            by ``threshold`` and ``top_n`` when those are set, best first.

        Raises:
            ValueError: If ``query_bundle`` is missing.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        dispatcher.event(
            ReRankStartEvent(
                nodes = nodes,
                top_n = self.top_n,
                query = query_bundle,
                model_name = self.model
            )
        )

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self.model,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: self.top_n,
            },
        ) as event:
            texts = [node.node.get_content() for node in nodes]
            response = self._generator.rerank(texts, query_bundle.query_str)

            new_nodes = [
                NodeWithScore(
                    node=nodes[result['index']].node, score=result['relevance_score']
                )
                for result in response['results']
            ]
            # Without a threshold every node is kept (previously none were).
            if self.threshold is not None:
                new_nodes = [
                    node for node in new_nodes if node.score >= self.threshold
                ]
            # Stable sort, best first, so the truncation below keeps the
            # highest-scoring nodes even if the server order differs.
            new_nodes.sort(key=lambda node: node.score, reverse=True)
            # Plain slice: the old ``[top_n:-1]`` removal loop left one node
            # too many.
            if self.top_n is not None:
                new_nodes = new_nodes[: self.top_n]

            event.on_end(payload={EventPayload.NODES: new_nodes})

        dispatcher.event(
            ReRankEndEvent(
                nodes= new_nodes
            )
        )
        return new_nodes

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """Fetch the model handle and its description from Xinference.

        Returns:
            ``(generator, model_description)``.

        Raises:
            ImportError: If the ``xinference`` client is not installed.
            RuntimeError: If the handle or description is unavailable.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            )

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``.get`` so an unknown uid reaches the RuntimeError below instead of
        # escaping as a bare KeyError.
        model_description = client.list_models().get(model_uid)
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )

        return generator, model_description
+32 -32
View File
@@ -3,46 +3,46 @@ file:
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
use_llama_parse: false
#db:
db:
# The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
# query: The query to fetch data from the database. E.g.: SELECT * FROM table
#- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
#enable: false # 添加 enable 字段
#queries:
#- sql: select * from ProjectProperties;
#explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
enable: true # 添加 enable 字段
queries:
- sql: select * from ProjectProperties;
explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
#- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
#explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
#explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
#explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
#explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
#- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
#explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表'
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa'
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表'
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)'
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)'
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)'
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表'
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa'
explanation: "取费表名称为线路取费表(调试工程)aa的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表'
explanation: "取费表名称为大型土石方取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)'
explanation: "取费表名称为线路取费表(余物清理)的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)'
explanation: "取费表名称为线路取费表(余物清理)(1)的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)'
explanation: "取费表名称为线路取费表(拆除)的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from ProjectQuantities where Professional_Type = '线路'
#explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
#- sql: select Name, Code, Calculation_Formula, Rate, from ProjectQuantities where Professional_Type = '余物清理'
#explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '线路'
explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
- sql: select Name, Code, Calculation_Formula, Rate from ProjectQuantities where Professional_Type = '余物清理'
explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
#web:
# driver_arguments:
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
@@ -0,0 +1,71 @@
{
"Table": [
{
"name": "FeeCollectionTable",
"alias": "",
"comment": "取费表是取费设置中各取费表明细。查询示例: SELECT Rate FROM FeeCollectionTable WHERE Name = 'findname'。",
"fields": [
{
"name": "FeeCollectionTableName",
"alias": "取费表名称,取费名称,取费名",
"comment": "取费表名称",
"type": "VARCHAR"
},
{
"name": "Name",
"alias": "名称,费用名,项目名",
"comment": "费用名称,项目名称",
"type": "VARCHAR"
},
{
"name": "SerialNumber",
"alias": "序号,序列号,费用序号",
"comment": "费用表序号",
"type": "VARCHAR"
},
{
"name": "Code",
"alias": "编号,代号,代码",
"comment": "费用代码",
"type": "VARCHAR"
},
{
"name": "CalculationFormula",
"alias": "公式,表达式,计算式",
"comment": "取费基数",
"type": "VARCHAR"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "取费费率",
"type": "REAL"
},
{
"name": "Remarks",
"alias": "说明,备注",
"comment": "费用项备注说明",
"type": "VARCHAR"
},
{
"name": "Major",
"alias": "专业",
"comment": "取费表专业",
"type": "VARCHAR"
},
{
"name": "Type",
"alias": "类型,取费类型",
"comment": "取费表类型",
"type": "VARCHAR"
},
{
"name": "Path",
"alias": "路径,费用全路径",
"comment": "费用项层级全路径",
"type": "VARCHAR"
}
]
}
]
}
@@ -0,0 +1,89 @@
{
"Table": [
{
"name": "OtherFee",
"alias": "",
"comment": "其他费用表被称为“工程费用中其他费用明细”。其他费用是指为完成工程项目建设所必需的,但不属于建筑工程费、安装工程费、设备购置费、基本预备费的其他相关费用。包括建设场地征用及清理费、项目建设管理费、项目建设技术服务费、生产准备费、大件运输措施费、专业爆破服务费等。查询示例: SELECT Rate FROM OtherFee WHERE Name = 'findname'。",
"fields": [
{
"name": "Id",
"alias": "项目idid,费用id",
"comment": "费用项目id",
"type": "INT"
},
{
"name": "ParentId",
"alias": "父级id,父id",
"comment": "费用项目父级id",
"type": "INT"
},
{
"name": "Level",
"alias": "层号,层级,层编号",
"comment": "层级编号,从1开始",
"type": "INT"
},
{
"name": "Name",
"alias": "名称,费用名,项目名",
"comment": "费用名称,项目名称",
"type": "VARCHAR"
},
{
"name": "SerialNumber",
"alias": "序号,序列号",
"comment": "费用表序号",
"type": "VARCHAR"
},
{
"name": "Code",
"alias": "编号,代号,代码",
"comment": "费用代码",
"type": "VARCHAR"
},
{
"name": "CalculationFormula",
"alias": "公式,表达式,计算式",
"comment": "取费基数",
"type": "VARCHAR"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "取费费率",
"type": "REAL"
},
{
"name": "Amount",
"alias": "金额,价格",
"comment": "金额、合计、费用,\n单位为元",
"type": "REAL"
},
{
"name": "Remarks",
"alias": "说明,备注",
"comment": "费用项备注说明",
"type": "VARCHAR"
},
{
"name": "Compilation_Basis",
"alias": "编制依据,编制来源",
"comment": "费用项编制依据",
"type": "VARCHAR"
},
{
"name": "WBS_Code",
"alias": "WBS编号,WBS编码",
"comment": "费用项WBS编码",
"type": "VARCHAR"
},
{
"name": "Path",
"alias": "路径,费用全路径",
"comment": "费用项层级全路径",
"type": "VARCHAR"
}
]
}
]
}
@@ -0,0 +1,149 @@
{
"Table": [
{
"name": "ProjectDivision",
"alias": "",
"comment": "项目划分表是用于存储工程项目划分树状数据。内部包含安装工程项目划分,建筑工程项目划分,线路项目划分,工程分部分项。查询示例: SELECT Sum_Price FROM ProjectDivision WHERE Name = 'findname'。",
"fields": [
{
"name": "Id",
"alias": "项目idid,费用id",
"comment": "项目划分id",
"type": "INT"
},
{
"name": "ParentId",
"alias": "父级id,父id",
"comment": "项目划分父级id",
"type": "INT"
},
{
"name": "Level",
"alias": "层号,层级,层编号",
"comment": "层级编号,从1开始",
"type": "INT"
},
{
"name": "Quantity",
"alias": "个数,数量,数目",
"comment": "项目划分数量",
"type": "REAL"
},
{
"name": "SerialNumber",
"alias": "序号,序列号,项目序号",
"comment": "项目划分序号",
"type": "VARCHAR"
},
{
"name": "Name",
"alias": "名称,项目名",
"comment": "项目名称",
"type": "VARCHAR"
},
{
"name": "Encoding",
"alias": "编码,译码",
"comment": "项目划分编码",
"type": "VARCHAR"
},
{
"name": "Sum_Price",
"alias": "合计,合价",
"comment": "项目划分合价,分部分项费用",
"type": "REAL"
},
{
"name": "FeeCollectionTableName",
"alias": "取费表",
"comment": "项目划分的取费表,此项目划分选用的取费表",
"type": "VARCHAR"
},
{
"name": "Remarks",
"alias": "说明,备注",
"comment": "备注",
"type": "VARCHAR"
},
{
"name": "WBS_Code",
"alias": "WBS编号,WBS编码",
"comment": "WBS编码",
"type": "VARCHAR"
},
{
"name": "Manual_Adjustment_Coefficient",
"alias": "人工调差系数",
"comment": "此项目划分下人工调差系数",
"type": "REAL"
},
{
"name": "Material_Adjustment_Coefficient",
"alias": "材料调差系数",
"comment": "此项目划分下材料调差系数",
"type": "REAL"
},
{
"name": "Mechanical_Adjustment_Coefficient",
"alias": "机械调差系数",
"comment": "此项目划分下机械调差系数",
"type": "REAL"
},
{
"name": "Demolition_Manual_Adjustment_Coefficient",
"alias": "拆除人工调差系数",
"comment": "此项目划分下拆除人工调差系数",
"type": "REAL"
},
{
"name": "Demolition_Material_Adjustment_Coefficient",
"alias": "拆除材料调差系数",
"comment": "此项目划分下拆除材料调差系数",
"type": "REAL"
},
{
"name": "Demolition_Mechanical_Adjustment_Coefficient",
"alias": "拆除机械调差系数",
"comment": "此项目划分下拆除机械调差系数",
"type": "REAL"
},
{
"name": "ProfessionalType",
"alias": "专业类型",
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
"type": "VARCHAR"
},
{
"name": "Unit",
"alias": "单位",
"comment": "项目划分单位",
"type": "VARCHAR"
},
{
"name": "CalculationFormula",
"alias": "公式,表达式,计算式",
"comment": "项目划分计算式",
"type": "VARCHAR"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "项目划分费率",
"type": "REAL"
},
{
"name": "Code",
"alias": "编号,代号,代码",
"comment": "项目划分代码",
"type": "VARCHAR"
},
{
"name": "Path",
"alias": "路径,项目全路径",
"comment": "项目划分层级全路径",
"type": "VARCHAR"
}
]
}
]
}
@@ -0,0 +1,239 @@
{
"Table": [
{
"name": "ProjectDivisions_CostPreview",
"alias": "",
"comment": "项目划分_费用预览表也被称为“项目划分费用预览”、“项目划分取费费用”。其中包含项目划分合价、直接费、间接费、利润、税金、主材费等。查询示例: SELECT Total FROM ProjectDivisions_CostPreview WHERE Id = '15'。",
"fields": [
{
"name": "Id",
"alias": "id,项目id",
"comment": "项目划分id",
"type": "INT"
},
{
"name": "ParentId",
"alias": "父级id,父id",
"comment": "项目划分父级id",
"type": "INT"
},
{
"name": "Level",
"alias": "层号,层级,层编号",
"comment": "层级编号,从1开始",
"type": "INT"
},
{
"name": "ProfessionalType",
"alias": "专业类型",
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
"type": "VARCHAR"
},
{
"name": "FeeCollectionTableName",
"alias": "取费表",
"comment": "项目划分的取费表,此项目划分选用的取费表",
"type": "VARCHAR"
},
{
"name": "Direct_Cost",
"alias": "直接费",
"comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。",
"type": "REAL"
},
{
"name": "Direct_Project_Cost",
"alias": "直接工程费",
"comment": "直接工程费是指按照正常的施工条件,在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。",
"type": "REAL"
},
{
"name": "Quota_Direct_Cost",
"alias": "定额直接费",
"comment": "定额直接费,包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。",
"type": "REAL"
},
{
"name": "Labor_Cost",
"alias": "人工费",
"comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。",
"type": "REAL"
},
{
"name": "Material_Cost",
"alias": "材料费",
"comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。",
"type": "REAL"
},
{
"name": "Construction_Machinery_Cost",
"alias": "施工机械使用费",
"comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。",
"type": "REAL"
},
{
"name": "Installation_Material_Cost",
"alias": "装置性材料费",
"comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料,也称主要材料费。装置性材料通常在概算或预算定额中未计价,也称未计价材料,也称主材。",
"type": "REAL"
},
{
"name": "A_Supply_Installation_Material_Cost",
"alias": "甲供装置性材料费",
"comment": "供货方为甲供的装置性材料费。",
"type": "REAL"
},
{
"name": "B_Supply_Installation_Material_Cost",
"alias": "乙供装置性材料费",
"comment": "供货方为乙供的装置性材料费。",
"type": "REAL"
},
{
"name": "Measure_Cost",
"alias": "措施费",
"comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。",
"type": "REAL"
},
{
"name": "WinterRainySeasons_Additional_Construction_Cost",
"alias": "冬雨季施工增加费",
"comment": "冬雨季施工增加费是指按照合理的工期要求,建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。",
"type": "REAL"
},
{
"name": "Night_Additional_Construction_Cost",
"alias": "夜间施工增加费",
"comment": "夜间施工增加费是指按照规程要求,工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。",
"type": "REAL"
},
{
"name": "Construction_Tool_Usage_Cost",
"alias": "施工工具用具使用费",
"comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。",
"type": "REAL"
},
{
"name": "Special_Areas_Additional_Construction_Cost",
"alias": "特殊地区施工增加费",
"comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工:因特殊自然条件影响而需额外增加的施工费用。",
"type": "REAL"
},
{
"name": "Temporary_Facility_Cost",
"alias": "临时设施费",
"comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用,以及维修、拆除、折旧及摊销费,或临时设施的租赁费等。",
"type": "REAL"
},
{
"name": "Construction_Organization_Relocation_Cost",
"alias": "施工机构迁移费",
"comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资,以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。",
"type": "REAL"
},
{
"name": "Safe_Civilized_Construction_Cost",
"alias": "安全文明施工费",
"comment": "安全文明施工费,包括安全生产费、文明施工费、环境保护费。",
"type": "REAL"
},
{
"name": "Indirect_Cost",
"alias": "间接费",
"comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。",
"type": "REAL"
},
{
"name": "Regulatory_Cost",
"alias": "规费",
"comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。",
"type": "REAL"
},
{
"name": "Social_Insurance_Premiums",
"alias": "社会保险费",
"comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。",
"type": "REAL"
},
{
"name": "Housing_Provident_Fund",
"alias": "住房公积金",
"comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。",
"type": "REAL"
},
{
"name": "Enterprise_Management_Cost",
"alias": "企业管理费",
"comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。",
"type": "REAL"
},
{
"name": "Construction_Enterprise_Cooperation_Debugging_Cost",
"alias": "施工企业配合调试费",
"comment": "施工企业配合调试费是指在工程整套启动试运阶段,施工企业安装专业配合调试所发生的费用。",
"type": "REAL"
},
{
"name": "Profit",
"alias": "利润",
"comment": "利润是指施工企业完成所承包工程获得的盈利。",
"type": "REAL"
},
{
"name": "Taxes",
"alias": "税金",
"comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。",
"type": "REAL"
},
{
"name": "Equipment_Cost",
"alias": "设备费",
"comment": "设备购置费是指为项目建设而购置或自制各种设备,并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。",
"type": "REAL"
},
{
"name": "B_Supply_Equipment_Excluding_Tax_Price",
"alias": "乙供设备不含税价",
"comment": "设备费中,供货方为乙供设备,不含税价",
"type": "REAL"
},
{
"name": "A_Supply_Equipment_Tax_Price",
"alias": "甲供设备含税价",
"comment": "设备费中,供货方为甲供设备,含税价",
"type": "REAL"
},
{
"name": "Installation_Cost",
"alias": "安装费",
"comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。",
"type": "REAL"
},
{
"name": "Main_Material_Cost",
"alias": "主材费",
"comment": "主材费指装置性材料费",
"type": "REAL"
},
{
"name": "Total",
"alias": "总价,总计,总体费用,总的费用",
"comment": "总计包含安装费、主材费、设备费。",
"type": "REAL"
},
{
"name": "Sum",
"alias": "合计,合价",
"comment": "项目划分合价,分部分项费用,项目划分费用。合计包含安装费和主材费。",
"type": "REAL"
},
{
"name": "Path",
"alias": "路径,项目划分全路径",
"comment": "项目划分层级全路径",
"type": "VARCHAR"
}
]
}
]
}
@@ -0,0 +1,35 @@
{
"Table": [
{
"name": "ProjectProperties",
"alias": "",
"comment": "工程属性表是用于存储整个工程的重要属性,访问该表都是为了通过属性名查找属性值。通常属性值有工程信息、工程属性、技经参数,表中包含工程总投资、工程总费用,工程主要费用,工程技经参数等。查询示例: SELECT Value FROM ProjectProperties WHERE Name = 'findname'。",
"fields": [
{
"name": "Name\n",
"alias": "名称、属性、属性名称、字段、字段名称、变量、参数,属性名",
"comment": "属性的唯一标识",
"type": "VARCHAR"
},
{
"name": "Value",
"alias": "值、变量值、参数值、数值,属性值",
"comment": "属性对应的实际值",
"type": "VARCHAR"
},
{
"name": "Type",
"alias": "类型、变量类型、数值类型,属性类型",
"comment": "属性变量的类型",
"type": "VARCHAR"
},
{
"name": "Unit",
"alias": "单位",
"comment": "单位",
"type": "VARCHAR"
}
]
}
]
}
@@ -0,0 +1,335 @@
{
"Table": [
{
"name": "ProjectQuantities",
"alias": "",
"comment": "工程量表是项目划分下工程量,包含定额、主材、设备、一笔性费用。查询示例: SELECT BudgetPrice FROM ProjectQuantities WHERE Name = 'findname'。",
"fields": [
{
"name": "Id",
"alias": "id",
"comment": "消耗量id,工程量id",
"type": "INT"
},
{
"name": "ParentId",
"alias": "父级id,父id",
"comment": "父级id",
"type": "INT"
},
{
"name": "ProjectDivisionId",
"alias": "项目划分id,项目id",
"comment": "父级项目划分id",
"type": "INT"
},
{
"name": "Quantity",
"alias": "个数,数量,数目",
"comment": "数量,消耗量数量,工程量数量,主材数量,定额数量,设备数量,项目划分单位",
"type": "REAL"
},
{
"name": "FeatureSegment",
"alias": "特征段",
"comment": "线路特征段",
"type": "VARCHAR"
},
{
"name": "ParentQuantity",
"alias": "父级个数,父级数量",
"comment": "父级id的数量",
"type": "REAL"
},
{
"name": "Name",
"alias": "名称",
"comment": "项目名称,工程量名称,消耗量名称,主材名称,定额名称,设备名称,材料名称",
"type": "VARCHAR"
},
{
"name": "Encoding",
"alias": "编码,译码",
"comment": "编码,定额编码,主材编码,设备编码",
"type": "VARCHAR"
},
{
"name": "SpecificationModel",
"alias": "规格型号",
"comment": "规格型号,主材规格型号,设备规格型号",
"type": "VARCHAR"
},
{
"name": "Unit",
"alias": "单位",
"comment": "单位,主材单位,定额单位,设备单位,项目划分单位",
"type": "VARCHAR"
},
{
"name": "BasePrice",
"alias": "基价",
"comment": "定额基价",
"type": "REAL"
},
{
"name": "LaborCost",
"alias": "人工费",
"comment": "定额人工费",
"type": "REAL"
},
{
"name": "MaterialCost",
"alias": "材料费",
"comment": "定额材料费",
"type": "REAL"
},
{
"name": "MachineryCost",
"alias": "机械费",
"comment": "定额机械费",
"type": "REAL"
},
{
"name": "QuotaCoefficient",
"alias": "定额系数",
"comment": "定额系数",
"type": "REAL"
},
{
"name": "LaborCoefficient",
"alias": "人工系数",
"comment": "定额人工系数",
"type": "REAL"
},
{
"name": "MaterialCoefficient",
"alias": "材料系数",
"comment": "定额材料系数",
"type": "REAL"
},
{
"name": "MechanicalCoefficient",
"alias": "机械系数",
"comment": "定额机械系数",
"type": "REAL"
},
{
"name": "ExpenseType",
"alias": "费用类型",
"comment": "费用类型,取值为取费、不取费",
"type": "VARCHAR"
},
{
"name": "BudgetPrice",
"alias": "预算价",
"comment": "预算价",
"type": "REAL"
},
{
"name": "MarketPrice",
"alias": "市场价",
"comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。",
"type": "REAL"
},
{
"name": "Supplier",
"alias": "供货方",
"comment": "供货方,设备供货方,主材供货方,取值为甲供、乙供",
"type": "VARCHAR"
},
{
"name": "Type",
"alias": "类型",
"comment": "工程量类型,取值定额、主材、设备、一笔性费用",
"type": "VARCHAR"
},
{
"name": "QuotaRange",
"alias": "定额范围",
"comment": "定额范围,取值概算、预算",
"type": "VARCHAR"
},
{
"name": "A_Supply_Material_Cost_Excluding_Tax",
"alias": "甲供材料费不含税",
"comment": "甲供材料费不含税",
"type": "REAL"
},
{
"name": "A_Supply_Material_Cost_Including_Tax",
"alias": "甲供材料费含税",
"comment": "甲供材料费含税",
"type": "REAL"
},
{
"name": "B_Supply_Material_Cost_Excluding_Tax",
"alias": "乙供材料费不含税",
"comment": "乙供材料费不含税",
"type": "REAL"
},
{
"name": "B_Supply_Material_Cost_Including_Tax",
"alias": "乙供材料费含税",
"comment": "乙供材料费含税",
"type": "REAL"
},
{
"name": "ScaffoldCalculation",
"alias": "脚手架计取",
"comment": "脚手架计取,取值计取、不计取",
"type": "VARCHAR"
},
{
"name": "Remarks",
"alias": "说明,备注",
"comment": "备注,说明",
"type": "VARCHAR"
},
{
"name": "FeeCollectionTableName",
"alias": "取费表",
"comment": "项目划分的取费表,工程量的取费表",
"type": "VARCHAR"
},
{
"name": "Quota_Section_Name",
"alias": "定额章节名称",
"comment": "定额章节名称",
"type": "VARCHAR"
},
{
"name": "ProfessionalType",
"alias": "专业类型",
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
"type": "VARCHAR"
},
{
"name": "split",
"alias": "拆分",
"comment": "是否为拆分材料,取值1为拆分,取值0为不拆分",
"type": "INT"
},
{
"name": "Loss",
"alias": "损耗",
"comment": "损耗率,主材损耗率",
"type": "REAL"
},
{
"name": "SingleWeight",
"alias": "单重",
"comment": "单重,主材单重",
"type": "REAL"
},
{
"name": "LineWeight",
"alias": "线重",
"comment": "线重,主材线重",
"type": "REAL"
},
{
"name": "SupervisedMaterials",
"alias": "监造物料",
"comment": "监造物料,取值1为监造物料,取值0为非监造物料",
"type": "INT"
},
{
"name": "EquipmentMaterials",
"alias": "设备性材料",
"comment": "设备性材料,取值1为设备性材料,取值0为主材",
"type": "INT"
},
{
"name": "GrossWeight",
"alias": "毛重",
"comment": "毛重,主材毛重",
"type": "VARCHAR"
},
{
"name": "TransportationType",
"alias": "运输类型",
"comment": "运输类型,主材运输类型",
"type": "VARCHAR"
},
{
"name": "TransportationMiscellaneous",
"alias": "运杂费率",
"comment": "运杂费率,设备运杂费率",
"type": "REAL"
},
{
"name": "EquipmentType",
"alias": "设备类型",
"comment": "设备类型,取值为主要设备、普通设备",
"type": "VARCHAR"
},
{
"name": "UnitPrice",
"alias": "单价",
"comment": "单价",
"type": "REAL"
},
{
"name": "Market_Price_Excluding_Tax",
"alias": "市场价不含税",
"comment": "市场价不含税",
"type": "REAL"
},
{
"name": "Market_Price_Including_Tax",
"alias": "市场价含税",
"comment": "市场价含税,设备含税价",
"type": "REAL"
},
{
"name": "Budget_Price_Excluding_Tax",
"alias": "预算价不含税",
"comment": "预算价不含税",
"type": "REAL"
},
{
"name": "Budget_Price_Including_Tax",
"alias": "预算价含税",
"comment": "预算价含税",
"type": "REAL"
},
{
"name": "Unit_Price_Excluding_Tax",
"alias": "单价不含税",
"comment": "单价不含税,设备不含税价",
"type": "REAL"
},
{
"name": "GroupPrice",
"alias": "分组合价",
"comment": "分组合价",
"type": "REAL"
},
{
"name": "Pump_Truck_Pouring",
"alias": "泵车浇制",
"comment": "泵车浇制,取值1为泵车浇制,取值0为非泵车浇制",
"type": "INT"
},
{
"name": "On_Site_Preparation",
"alias": "现场制备",
"comment": "现场制备,取值1为现场制备,取值0为非现场制备",
"type": "INT"
},
{
"name": "Clear_Water_Concrete",
"alias": "清水混凝土",
"comment": "清水混凝土,取值1为清水混凝土,取值0为非清水混凝土",
"type": "INT"
},
{
"name": "Debugging_Fee_Calculation",
"alias": "调试费计取",
"comment": "调试费计取,取值计取、不计取",
"type": "VARCHAR"
}
]
}
]
}
@@ -0,0 +1,101 @@
{
"Table": [
{
"name": "TotalCalculateTable",
"alias": "",
"comment": "总算表也被称为“工程总费用”、“工程费用”。其中包含本地工程、辅助设施工程、编制基准期价差、设备购置费、其他费用、基本预备费、特殊费用、工程静态投资、动态费用、价差预备费、建设期贷款利息、工程动态投资、可抵扣增值税额。查询示例: SELECT Amount FROM TotalCalculateTable WHERE Name = 'findname'。",
"fields": [
{
"name": "Id",
"alias": "项目idid,费用id",
"comment": "费用项目id",
"type": "INT"
},
{
"name": "ParentId",
"alias": "父级id,父id",
"comment": "费用项目父级id",
"type": "INT"
},
{
"name": "Level",
"alias": "层号,层级,层编号",
"comment": "层级编号,从1开始",
"type": "INT"
},
{
"name": "Name",
"alias": "名称,费用名,项目名",
"comment": "费用名称,项目名称",
"type": "VARCHAR"
},
{
"name": "SerialNumber",
"alias": "序号",
"comment": "工程费用序号",
"type": "VARCHAR"
},
{
"name": "Code",
"alias": "编号,代号,代码",
"comment": "费用代码",
"type": "VARCHAR"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "费率",
"type": "REAL"
},
{
"name": "Amount",
"alias": "金额,价格",
"comment": "合计费",
"type": "REAL"
},
{
"name": "WBS_Code",
"alias": "WBS编号,WBS编码",
"comment": "费用编码",
"type": "VARCHAR"
},
{
"name": "Path",
"alias": "路径,费用全路径",
"comment": "费用名称全路径",
"type": "VARCHAR"
},
{
"name": "Amount_InstallationCost",
"alias": "安装金额,金额_安装费,安装价格",
"comment": "安装费金额",
"type": "REAL"
},
{
"name": "Amount_EquipmentCost",
"alias": "金额_设备费,设备金额,设备价格",
"comment": "设备费金额",
"type": "REAL"
},
{
"name": "Amount_OtherCost",
"alias": "其他费用金额,金额_其他费,其他费用价格",
"comment": "其他费金额",
"type": "REAL"
},
{
"name": "Amount_Total",
"alias": "总的金额,金额_占总计,总体金额",
"comment": "合计费占总计",
"type": "REAL"
},
{
"name": "Amount_UnitInvestment",
"alias": "金额_单位投资,合计投资金额",
"comment": "合计费单位投资",
"type": "REAL"
}
]
}
]
}
Binary file not shown.
Binary file not shown.
+5 -5
View File
@@ -1,5 +1,7 @@
from dotenv import load_dotenv
from llama_index.core.node_parser import SentenceSplitter
load_dotenv()
import logging
@@ -11,24 +13,22 @@ from fastapi.responses import RedirectResponse
from app.api.routers.chat import chat_router
from app.api.routers.upload import file_upload_router
from app.api.routers.app import v1_router
from app.settings import init_settings,init_ProjectInfo
from app.settings import init_settings
from app.observability import init_observability
from fastapi.staticfiles import StaticFiles
from phoenix.trace import using_project
# Use the logger named "uvicorn" for application messages.
logger = logging.getLogger("uvicorn")
# Open a Phoenix project context named by the PHOENIX_PROJECT_NAME variable.
# NOTE(review): the context manager is entered by hand and no matching
# __exit__ is visible in this chunk — presumably it is meant to stay open for
# the whole process lifetime; confirm.
usPrj = using_project(os.getenv("PHOENIX_PROJECT_NAME"))
usPrj.__enter__()
import nest_asyncio
# Applied before the settings/observability initialisers run.
# NOTE(review): presumably needed so nested event-loop use works; confirm.
nest_asyncio.apply()
# Start-up initialisers, run in this order before the FastAPI app is created.
init_settings()
init_observability()
init_ProjectInfo()
app = FastAPI()
environment = os.getenv("ENVIRONMENT", "dev") # Defaults to "dev" when ENVIRONMENT is not set
-64
View File
@@ -1,64 +0,0 @@
from llama_index.llms.dashscope import DashScope
from llama_index.core.base.llms.types import LLMMetadata
class DashScopeGenerationModels:
    """Model-name constants for the DashScope Qwen series."""

    QWEN_TURBO = "qwen-turbo"
    QWEN_PLUS = "qwen-plus"
    QWEN_MAX = "qwen-max"
    QWEN_MAX_1201 = "qwen-max-1201"
    QWEN_MAX_LONGCONTEXT = "qwen-max-longcontext"
    # No trailing comma here: a comma would make this a one-element tuple
    # and the constant would no longer equal the model-name string.
    QWEN2_MATH_72B_INSTRUCT = "qwen2-math-72b-instruct"
    QWEN2_72B = "qwen2-72b-instruct"
# Per-model metadata table. Each row is (model constant, context_window
# multiplier, num_output multiplier); both sizes are multiples of 1024 and
# every entry is flagged as a chat model.
DASHSCOPE_MODEL_META = {
    model_name: {
        "context_window": 1024 * context_k,
        "num_output": 1024 * output_k,
        "is_chat_model": True,
    }
    for model_name, context_k, output_k in (
        (DashScopeGenerationModels.QWEN_TURBO, 8, 8),
        (DashScopeGenerationModels.QWEN_PLUS, 32, 32),
        (DashScopeGenerationModels.QWEN_MAX, 8, 8),
        (DashScopeGenerationModels.QWEN_MAX_1201, 8, 8),
        (DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT, 30, 30),
        (DashScopeGenerationModels.QWEN2_MATH_72B_INSTRUCT, 2, 8),
        (DashScopeGenerationModels.QWEN2_72B, 2, 8),
    )
}
class CustomDashScope(DashScope):
    """DashScope LLM whose metadata is read from ``DASHSCOPE_MODEL_META``."""

    @property
    def metadata(self) -> LLMMetadata:
        """Return the metadata for ``self.model_name``.

        ``num_output`` is taken from ``self.max_tokens`` when that is set
        (truthy), otherwise from the table entry.

        Raises:
            KeyError: If ``self.model_name`` has no entry in
                ``DASHSCOPE_MODEL_META``.
        """
        # Work on a copy: writing into the module-level table would make one
        # instance's max_tokens the default for every later instance that
        # uses the same model.
        model_meta = dict(DASHSCOPE_MODEL_META[self.model_name])
        model_meta["num_output"] = self.max_tokens or model_meta["num_output"]
        return LLMMetadata(model_name=self.model_name, **model_meta)
Binary file not shown.
Binary file not shown.
File diff suppressed because it is too large Load Diff
Binary file not shown.
+959 -5319
View File
File diff suppressed because it is too large Load Diff
+24 -56
View File
@@ -10,54 +10,43 @@ readme = "README.md"
generate = "app.engine.generate:generate_datasource"
[tool.poetry.dependencies]
python = "^3.11,<3.13"
fastapi = "0.110.3"
python-dotenv = "^1.0.1"
python = "^3.11,<3.12"
fastapi = "^0.110.3"
python-dotenv = "^1.0.0"
aiostream = "^0.6.2"
cachetools = "^5.5.0"
llama-index = "0.10.63"
cachetools = "^5.3.3"
protobuf = "4.25.4"
nltk = "^3.9.1"
nltk = "^3.8.2"
jieba = "^0.42.1"
transformers = "^4.43.0"
#arize-phoenix = "^4.12.0"
openinference-instrumentation-llama-index="^3.0.2"
llama-index = "^0.11.7"
llama-index-core = "^0.11.7"
llama-index-callbacks-arize-phoenix = "^0.2.1"
llama-index-llms-dashscope = "^0.2.0"
llama-index-embeddings-dashscope = "^0.2.1"
#llama-index-postprocessor-dashscope-rerank = "^0.2.0"
llama-index-llms-ollama = "^0.3.1"
llama-index-embeddings-ollama = "^0.3.0"
xinference = "^0.15.0"
xinference-client = "^0.15.0"
llama-index-llms-xinference = "^0.2.1"
llama-index-embeddings-xinference = "^0.1.0"
llama-index-postprocessor-xinference-rerank = "^0.1.0"
qdrant-client="^1.11.0"
llama-index-vector-stores-qdrant = "^0.3.0"
openinference-instrumentation-llama-index="2.2.3"
llama-index-callbacks-arize-phoenix = "^0.1.4"
llama-index-llms-dashscope = "^0.1.2"
llama-index-embeddings-dashscope = "^0.1.4"
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
xinference = "^0.14.1"
xinference-client = "^0.14.1"
llama-index-llms-xinference = "^0.1.2"
qdrant-client="^1.10.1"
llama-index-vector-stores-qdrant = "^0.2.14"
chroma="^0.2.0"
llama-index-vector-stores-chroma = "^0.2.0"
llama-index-readers-json = "^0.2.0"
llama-index-retrievers-bm25 = "^0.3.0"
llama-index-experimental = "^0.3.0"
llama-index-vector-stores-chroma = "^0.1.10"
llama-index-readers-json = "^0.1.5"
llama-index-retrievers-bm25 = "^0.2.2"
duckduckgo_search = "^6.2.10"
duckduckgo_search = "^6.2.6"
[tool.poetry.dependencies.uvicorn]
extras = [ "standard" ]
version = "^0.30.6"
version = "^0.23.2"
[tool.poetry.dependencies.llama-index-readers-database]
version = "^0.2.0"
version = "^0.1.3"
[tool.poetry.dependencies.pymysql]
version = "^1.1.1"
version = "^1.1.0"
extras = [ "rsa" ]
#[tool.poetry.dependencies.psycopg2]
@@ -70,30 +59,9 @@ extras = [ "rsa" ]
version = "^0.8"
[tool.poetry.dependencies.e2b_code_interpreter]
version = "^0.0.7"
[[tool.poetry.source]]
name = "ali"
url = "https://mirrors.aliyun.com/pypi/simple/"
priority = "primary"
[[tool.poetry.source]]
name = "tencent"
url = "https://mirrors.cloud.tencent.com/pypi/simple/"
priority = "primary"
[[tool.poetry.source]]
name = "tsinghua"
url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/"
priority = "primary"
version = "0.0.7"
[build-system]
requires = [ "poetry-core" ]
build-backend = "poetry.core.masonry.api"
tiktoken
-138
View File
@@ -1,138 +0,0 @@
import nest_asyncio
nest_asyncio.apply()
from llama_index.core import SimpleDirectoryReader
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import VectorStoreIndex
from llama_index.core.evaluation import (
FaithfulnessEvaluator,
DatasetGenerator,
CorrectnessEvaluator,
SemanticSimilarityEvaluator,
)
from llama_index.experimental.param_tuner import ParamTuner
from llama_index.experimental.param_tuner.base import RunResult
from llama_index.llms.openai import OpenAI
import asyncio
# Initialise the environment: load the .env file first, then run the
# application's settings and observability initialisers.
from app.observability import init_observability
from app.settings import init_settings
from dotenv import load_dotenv
load_dotenv()
init_settings()
init_observability()
# Load the documents to evaluate on (hard-coded local directory).
documents = SimpleDirectoryReader("D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test").load_data()
# Parameter grid: candidate values for each tunable parameter.
param_dict = {
"chunk_size": [512, 1024],
"top_k": [1, 5],
"temperature": [0.1, 1.0]
}
# 辅助函数
def _build_index(chunk_size, documents):
    """Build a vector index over ``documents`` split with the given chunk size."""
    sentence_splitter = SentenceSplitter(chunk_size=chunk_size)
    return VectorStoreIndex.from_documents(
        documents,
        transformations=[sentence_splitter],
    )
# 评估函数
def evaluate_query_engine(query_engine, questions):
    """Run the async evaluation to completion and return ``(correct, total)``."""
    event_loop = asyncio.get_event_loop()
    pending = _evaluate_query_engine_async(query_engine, questions)
    passed, asked = event_loop.run_until_complete(pending)
    return passed, asked
async def _evaluate_query_engine_async(query_engine, questions):
    """Query the engine concurrently and count faithful responses.

    Args:
        query_engine: Object exposing an awaitable ``aquery(question)``.
        questions: Questions to send to the engine.

    Returns:
        ``(passing_count, response_count)``.
    """
    responses = await asyncio.gather(*(query_engine.aquery(q) for q in questions))
    if not responses:
        return 0, 0

    # One evaluator serves every response; building a new one per response
    # repeated the same construction for no benefit.
    evaluator = FaithfulnessEvaluator()
    total_correct = sum(
        1 for r in responses if evaluator.evaluate_response(response=r).passing
    )
    return total_correct, len(responses)
# Generate evaluation questions from the loaded documents.
question_generator = DatasetGenerator.from_documents(documents)
# NOTE(review): the original comment mentioned 10 questions, but the call
# passes 1 — confirm the intended number.
eval_questions = question_generator.generate_questions_from_nodes(1)

# Print the generated questions, numbered from 1.
for i, q in enumerate(eval_questions, 1):
    print(f"问题 {i}: {q}")
# 目标函数
def objective_function(params_dict, documents, questions):
    """Score one parameter combination by the faithfulness of its answers.

    Args:
        params_dict: Mapping with the keys ``"chunk_size"``, ``"top_k"`` and
            ``"temperature"``.
        documents: Documents to index.
        questions: Questions to ask the query engine built from the index.

    Returns:
        A ``RunResult`` whose score is the fraction of *questions* whose
        response passed the faithfulness evaluation (0 when there are no
        questions).
    """
    chunk_size = params_dict["chunk_size"]
    top_k = params_dict["top_k"]
    temperature = params_dict["temperature"]

    vector_index = _build_index(chunk_size, documents)
    # NOTE(review): it is not visible here whether as_query_engine applies
    # `temperature` to the LLM — confirm this knob actually has an effect.
    query_engine = vector_index.as_query_engine(
        similarity_top_k=top_k, temperature=temperature
    )

    # One evaluator for the whole run; constructing it per question was
    # loop-invariant work.
    evaluator = FaithfulnessEvaluator()
    correct, total = 0, len(questions)
    question_answers = []  # (question, answer text) pairs for later inspection
    for question in questions:
        response = query_engine.query(question)
        if response is None:
            # Unanswered questions still count towards `total`.
            continue
        question_answers.append((question, response.response))
        eval_result = evaluator.evaluate_response(response=response, query_str=question)
        if eval_result.passing:
            correct += 1

    score = correct / total if total > 0 else 0
    # NOTE(review): `question_answers` is passed as an extra keyword — confirm
    # RunResult keeps it (it may need to go into a metadata field instead).
    return RunResult(score=score, params=params_dict, question_answers=question_answers)
# Create the ParamTuner; param_fn binds the documents and questions loaded above
# so the tuner only has to supply each parameter combination.
param_tuner = ParamTuner(
param_fn=lambda params_dict: objective_function(params_dict, documents, eval_questions),
param_dict=param_dict,
show_progress=True,
)
# Run the tuning and read the parameters of the best-scoring run.
results = param_tuner.tune()
best_result = results.best_run_result
best_top_k = best_result.params["top_k"]
best_chunk_size = best_result.params["chunk_size"]
best_temperature = best_result.params["temperature"]
print(f"得分: {best_result.score}")
print(f"Top-k: {best_top_k}")
print(f"文本块大小: {best_chunk_size}")
print(f"温度: {best_temperature}")
# Rebuild the index and query engine with the best parameters and ask every
# question again, collecting (question, answer) pairs.
best_vector_index = _build_index(best_chunk_size, documents)
best_query_engine = best_vector_index.as_query_engine(
similarity_top_k=best_top_k, temperature=best_temperature
)
best_question_answers = []
for question in eval_questions:
response = best_query_engine.query(question)
if response is not None:
best_question_answers.append((question, response.response))
# Print the questions and answers obtained with the best parameters.
for i, (question, answer) in enumerate(best_question_answers, start=1):
print(f"最佳参数 - 问题 {i}: {question}\n答案: {answer}\n")
-81
View File
@@ -1,81 +0,0 @@
from app.observability import init_observability
from app.settings import init_settings
from dotenv import load_dotenv
import nest_asyncio
nest_asyncio.apply()
load_dotenv()
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import (
VectorStoreIndex,
SimpleDirectoryReader,
Response,
)
from llama_index.core.evaluation import (
FaithfulnessEvaluator,
DatasetGenerator,
CorrectnessEvaluator,
SemanticSimilarityEvaluator,)
# Run the project's settings and observability initialisers before any index or evaluator is built.
init_settings()
init_observability()
# Evaluators. Only faith_evaluator_qwen is used by the code visible in this script.
faith_evaluator_qwen = FaithfulnessEvaluator() # faithfulness evaluation
corr_evaluator_qwen = CorrectnessEvaluator() # correctness evaluation
Seman_evaluator_qwen = SemanticSimilarityEvaluator()# embedding-similarity evaluation
# NOTE(review): hard-coded absolute Windows path — the script only runs on a machine with this directory layout.
documents = SimpleDirectoryReader("D:/LLM_model/text2sql/zjdataai-app-test/backend/data-test").load_data()
# Index the documents using 512-sized sentence chunks.
splitter = SentenceSplitter(chunk_size=512)
vector_index = VectorStoreIndex.from_documents(
documents, transformations=[splitter],
)
# # Run a single evaluation (disabled)
# query_engine = vector_index.as_query_engine()
# response_vector = query_engine.query("工程监理费的金额是多少?")
# eval_result = evaluator_qwen.evaluate_response(response=response_vector)
# print(response_vector)
# print(eval_result)
# Generate evaluation questions from the documents and print them.
question_generator = DatasetGenerator.from_documents(documents)
eval_questions = question_generator.generate_questions_from_nodes(5)
print(eval_questions)
import asyncio
async def evaluate_query_engine_async(query_engine, questions):
    """Query the engine for all questions concurrently and count faithful answers.

    Args:
        query_engine: Object exposing an ``aquery(question)`` coroutine method.
        questions: Iterable of questions to send.

    Returns:
        ``(passing, total)`` as judged by the module-level
        ``faith_evaluator_qwen``.
    """
    pending = [query_engine.aquery(question) for question in questions]
    responses = await asyncio.gather(*pending)
    passed = 0
    for response in responses:
        if faith_evaluator_qwen.evaluate_response(response=response).passing:
            passed += 1
    return passed, len(responses)
def evaluate_query_engine(query_engine, questions):
    """Run ``evaluate_query_engine_async`` to completion and return ``(correct, total)``.

    Uses the loop returned by ``asyncio.get_event_loop()``.
    """
    event_loop = asyncio.get_event_loop()
    pending = evaluate_query_engine_async(query_engine, questions)
    n_passed, n_total = event_loop.run_until_complete(pending)
    return n_passed, n_total
# Evaluate a default query engine on at most the first five generated questions
# and print the result as passed/total.
vector_query_engine = vector_index.as_query_engine()
correct, total = evaluate_query_engine(vector_query_engine, eval_questions[:5])
print(f"score: {correct}/{total}")
+1 -2
View File
@@ -8,8 +8,7 @@ from llama_index.core import VectorStoreIndex, SQLDatabase
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
from app.api.routers.chat import generate_filters
from app.engine import get_index
from app.engine.engine import makeDescriptionByEngine
from app.engine import get_index, makeDescriptionByEngine
from app.engine.loaders.db import CustomDatabaseReader
from app.engine.vectordb import get_vector_store
from app.observability import init_observability
+1 -2
View File
@@ -7,8 +7,7 @@ from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
from sqlalchemy import create_engine
from app.api.routers.chat import generate_filters
from app.engine import get_index
from app.engine.engine import makeDescriptionByEngine
from app.engine import get_index, makeDescriptionByEngine
from app.engine.vectordb import get_vector_store
from app.observability import init_observability
from app.settings import init_settings
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "人工费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是6.3500000000"
},
{
"question": "乙供装置性材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "甲供装置性材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "夜间施工增加费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "装置性材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "冬雨季施工增加费的费率是多少?",
"answer": "费率是3.5700000000"
},
{
"question": "材料费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "机械价差的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "规费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接工程费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "企业管理费的费率是多少?",
"answer": "费率是35.7600000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "合计的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "直接工程费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "社会保险费的费率是多少?",
"answer": "费率是15.0000000000"
},
{
"question": "间接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "合计的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "利润的费率是多少?",
"answer": "费率是5.2400000000"
},
{
"question": "税金的费率是多少?",
"answer": "费率是9.0000000000"
},
{
"question": "社会保险费的费率是多少?",
"answer": "费率是15.0000000000"
},
{
"question": "直接工程费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "乙供设备不含税价的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "企业管理费的费率是多少?",
"answer": "费率是17.1300000000"
},
{
"question": "企业管理费的费率是多少?",
"answer": "费率是35.7600000000"
},
{
"question": "夜间施工增加费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "夜间施工增加费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "甲供设备含税价的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "施工机械使用费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "安全文明施工费的费率是多少?",
"answer": "费率是3.5500000000"
},
{
"question": "定额直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "主材费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "直接费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "施工企业配合调试费的费率是多少?",
"answer": "费率是0E-10"
},
{
"question": "施工机械使用费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是6.3500000000"
},
{
"question": "施工工具用具使用费的费率是多少?",
"answer": "费率是3.8200000000"
},
{
"question": "措施费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "材料价差的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "措施费的费率是多少?",
"answer": "费率是100.0000000000"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "前期工作管理费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "特种设备安全监测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "工程监理费的金额是多少?",
"answer": "金额是131009.9200000000"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "生产准备费的金额是多少?",
"answer": "金额是472373669.4635599852"
},
{
"question": "电力工程技术经济标准编制费的金额是多少?",
"answer": "金额是84352440.9756360054"
},
{
"question": "项目建设技术服务费的金额是多少?",
"answer": "金额是16855957065.4302005768"
},
{
"question": "工程保险费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "其他的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "施工图文件评审费的金额是多少?",
"answer": "金额是24940.0000000000"
},
{
"question": "节能评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "项目前期工作费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "其他的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "项目法人管理费的金额是多少?",
"answer": "金额是986923559.4149370193"
},
{
"question": "专业爆破服务费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "节能评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "用地预审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设备材料监造费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设备材料监造费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "勘察费的金额是多少?",
"answer": "金额是12122154260.0000000000"
},
{
"question": "项目法人管理费的金额是多少?",
"answer": "金额是986923559.4149370193"
},
{
"question": "社会稳定风险评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "勘察费的金额是多少?",
"answer": "金额是12122154260.0000000000"
},
{
"question": "环境影响评价费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "使用林地可行性研究费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设计费的金额是多少?",
"answer": "金额是4042055949.4299998283"
},
{
"question": "环境监测及环境保护验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "建设场地征用及清理费的金额是多少?",
"answer": "金额是16831284.2287110016"
},
{
"question": "施工图文件评审费的金额是多少?",
"answer": "金额是24940.0000000000"
},
{
"question": "项目后评价费的金额是多少?",
"answer": "金额是421762204.8781780005"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "勘察设计费的金额是多少?",
"answer": "金额是16164210209.4300003052"
},
{
"question": "前期工作管理费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "节能评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "初步设计文件评审费的金额是多少?",
"answer": "金额是18560.0000000000"
},
{
"question": "特种设备安全监测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "初步设计文件评审费的金额是多少?",
"answer": "金额是18560.0000000000"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "矿产压覆评估费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "设计费的金额是多少?",
"answer": "金额是4042055949.4299998283"
},
{
"question": "水土保持方案编审费用的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "电力工程技术经济标准编制费的金额是多少?",
"answer": "金额是84352440.9756360054"
},
{
"question": "桩基检测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "矿产压覆评估费用的金额是多少?",
"answer": "金额是0E-10"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "预制基础的合价是多少?",
"answer": "合价是40567.2639480000"
},
{
"question": "绝缘子串及金具安装的合价是多少?",
"answer": "合价是2897171.9878110001"
},
{
"question": "杆塔工程材料工地运输的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础防护的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "(1)拆除后能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "地基处理的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "灌注桩基础的合价是多少?",
"answer": "合价是43466660.0544390008"
},
{
"question": "(1)拆除后能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "悬垂绝缘子串及金具安装的合价是多少?",
"answer": "合价是1251465.0340440001"
},
{
"question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "附件安装工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "导地线跨越架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "辅助工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "绝缘子串及金具安装的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "护坡、挡土墙及排洪沟砌筑的合价是多少?",
"answer": "合价是709931.9013930000"
},
{
"question": "锚杆基础的合价是多少?",
"answer": "合价是15344967.9002950005"
},
{
"question": "建筑工程的合价是多少?",
"answer": "合价是25411.2790780000"
},
{
"question": "辅助工程的合价是多少?",
"answer": "合价是1046253.4135240000"
},
{
"question": "导地线跨越架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "电缆工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "输、送电线路试运的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础土石方工程的合价是多少?",
"answer": "合价是32872843180.7429008484"
},
{
"question": "基础永久性围堰的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础永久性围堰的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "混凝土及钢筋混凝土结构的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "输、送电线路试运的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "混合结构的合价是多少?",
"answer": "合价是16967.5193850000"
},
{
"question": "杆塔组立的合价是多少?",
"answer": "合价是2253906.0859830002"
},
{
"question": "附件安装工程的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "接地工程材料工地运输的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "新增项目名称的合价是多少?",
"answer": "合价是27148.0310160000"
},
{
"question": "导地线架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "护坡、挡土墙及排洪沟的合价是多少?",
"answer": "合价是709931.9013930000"
},
{
"question": "(1)拆除后能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "基础永久性围堰砌筑的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "(2)拆除后不能利用的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "安装工程的合价是多少?",
"answer": "合价是65324.9496330000"
},
{
"question": "尖峰、施工基面土石方工程的合价是多少?",
"answer": "合价是325205.4178770000"
},
{
"question": "架线工程的合价是多少?",
"answer": "合价是4844399648.0778598785"
},
{
"question": "杆塔组立的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "架线工程材料工地运输的合价是多少?",
"answer": "合价是2088570123.2409000397"
},
{
"question": "导地线架设的合价是多少?",
"answer": "合价是0E-10"
},
{
"question": "耐张绝缘子串及金具安装的合价是多少?",
"answer": "合价是1645706.9537680000"
},
{
"question": "架线工程材料工地运输的合价是多少?",
"answer": "合价是2088570123.2409000397"
},
{
"question": "其他基础的合价是多少?",
"answer": "合价是3839666.7656879998"
},
{
"question": "架线工程材料工地运输的合价是多少?",
"answer": "合价是0E-10"
}
]
@@ -1,202 +0,0 @@
[
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是440877984.9458540082"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是1086586.9018659999"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是51486.7898090000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是3321.8139230000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是78005.0340730000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是3535892767.0972299576"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是24045.2334060000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是336253.7482950000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是142270.1346780000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是61049.8665780000"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是933061.7795919999"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是182949.5997350000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(余物清理)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是21220645.1637400016"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是933061.7795919999"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是2501470269.7231497765"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是51486.7898090000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是55265.9111100000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是442897633.6273120046"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是1057484.3306960000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是442897633.6273120046"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是21220645.1637400016"
},
{
"question": "线路取费表(余物清理)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是336253.7482950000"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "的直接费是多少?",
"answer": "直接费是61049.8665780000"
},
{
"question": "线路取费表(余物清理)(1)的直接费是多少?",
"answer": "直接费是61049.8665780000"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是24045.2334060000"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(余物清理)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是659466.5955000001"
},
{
"question": "线路取费表(拆除)的直接费是多少?",
"answer": "直接费是0E-10"
},
{
"question": "线路取费表的直接费是多少?",
"answer": "直接费是2501470269.7231497765"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "降阻剂_数量的属性值是多少?",
"answer": "属性值是f"
},
{
"question": "导线2_单位单价的属性值是多少?",
"answer": "属性值是9"
},
{
"question": "导线_单公里用量的属性值是多少?",
"answer": "属性值是36"
},
{
"question": "线路参数_导地线防震措施的属性值是多少?",
"answer": "属性值是457"
},
{
"question": "合成绝缘子_数量的属性值是多少?",
"answer": "属性值是5"
},
{
"question": "基础垫层的属性值是多少?",
"answer": "属性值是"
},
{
"question": "其中:基础护壁用量的属性值是多少?",
"answer": "属性值是74394.212"
},
{
"question": "铺石加混凝土的属性值是多少?",
"answer": "属性值是0.0"
},
{
"question": "导线用量(西北)的属性值是多少?",
"answer": "属性值是-795976.0855"
},
{
"question": "导线单公里用量(西北)的属性值是多少?",
"answer": "属性值是-159195.2171"
},
{
"question": "灰土垫层单公里用量(西北)的属性值是多少?",
"answer": "属性值是8.0"
},
{
"question": "地线瓷绝缘子单公里用量(西北)的属性值是多少?",
"answer": "属性值是738.253"
},
{
"question": "地形条件_高山的属性值是多少?",
"answer": "属性值是7"
},
{
"question": "流砂坑比例的属性值是多少?",
"answer": "属性值是0.001"
},
{
"question": "碎石_数量的属性值是多少?",
"answer": "属性值是12"
},
{
"question": "线路参数_导地线防震措施的属性值是多少?",
"answer": "属性值是457"
},
{
"question": "灰土垫层的属性值是多少?",
"answer": "属性值是40.0"
},
{
"question": "交叉跨越_弱电线路的属性值是多少?",
"answer": "属性值是45"
},
{
"question": "地线1_根数的属性值是多少?",
"answer": "属性值是12"
},
{
"question": "土质比例_岩石(人凿)的属性值是多少?",
"answer": "属性值是49"
},
{
"question": "耐张混凝土杆基数的属性值是多少?",
"answer": "属性值是26.0"
},
{
"question": "设计单位的属性值是多少?",
"answer": "属性值是3"
},
{
"question": "接地钢的属性值是多少?",
"answer": "属性值是"
},
{
"question": "间隔棒_单公里用量的属性值是多少?",
"answer": "属性值是r"
},
{
"question": "导线其中:跳线和导线弧垂单公里用量(西北)的属性值是多少?",
"answer": "属性值是159203.0171"
},
{
"question": "桩基础的属性值是多少?",
"answer": "属性值是310.0"
},
{
"question": "降阻剂的属性值是多少?",
"answer": "属性值是"
},
{
"question": "可抵扣增值税(万元)的属性值是多少?",
"answer": "属性值是2005241.808822"
},
{
"question": "主要技术经济指标2的属性值是多少?",
"answer": "属性值是"
},
{
"question": "合成绝缘子_数量的属性值是多少?",
"answer": "属性值是5"
},
{
"question": "土质比例_水坑的属性值是多少?",
"answer": "属性值是47"
},
{
"question": "基础_插入式的属性值是多少?",
"answer": "属性值是3"
},
{
"question": "耐张角钢塔比例的属性值是多少?",
"answer": "属性值是250%"
},
{
"question": "地线的属性值是多少?",
"answer": "属性值是"
},
{
"question": "回路数的属性值是多少?",
"answer": "属性值是三回"
},
{
"question": "导线其中:跳线和导线弧垂用量的属性值是多少?",
"answer": "属性值是796015.0855"
},
{
"question": "OPGW用量(西北)的属性值是多少?",
"answer": "属性值是2904.737"
},
{
"question": "现浇混凝土_单公里用量的属性值是多少?",
"answer": "属性值是22"
},
{
"question": "架线工程费用(万元)(含价差)的属性值是多少?",
"answer": "属性值是3203726.0"
},
{
"question": "耐张钢管塔比例的属性值是多少?",
"answer": "属性值是300%"
},
{
"question": "单公里土石方量_基面的属性值是多少?",
"answer": "属性值是8*8"
},
{
"question": "地线2的属性值是多少?",
"answer": "属性值是"
},
{
"question": "降阻剂的属性值是多少?",
"answer": "属性值是"
},
{
"question": "土质比例的属性值是多少?",
"answer": "属性值是"
},
{
"question": "地线1_单位单价的属性值是多少?",
"answer": "属性值是113"
},
{
"question": "绝缘子串型式_悬垂串的属性值是多少?",
"answer": "属性值是48"
},
{
"question": "基坑土石方量(西北)的属性值是多少?",
"answer": "属性值是405403506.156"
},
{
"question": "基坑坚土的属性值是多少?",
"answer": "属性值是25585167.713"
},
{
"question": "基坑普通土的属性值是多少?",
"answer": "属性值是313873965.334"
},
{
"question": "瓷绝缘子单公里用量(西北)的属性值是多少?",
"answer": "属性值是201.0"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 水坑 坑深2.0m以内的编码是多少?",
"answer": "编码是YX2-72"
},
{
"question": "钢筋加工及制作的编码是多少?",
"answer": "编码是YX3-43"
},
{
"question": "船舶运输 线材 每件重400kg以内 运输的编码是多少?",
"answer": "编码是YX1-132"
},
{
"question": "船舶运输 钢管塔材 运输的编码是多少?",
"answer": "编码是YX1-152"
},
{
"question": "碎石的编码是多少?",
"answer": "编码是C10020103"
},
{
"question": "混凝土(保护帽)的编码是多少?",
"answer": "编码是ZH1001"
},
{
"question": "船舶运输 金具、绝缘子、零星钢材 运输的编码是多少?",
"answer": "编码是YX1-144"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "船舶运输 线材 每件重1000kg以内 运输的编码是多少?",
"answer": "编码是YX1-136"
},
{
"question": "混凝土搅拌及浇制 每基基础联系梁混凝土量20m³以内的编码是多少?",
"answer": "编码是YX3-69"
},
{
"question": "索道运输 循环式 塔材 荷载1t以内 装卸的编码是多少?",
"answer": "编码是YX1-185"
},
{
"question": "人力运输 混凝土预制品 每件重100kg以内的编码是多少?",
"answer": "编码是YX1-6"
},
{
"question": "船舶运输 混凝土杆 每件重1500kg以上 运输的编码是多少?",
"answer": "编码是YX1-118"
},
{
"question": "碎石的编码是多少?",
"answer": "编码是C10020103"
},
{
"question": "电杆坑、塔坑、拉线坑人工挖方(或爆破)及回填 泥水 坑深8.0m以上的编码是多少?",
"answer": "编码是YX2-55"
},
{
"question": "机械施工土方 场地平整的编码是多少?",
"answer": "编码是GT1-1"
},
{
"question": "汽车运输 混凝土预制品 每件重100kg以内 装卸的编码是多少?",
"answer": "编码是YX1-69"
},
{
"question": "汽车运输 其他建筑安装材料 运输的编码是多少?",
"answer": "编码是YX1-108"
},
{
"question": "钻孔灌注桩基础 混凝土搅拌及浇制 孔深10m以内的编码是多少?",
"answer": "编码是YX3-171"
},
{
"question": "线路复测及分坑 直线双杆及拉线塔的编码是多少?",
"answer": "编码是YX2-3"
},
{
"question": "氧化锌避雷器安装 35kV的编码是多少?",
"answer": "编码是YX7-32"
},
{
"question": "混凝土(保护帽)的编码是多少?",
"answer": "编码是ZH1002"
},
{
"question": "汽车运输 其他建筑安装材料 装卸的编码是多少?",
"answer": "编码是YX1-107"
},
{
"question": "船舶运输 混凝土杆 每件重500kg以内 装卸的编码是多少?",
"answer": "编码是YX1-109"
},
{
"question": "混凝土(保护帽)的编码是多少?",
"answer": "编码是ZH1001"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "普通硅酸盐水泥的编码是多少?",
"answer": "编码是C09010102"
},
{
"question": "拖拉机运输 钢管塔材 运输的编码是多少?",
"answer": "编码是YX1-44"
},
{
"question": "尖峰及施工基面挖方(或爆破) 普通土的编码是多少?",
"answer": "编码是YX2-226"
},
{
"question": "汽车运输 角钢塔材 装卸的编码是多少?",
"answer": "编码是YX1-103"
},
{
"question": "接地槽挖方(或爆破)及回填 普通土的编码是多少?",
"answer": "编码是YX2-213"
},
{
"question": "水的编码是多少?",
"answer": "编码是C21010101"
},
{
"question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV 针式单联串(悬垂串)的编码是多少?",
"answer": "编码是YX6-21"
},
{
"question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV I型双联串(悬垂串)的编码是多少?",
"answer": "编码是YX6-22"
},
{
"question": "钻孔灌注桩基础 机械推钻成孔 砂砾石 孔深20m以内 孔径1.0m以内的编码是多少?",
"answer": "编码是YX3-117"
},
{
"question": "线路复测及分坑 直线自立塔的编码是多少?",
"answer": "编码是YX2-6"
},
{
"question": "钻孔灌注桩基础 凿桩头 桩径0.8m以上的编码是多少?",
"answer": "编码是YX3-180"
},
{
"question": "线路复测及分坑 耐张(转角)单杆的编码是多少?",
"answer": "编码是YX2-2"
},
{
"question": "中砂的编码是多少?",
"answer": "编码是C10010101"
},
{
"question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
"answer": "编码是YX1-1"
},
{
"question": "带电跨越电力线 被跨线电压等级 35kV的编码是多少?",
"answer": "编码是YX5-186"
},
{
"question": "人工挖土方 普土 深2m以内的编码是多少?",
"answer": "编码是YT1-1"
},
{
"question": "混凝土杆的编码是多少?",
"answer": "编码是"
},
{
"question": "接地模块安装的编码是多少?",
"answer": "编码是YX3-213"
},
{
"question": "拖拉机运输 线材 每件重400kg以内 运输的编码是多少?",
"answer": "编码是YX1-34"
},
{
"question": "拖拉机运输 其他建筑安装材料 装卸的编码是多少?",
"answer": "编码是YX1-45"
},
{
"question": "普通硅酸盐水泥的编码是多少?",
"answer": "编码是C09010102"
},
{
"question": "船舶运输 线材 每件重4000kg以内 装卸的编码是多少?",
"answer": "编码是YX1-139"
},
{
"question": "水的编码是多少?",
"answer": "编码是C21010101"
}
]
-202
View File
@@ -1,202 +0,0 @@
[
{
"question": "架空输电线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "价差预备费的金额是多少?",
"answer": "金额是22731130869.6655998230"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "建设期贷款利息的金额是多少?",
"answer": "金额是38515024887.5095977783"
},
{
"question": "特殊项目的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "动态费用的金额是多少?",
"answer": "金额是61246155757.1752014160"
},
{
"question": "动态费用的金额是多少?",
"answer": "金额是61246155757.1752014160"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "其他费用的金额是多少?",
"answer": "金额是210942912572.8689880371"
},
{
"question": "基本预备费的金额是多少?",
"answer": "金额是14020310849.7332000732"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "其中:特种设备安全监测费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "架空输电线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "基本预备费的金额是多少?",
"answer": "金额是14020310849.7332000732"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "编制基准期价差的金额是多少?",
"answer": "金额是29246752707.1180000305"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "其他费用的金额是多少?",
"answer": "金额是210942912572.8689880371"
},
{
"question": "特殊项目的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "编制基准期价差的金额是多少?",
"answer": "金额是29246752707.1180000305"
},
{
"question": "特殊项目的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "其中:建设场地征用及清理费的金额是多少?",
"answer": "金额是16831284.2287110016"
},
{
"question": "其中:可抵扣增值税额的金额是多少?",
"answer": "金额是20069645492.2888984680"
},
{
"question": "小计的金额是多少?",
"answer": "金额是458257942570.3129882812"
},
{
"question": "动态费用的金额是多少?",
"answer": "金额是61246155757.1752014160"
},
{
"question": "建设期贷款利息的金额是多少?",
"answer": "金额是38515024887.5095977783"
},
{
"question": "工程静态投资的金额是多少?",
"answer": "金额是715035853336.3909912109"
},
{
"question": "其中:建设场地征用及清理费的金额是多少?",
"answer": "金额是16831284.2287110016"
},
{
"question": "建设期贷款利息的金额是多少?",
"answer": "金额是38515024887.5095977783"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "架空输电线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "其中:水土保持监测及验收费的金额是多少?",
"answer": "金额是0E-10"
},
{
"question": "工程动态投资的金额是多少?",
"answer": "金额是776282009093.5660400391"
},
{
"question": "其中:可抵扣增值税额的金额是多少?",
"answer": "金额是20069645492.2888984680"
},
{
"question": "价差预备费的金额是多少?",
"answer": "金额是22731130869.6655998230"
},
{
"question": "一般线路本体工程的金额是多少?",
"answer": "金额是55105688268.5176010132"
},
{
"question": "其中:工程建设检测费的金额是多少?",
"answer": "金额是185575370.1463980079"
},
{
"question": "基本预备费的金额是多少?",
"answer": "金额是14020310849.7332000732"
},
{
"question": "设备购置费的金额是多少?",
"answer": "金额是2567934636.3574500084"
}
]
-118
View File
@@ -1,118 +0,0 @@
import json
from dotenv import load_dotenv
import asyncio
import nest_asyncio
nest_asyncio.apply()
from llama_index.core.prompts import (
ChatMessage,
ChatPromptTemplate,
MessageRole
)
# System prompt (Chinese) for an LLM grader: it is told to rate a generated
# answer's relevance and correctness with a single score from 1 to 5 on one
# line, followed by the reasoning on a separate line.
DEFAULT_SYSTEM_TEMPLATE = """
您是一个问答聊天机器人的专业评估系统。
您将获得以下信息:
- 用户查询,
- 生成的回答,
也可能提供一个参考答案作为评估的依据。
您的任务是判断生成回答的相关性和正确性。
输出一个代表全面评估的单一分数。
您必须在一行中仅返回该分数。
不要以其他任何格式返回答案。
在单独的一行提供给定分数的理由。
请遵循以下评分指南:
- 您的分数必须在1到5之间,其中1是最差,5是最好的。
-如果生成的回答与用户查询不相关,您应该给出1分。
-如果生成的回答相关但包含错误,您应该给出2到3分之间的分数。
-如果生成的回答相关且完全正确,您应该给出4到5分之间的分数。
示例响应:
4.0
生成的回答与参考答案的指标完全相同,但不够精炼。
"""
# User-message template; placeholders: {query}, {reference_answer}, {generated_answer}.
DEFAULT_USER_TEMPLATE = """
## User Query
{query}
## Reference Answer
{reference_answer}
## Generated Answer
{generated_answer}
"""
# Chat prompt combining the system and user templates above.
# NOTE(review): nothing in the visible code references DEFAULT_EVAL_TEMPLATE —
# confirm whether it was meant to be handed to the evaluator.
DEFAULT_EVAL_TEMPLATE = ChatPromptTemplate(
message_templates=[
ChatMessage(role=MessageRole.SYSTEM, content=DEFAULT_SYSTEM_TEMPLATE),
ChatMessage(role=MessageRole.USER, content=DEFAULT_USER_TEMPLATE),
]
)
from app.api.routers.models import ChatData, Message
from llama_index.core.chat_engine.types import BaseChatEngine, NodeWithScore
from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters
from llama_index.core.evaluation import CorrectnessEvaluator
from app.engine import get_chat_engine
from app.api.routers.chat import generate_filters
from app.engine.index import get_index
from app.observability import init_observability
from app.settings import init_settings
# Load variables from .env, then run the project's settings and observability initialisers.
load_dotenv()
init_settings()
init_observability()
# NOTE(review): this module-level `index` is not read by the visible code
# (evaluate_query and main use their own local `index`) — confirm it is needed.
index = get_index()
# Create the chat engine and the correctness evaluator.
chat_engine = get_chat_engine()
# The evaluator is constructed with no arguments, so DEFAULT_EVAL_TEMPLATE is not passed to it.
corr_evaluator_qwen = CorrectnessEvaluator()
# Load the local question/answer file; results go to a sibling file whose
# name has '.json' replaced by '_test.json'.
# NOTE(review): hard-coded absolute Windows path.
file_path = 'D:/LLM_model/text2sql/zjdataai-app-test/backend/unit_test/test.json'
output_file_path = file_path.replace('.json', '_test.json')
with open(file_path, 'r', encoding='utf-8') as f:
data = json.load(f)
# 异步函数用于评估查询
async def evaluate_query(question, answer, index, output_file):
    """Ask the chat engine one question, grade the reply, and append the record.

    Args:
        question: Query text sent to the chat engine.
        answer: Reference answer the reply is graded against.
        index: Sequence number stored with the record.
        output_file: Path of the file the record is appended to.
    """
    response = await chat_engine.astream_chat(question)
    # The string form of the first source attached to the response is what gets graded.
    # NOTE(review): raises IndexError when the response carries no sources —
    # confirm that is the desired failure mode.
    content_str = str(response.sources[0])
    # Await the evaluator's coroutine directly instead of calling its
    # synchronous wrapper from inside an already-running event loop.
    result = await corr_evaluator_qwen.aevaluate(
        query=question,
        response=content_str,
        reference=answer,
    )
    result_dict = {
        "编号": index,
        "问题": question,
        "答案": answer,
        "回答": result.response,
        "得分(0~5)": result.score,
        "评价": result.feedback
    }
    # Each record is appended followed by a comma.
    # NOTE(review): the resulting file is a comma-separated run of objects,
    # not a JSON array — confirm downstream readers expect that.
    with open(output_file, 'a', encoding='utf-8') as f:
        f.write(json.dumps(result_dict, ensure_ascii=False, indent=4))
        f.write(',')
# 主异步函数
async def main():
    """Evaluate every loaded question/answer pair in order, numbering from 1."""
    for seq, qa in enumerate(data, start=1):
        question = qa['question']
        reference = qa['answer']
        await evaluate_query(question, reference, seq, output_file_path)
# Run the main coroutine.
asyncio.run(main())
-10
View File
@@ -1,10 +0,0 @@
[
{
"question": "人工费的费率是多少?",
"answer": "费率是100.0000000000"
},
{
"question": "临时设施费的费率是多少?",
"answer": "费率是6.3500000000"
}
]
-18
View File
@@ -1,18 +0,0 @@
{
"编号": 1,
"问题": "人工费的费率是多少?",
"答案": "费率是100.0000000000",
"实际回答": "人工费的费率是100.0。",
"得分": 4.5,
"评价": "生成的答案与参考答案一致,只是表达方式略有不同,但完全正确且相关。"
}
********************
{
"编号": 2,
"问题": "临时设施费的费率是多少?",
"答案": "费率是6.3500000000",
"实际回答": "在新的上下文中,临时设施费的计算应当基于取费定额人工费和取费定额机械费,但当前费率被设置为0.0,这意味着需要重新确认或调整费率。在没有具体费率的情况下,不能直接给出确定的费率值。如果需要计算临时设施费,应根据实际的取费定额人工费和取费定额机械费进行计算。参考之前的6.35%的费率可能是一个起点,但请注意,这需要根据项目的具体情况和最新的费用标准进行调整。",
"得分": 1.0,
"评价": "生成的答案与用户查询不相关,用户询问的是临时设施费的费率,而生成的答案提供的是一个关于如何计算临时设施费的解释,且提到了一个不相关的0.0费率,这与用户的问题不符。同时,即使提到了6.35%的费率,也没有明确指出这就是用户想要的答案,反而强调了需要根据项目具体情况调整,这增加了用户的困惑。"
}
********************
-43
View File
@@ -1,43 +0,0 @@
from typing import Any, Dict, List, Optional
class ClsRegister:
    """Process-wide registry of objects grouped by catalog and keyed by name."""

    # catalog -> {name -> registered object}. Defined on the class, so every
    # caller shares the same mapping.
    clsLst: Dict[str, Dict[str, Any]] = {}

    @classmethod
    def add(cls, catalog, name, obj) -> None:
        """Register *obj* under *name* in *catalog*, replacing any earlier entry."""
        cls.clsLst.setdefault(catalog, {})[name] = obj

    @classmethod
    def get(cls, catalog, name, fuzzy: bool = False) -> Optional[Any]:
        """Look up a registered object.

        Args:
            catalog: Catalog to search.
            name: Name to look up.
            fuzzy: When true, return the first entry (in registration order)
                whose key is contained in *name*; otherwise the key must
                equal *name*.

        Returns:
            The registered object, or ``None`` when the catalog is unknown or
            nothing matches.
        """
        registry = cls.clsLst.get(catalog)
        if registry is None:
            return None
        if not fuzzy:
            return registry.get(name)
        for key, value in registry.items():
            if key in name:
                return value
        return None

    @classmethod
    def getClsList(cls, catalog) -> List[Any]:
        """Return the objects registered in *catalog*, in registration order.

        An unknown catalog yields an empty list.
        """
        return list(cls.clsLst.get(catalog, {}).values())
def register(catalog,name):
    """Return a decorator that records the decorated object in ``ClsRegister``.

    The decorated object is stored under (*catalog*, *name*) and handed back
    unchanged, so the decorator has no effect on the definition itself.
    """
    def _record(target):
        ClsRegister.add(catalog, name, target)
        return target

    return _record
+2 -3
View File
@@ -1,10 +1,9 @@
import os
from dotenv import load_dotenv
load_dotenv()
import phoenix as px
os.environ['PHOENIX_HOST'] = "0.0.0.0"
session = px.launch_app(use_temp_dir=False)
import msvcrt