Compare commits
41 Commits
dev
...
6f5548ee61
| Author | SHA1 | Date | |
|---|---|---|---|
| 6f5548ee61 | |||
| 331595cc57 | |||
| cb34fde995 | |||
| 123693c9a0 | |||
| a897f0c6de | |||
| adce2a3809 | |||
| 7875e2cbcc | |||
| 95fbc820b9 | |||
| 54f19a20fc | |||
| bc124c5513 | |||
| 1c773924db | |||
| 60b0f11ca2 | |||
| 21fdc16259 | |||
| 64ba7efcdc | |||
| 9a09e9f79f | |||
| 9ac53011e0 | |||
| f171282a0c | |||
| 626ff1e632 | |||
| 5189d4368f | |||
| 5f182075aa | |||
| b1ef410638 | |||
| 7b040ae248 | |||
| 786c4d05f6 | |||
| a8db51e844 | |||
| 545fbc732b | |||
| 56cb36dfc9 | |||
| a6c5988408 | |||
| c4cf09a28f | |||
| 75fde3598b | |||
| aba6475c5a | |||
| ae19725d72 | |||
| 97a486e631 | |||
| 728ee06c5a | |||
| a4dd385368 | |||
| 24c808d66d | |||
| ced3199550 | |||
| c4088fe963 | |||
| e7628809ad | |||
| 73565b26e4 | |||
| e9ccd7db35 | |||
| 4020b603b1 |
+53
-28
@@ -1,42 +1,62 @@
|
|||||||
JIEBA_DATA=./nltk_data
|
|
||||||
NLTK_DATA=./nltk_data
|
|
||||||
SQLITE_DATABASE_URL=sqlite:///./source.db
|
|
||||||
DATA_SOURCE_CACHE=./restapi
|
|
||||||
|
|
||||||
# The Llama Cloud API key.
|
# The Llama Cloud API key.
|
||||||
# LLAMA_CLOUD_API_KEY=
|
# LLAMA_CLOUD_API_KEY=
|
||||||
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||||
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
|
#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
|
||||||
SQLITE_DATABASE_URL=sqlite:///./source.db
|
SQLITE_DATABASE_URL=sqlite:///./source.db
|
||||||
|
|
||||||
DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
|
# The number of similar embeddings to return when retrieving documents.
|
||||||
# The provider for the AI models to use.
|
TOP_K=10
|
||||||
MODEL_PROVIDER=dashscope
|
#--------------------------
|
||||||
# The name of LLM model to use.
|
# 是否启用混合检索
|
||||||
MODEL=qwen-max
|
HYBRID_ENABLED = true
|
||||||
|
# 混合检索阈值
|
||||||
|
HYBRID_ALPHA = 0.6
|
||||||
# 是否启用检索重排功能
|
# 是否启用检索重排功能
|
||||||
ENABLE_RERANK=true
|
RERANK_ENABLED=true
|
||||||
# Name of the embedding model to use.
|
|
||||||
EMBEDDING_MODEL=text-embedding-v2
|
#---------- rerank - Xinference ----------------
|
||||||
|
#RERANK_PROVIDER=xinference
|
||||||
|
#RERANK_MODEL=bge-reranker-v2-m3
|
||||||
|
#RERANK_BASE_URL=http://10.1.16.39:9995
|
||||||
|
#RERANK_TOP_N=5
|
||||||
|
#RERANK_THRESHOLD=0.3
|
||||||
|
|
||||||
|
|
||||||
|
#---------- rerank - ollama ----------------
|
||||||
|
RERANK_PROVIDER=ollama
|
||||||
|
RERANK_MODEL= /models/bge-reranker-base
|
||||||
|
RERANK_TOP_N=5
|
||||||
|
RERANK_THRESHOLD=0.3
|
||||||
|
|
||||||
|
#---------- model - Xinference ----------------
|
||||||
|
#MODEL_PROVIDER=xinference
|
||||||
|
#OPENAI_API_KEY=xinference
|
||||||
|
#BASE_URL=http://172.20.0.145:9995
|
||||||
|
#MODEL=Qwen2-72B-Instruct-GPTQ-Int8
|
||||||
|
## Temperature for sampling from the model.
|
||||||
|
#LLM_TEMPERATURE=0.1
|
||||||
|
|
||||||
|
#---------- model - dashscope ----------------
|
||||||
|
MODEL_PROVIDER=dashscope
|
||||||
|
DASHSCOPE_API_KEY=sk-221d2d202e104618a56002ce2e7dc0d0
|
||||||
|
MODEL=qwen2-math-72b-instruct
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#---------- embedding - Xinference ----------------
|
||||||
|
#EMBEDDING_PROVIDER=xinference
|
||||||
|
#EMBEDDING_MODEL=bge-m3
|
||||||
|
#EMBEDDING_BASE_URL=http://10.1.16.39:9995
|
||||||
|
#EMBEDDING_DIM=1024
|
||||||
|
|
||||||
|
#---------- embedding - dashscope ----------------
|
||||||
|
EMBEDDING_PROVIDER=dashscope
|
||||||
|
EMBEDDING_MODEL=text-embedding-v1
|
||||||
|
|
||||||
# Dimension of the embedding model to use.
|
|
||||||
EMBEDDING_DIM=1024
|
|
||||||
|
|
||||||
# The questions to help users get started (multi-line).
|
# The questions to help users get started (multi-line).
|
||||||
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
|
CONVERSATION_STARTERS=本工程指什么?\n总算表有哪些费用?\n项目划分哪些内容构成?\n其他费用表有哪些内容?
|
||||||
|
|
||||||
# The OpenAI API key to use.
|
|
||||||
# OPENAI_API_KEY=
|
|
||||||
|
|
||||||
# Temperature for sampling from the model.
|
|
||||||
# LLM_TEMPERATURE=
|
|
||||||
|
|
||||||
# Maximum number of tokens to generate.
|
|
||||||
# LLM_MAX_TOKENS=
|
|
||||||
|
|
||||||
# The number of similar embeddings to return when retrieving documents.
|
|
||||||
TOP_K=5
|
|
||||||
|
|
||||||
# The time in milliseconds to wait for the stream to return a response.
|
# The time in milliseconds to wait for the stream to return a response.
|
||||||
STREAM_TIMEOUT=60000
|
STREAM_TIMEOUT=60000
|
||||||
|
|
||||||
@@ -58,7 +78,6 @@ VECTOR_STORE_PATH=./storage_vector
|
|||||||
BM_RETRIEVER_PATH =./storage_bm
|
BM_RETRIEVER_PATH =./storage_bm
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
PHOENIX_API_KEY=123456
|
PHOENIX_API_KEY=123456
|
||||||
PHOENIX_URL=http://localhost:6006/v1/traces
|
PHOENIX_URL=http://localhost:6006/v1/traces
|
||||||
PHOENIX_PROJECT_NAME=ly_zjapp
|
PHOENIX_PROJECT_NAME=ly_zjapp
|
||||||
@@ -85,4 +104,10 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath
|
|||||||
- You can install any pip package (if it exists) by running a cell with pip install.
|
- You can install any pip package (if it exists) by running a cell with pip install.
|
||||||
"
|
"
|
||||||
|
|
||||||
|
PRJTOJSON_URL = 'http://10.1.6.60:8092'
|
||||||
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
||||||
|
|
||||||
|
CHAT_UPLOAD_FILECACHE = "./output/uploaded"
|
||||||
|
|
||||||
|
JIEBA_DATA=./nltk_data
|
||||||
|
NLTK_DATA=./nltk_data
|
||||||
+17
-13
@@ -19,27 +19,28 @@ HYBRID_ALPHA = 0.6
|
|||||||
#--------------------------
|
#--------------------------
|
||||||
# 是否启用检索重排功能
|
# 是否启用检索重排功能
|
||||||
RERANK_ENABLED=true
|
RERANK_ENABLED=true
|
||||||
# Rerank model
|
|
||||||
|
#---------- rerank - Xinference ----------------
|
||||||
|
RERANK_PROVIDER=xinference
|
||||||
RERANK_MODEL=bge-reranker-v2-m3
|
RERANK_MODEL=bge-reranker-v2-m3
|
||||||
RERANK_BASE_URL=http://10.1.16.39:9995
|
RERANK_BASE_URL=http://10.1.16.39:9995
|
||||||
RERANK_TOP_N=5
|
RERANK_TOP_N=5
|
||||||
RERANK_THRESHOLD=0.3
|
RERANK_THRESHOLD=0.3
|
||||||
#---------- Xinference ----------------
|
|
||||||
# The provider for the AI models to use.
|
#---------- model - Xinference ----------------
|
||||||
MODEL_PROVIDER=xinference
|
MODEL_PROVIDER=xinference # The provider for the AI models to use.
|
||||||
# The OpenAI API key to use.
|
OPENAI_API_KEY=xinference # The OpenAI API key to use.
|
||||||
OPENAI_API_KEY=xinference
|
|
||||||
BASE_URL=http://10.1.0.142:9995
|
BASE_URL=http://10.1.0.142:9995
|
||||||
MODEL=Qwen2-72B-Instruct-GPTQ-Int8
|
MODEL=Qwen2-72B-Instruct-GPTQ-Int8
|
||||||
# Temperature for sampling from the model.
|
LLM_TEMPERATURE=0.1 # Temperature for sampling from the model.
|
||||||
LLM_TEMPERATURE=0.1
|
#LLM_MAX_TOKENS= # Maximum number of tokens to generate.
|
||||||
# Maximum number of tokens to generate.
|
|
||||||
#LLM_MAX_TOKENS=
|
|
||||||
# Name of the embedding model to use.
|
#---------- embedding - Xinference ----------------
|
||||||
|
EMBEDDING_PROVIDER=xinference
|
||||||
EMBEDDING_MODEL=bge-m3
|
EMBEDDING_MODEL=bge-m3
|
||||||
EMBEDDING_BASE_URL=http://10.1.16.39:9995
|
EMBEDDING_BASE_URL=http://10.1.16.39:9995
|
||||||
# Dimension of the embedding model to use.
|
EMBEDDING_DIM=1024 # Dimension of the embedding model to use.
|
||||||
EMBEDDING_DIM=1024
|
|
||||||
|
|
||||||
##---------- OpenAI ----------------
|
##---------- OpenAI ----------------
|
||||||
## The provider for the AI models to use.
|
## The provider for the AI models to use.
|
||||||
@@ -116,4 +117,7 @@ SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weath
|
|||||||
- You can install any pip package (if it exists) by running a cell with pip install.
|
- You can install any pip package (if it exists) by running a cell with pip install.
|
||||||
"
|
"
|
||||||
|
|
||||||
|
|
||||||
|
PRJTOJSON_URL = 'http://10.1.6.60:8092'
|
||||||
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
PROJECT_TITLE = "您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!"
|
||||||
|
CHAT_UPLOAD_FILECACHE = "./output/uploaded"
|
||||||
+194
-68
@@ -1,37 +1,64 @@
|
|||||||
|
|
||||||
import asyncio
|
import asyncio
|
||||||
import json
|
import json
|
||||||
import logging
|
import logging
|
||||||
import time
|
import time
|
||||||
from typing import Dict, List, Any, Optional, AsyncGenerator
|
from typing import Dict, List, Any, Optional, AsyncGenerator
|
||||||
from collections import deque
|
|
||||||
|
|
||||||
from aiostream import stream
|
from aiostream import stream
|
||||||
from fastapi import APIRouter, Request
|
from fastapi import APIRouter, Request,HTTPException
|
||||||
from fastapi.responses import StreamingResponse
|
from fastapi.responses import StreamingResponse
|
||||||
from llama_index.core import BaseCallbackHandler
|
from llama_index.core import BaseCallbackHandler
|
||||||
from llama_index.core.base.llms.types import ChatMessage
|
from llama_index.core.base.llms.types import ChatMessage
|
||||||
from llama_index.core.callbacks import CBEventType
|
from llama_index.core.callbacks import CBEventType
|
||||||
from llama_index.core.chat_engine.types import StreamingAgentChatResponse
|
from llama_index.core.chat_engine.types import StreamingAgentChatResponse
|
||||||
from llama_index.core.tools import ToolOutput
|
from llama_index.core.tools import ToolOutput
|
||||||
|
from llama_index.core.schema import NodeWithScore
|
||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
from app.api.routers.request.base import userMng, conversations,message,parameter,feedback
|
from app.api.routers.request.base import userMng, conversations,message,ProjectInfo,feedback
|
||||||
from app.api.routers.request.baseConfig import *
|
from app.api.routers.request.baseConfig import *
|
||||||
from app.api.routers.request.models import ChatRequestData,ChatFileUploadRequest
|
from app.api.routers.request.models import ChatRequestData,ChatFileUploadRequest
|
||||||
from app.engine import get_chat_engine
|
from app.engine import get_chat_engine
|
||||||
import uuid
|
import uuid
|
||||||
|
from app.api.routers.services.fileServices import PrjFileLoadService,ChatFileService
|
||||||
|
from app.api.routers.services.suggestion import NextQuestionSuggestion
|
||||||
|
import time
|
||||||
|
from llama_index.core.settings import Settings
|
||||||
|
|
||||||
logger = logging.getLogger("uvicorn")
|
logger = logging.getLogger("uvicorn")
|
||||||
|
|
||||||
api_router = r = APIRouter()
|
|
||||||
v1_router = v = APIRouter()
|
v1_router = v = APIRouter()
|
||||||
|
|
||||||
|
|
||||||
|
gEvent_handler = None
|
||||||
|
|
||||||
|
|
||||||
|
CH_Event_map={
|
||||||
|
'CHUNKING':'文本切片',
|
||||||
|
'NODE_PARSING':'节点解析',
|
||||||
|
'EMBEDDING':'生成向量',
|
||||||
|
'LLM':'知识问答',
|
||||||
|
'QUERY':'查询',
|
||||||
|
'RETRIEVE':'检索',
|
||||||
|
'SYNTHESIZE':'答案合成',
|
||||||
|
'TREE':'总结',
|
||||||
|
'SUB_QUESTION':'问题分解',
|
||||||
|
'TEMPLATING':'生成提示词模板',
|
||||||
|
'FUNCTION_CALL':'函数调用',
|
||||||
|
'RERANKING':'节点重排',
|
||||||
|
'EXCEPTION':'执行异常',
|
||||||
|
'AGENT_STEP':'单步执行'
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
class ChatCallbackEvent(BaseModel):
|
class ChatCallbackEvent(BaseModel):
|
||||||
event_type: ChatEventType
|
event_type: ChatEventType
|
||||||
payload: Optional[Dict[str, Any]] = None
|
payload: Optional[Dict[str, Any]] = None
|
||||||
|
|
||||||
def get_common_param(self)-> dict:
|
def get_common_param(self)-> dict:
|
||||||
return {
|
return {
|
||||||
'event': self.event_type.name,
|
'event': self.event_type.value,
|
||||||
'conversation_id':self.payload.get("conversation_id"),
|
'conversation_id':self.payload.get("conversation_id"),
|
||||||
'message_id': self.payload.get("message_id"),
|
'message_id': self.payload.get("message_id"),
|
||||||
'created_at': int(time.time()),
|
'created_at': int(time.time()),
|
||||||
@@ -47,7 +74,7 @@ class ChatCallbackEvent(BaseModel):
|
|||||||
"workflow_id": self.payload.get('workflow_id'),
|
"workflow_id": self.payload.get('workflow_id'),
|
||||||
"sequence_number": 1709,
|
"sequence_number": 1709,
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"sys.query": self.payload.get('query'),
|
"sys.query": f"开始查询 {self.payload.get('query')}",
|
||||||
"sys.files": [],
|
"sys.files": [],
|
||||||
"sys.conversation_id": self.payload.get('conversation_id'),
|
"sys.conversation_id": self.payload.get('conversation_id'),
|
||||||
"sys.user_id": self.payload.get('use_id')
|
"sys.user_id": self.payload.get('use_id')
|
||||||
@@ -92,7 +119,7 @@ class ChatCallbackEvent(BaseModel):
|
|||||||
"id": self.payload.get('nodeid'),
|
"id": self.payload.get('nodeid'),
|
||||||
"node_id": self.payload.get('nodeid'),
|
"node_id": self.payload.get('nodeid'),
|
||||||
"node_type": "http-request",
|
"node_type": "http-request",
|
||||||
"title": self.payload.get('title'),
|
"title": CH_Event_map[self.payload.get('title')],
|
||||||
"index": self.payload.get('index'),
|
"index": self.payload.get('index'),
|
||||||
"predecessor_node_id": self.payload.get('predecessor_node_id'),
|
"predecessor_node_id": self.payload.get('predecessor_node_id'),
|
||||||
"inputs": '',
|
"inputs": '',
|
||||||
@@ -110,7 +137,7 @@ class ChatCallbackEvent(BaseModel):
|
|||||||
"id": self.payload.get('nodeid'),
|
"id": self.payload.get('nodeid'),
|
||||||
"node_id": self.payload.get('nodeid'),
|
"node_id": self.payload.get('nodeid'),
|
||||||
"node_type": "http-request",
|
"node_type": "http-request",
|
||||||
"title": self.payload.get('title'),
|
"title": CH_Event_map[self.payload.get('title')],
|
||||||
"index": self.payload.get('index'),
|
"index": self.payload.get('index'),
|
||||||
"predecessor_node_id": self.payload.get('predecessor_node_id'),
|
"predecessor_node_id": self.payload.get('predecessor_node_id'),
|
||||||
"inputs": '',
|
"inputs": '',
|
||||||
@@ -137,15 +164,54 @@ class ChatCallbackEvent(BaseModel):
|
|||||||
|
|
||||||
def get_MessageEnd_param(self) -> dict:
|
def get_MessageEnd_param(self) -> dict:
|
||||||
params = self.get_common_param()
|
params = self.get_common_param()
|
||||||
|
nodeInfos = []
|
||||||
|
source_nodes = self.payload.get('source_node')
|
||||||
|
if source_nodes is not None:
|
||||||
|
for i in range(len(source_nodes)):
|
||||||
|
source_node:NodeWithScore = source_nodes[i]
|
||||||
|
metadata:dict = source_node.node.metadata
|
||||||
|
nodeInfo = {
|
||||||
|
"position": i,
|
||||||
|
"dataset_id": metadata.get("pipeline_id"),
|
||||||
|
"dataset_name": metadata.get("file_name"),
|
||||||
|
"document_id": source_node.node_id,
|
||||||
|
"document_name": metadata.get("file_name"),
|
||||||
|
"data_source_type": "upload_file",
|
||||||
|
"segment_id": source_node.node_id,
|
||||||
|
"retriever_from": "workflow",
|
||||||
|
"score": source_node.score,
|
||||||
|
"hit_count": 1,
|
||||||
|
"word_count": 632,
|
||||||
|
"segment_position": i,
|
||||||
|
"index_node_hash": "",
|
||||||
|
"content": source_node.text
|
||||||
|
}
|
||||||
|
nodeInfos.append(nodeInfo)
|
||||||
params.update({
|
params.update({
|
||||||
'id':self.payload.get('message_id'),
|
'id':self.payload.get('message_id'),
|
||||||
'metadata':self.payload.get('metadata')
|
'metadata':{
|
||||||
|
"retriever_resources":nodeInfos,
|
||||||
|
"usage":{
|
||||||
|
"prompt_tokens": 4972,
|
||||||
|
"prompt_unit_price": "0.0",
|
||||||
|
"prompt_price_unit": "0.0",
|
||||||
|
"prompt_price": "0.0",
|
||||||
|
"completion_tokens": 332,
|
||||||
|
"completion_unit_price": "0.0",
|
||||||
|
"completion_price_unit": "0.0",
|
||||||
|
"completion_price": "0.0",
|
||||||
|
"total_tokens": 5304,
|
||||||
|
"total_price": "0.0",
|
||||||
|
"currency": "USD",
|
||||||
|
"latency": 4.897703120019287
|
||||||
|
}
|
||||||
|
}
|
||||||
})
|
})
|
||||||
return params
|
return params
|
||||||
|
|
||||||
def to_response(self)-> dict|None:
|
def to_response(self)-> dict|None:
|
||||||
try:
|
try:
|
||||||
match self.event_type:
|
match self.event_type.value:
|
||||||
case "workflow_started":
|
case "workflow_started":
|
||||||
return self.get_WorkflowStart_param()
|
return self.get_WorkflowStart_param()
|
||||||
case "workflow_finished":
|
case "workflow_finished":
|
||||||
@@ -168,7 +234,7 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
_aqueue: asyncio.Queue
|
_aqueue: asyncio.Queue
|
||||||
is_done: bool = False
|
is_done: bool = False
|
||||||
|
|
||||||
def __init__(self,**params):
|
def __init__(self):
|
||||||
"""Initialize the base callback handler."""
|
"""Initialize the base callback handler."""
|
||||||
ignored_events = [
|
ignored_events = [
|
||||||
# CBEventType.CHUNKING,
|
# CBEventType.CHUNKING,
|
||||||
@@ -179,23 +245,19 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
]
|
]
|
||||||
super().__init__(ignored_events, ignored_events)
|
super().__init__(ignored_events, ignored_events)
|
||||||
self._aqueue = asyncio.Queue()
|
self._aqueue = asyncio.Queue()
|
||||||
self._response:str = ''
|
self._response: StreamingAgentChatResponse = None
|
||||||
self._params:Dict[str,Any] = params
|
self._ids:Dict[str,Any] = {}
|
||||||
self._nodeStack:deque = deque()
|
self._chatData:ChatRequestData = None
|
||||||
|
self._nodeStack:List[str] = []
|
||||||
|
self._firstEventID:str = None
|
||||||
|
|
||||||
#添加工作流开始事件
|
def setInitParams(self,ids:dict,data:ChatRequestData):
|
||||||
data:ChatRequestData = self._params['data']
|
self._ids = ids
|
||||||
args:Dict[str,Any] = self._params['ids']
|
self._chatData = data
|
||||||
args.update(
|
self._firstEventID = None
|
||||||
{
|
|
||||||
'use_id': data.user,
|
def setResponse(self,response: StreamingAgentChatResponse):
|
||||||
'query': data.query,
|
self._response = response
|
||||||
'conversation_id': data.conversation_id
|
|
||||||
}
|
|
||||||
)
|
|
||||||
wf_event = ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_START,payload = args)
|
|
||||||
if wf_event.to_response() is not None:
|
|
||||||
self._aqueue.put_nowait(wf_event)
|
|
||||||
|
|
||||||
def on_event_start(
|
def on_event_start(
|
||||||
self,
|
self,
|
||||||
@@ -204,11 +266,15 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
event_id: str = "",
|
event_id: str = "",
|
||||||
**kwargs: Any,
|
**kwargs: Any,
|
||||||
) -> str:
|
) -> str:
|
||||||
|
if self._firstEventID is None:
|
||||||
|
self._firstEventID = event_id
|
||||||
|
self.start()
|
||||||
|
|
||||||
logger.info("event_start:{} type:{} payload:{}\n".format(event_id, event_type, payload))
|
logger.info("event_start:{} type:{} payload:{}\n".format(event_id, event_type, payload))
|
||||||
|
|
||||||
self._nodeStack.append(event_id)
|
self._nodeStack.append(event_id)
|
||||||
nindex = self._nodeStack.count() - 1
|
nindex = len(self._nodeStack) - 1
|
||||||
args:Dict[str,Any] = self._params['ids']
|
args:Dict[str,Any] = self._ids
|
||||||
args.update(
|
args.update(
|
||||||
{
|
{
|
||||||
'nodeid':event_id,
|
'nodeid':event_id,
|
||||||
@@ -221,7 +287,6 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
if nd_event.to_response() is not None:
|
if nd_event.to_response() is not None:
|
||||||
self._aqueue.put_nowait(nd_event)
|
self._aqueue.put_nowait(nd_event)
|
||||||
|
|
||||||
|
|
||||||
def on_event_end(
|
def on_event_end(
|
||||||
self,
|
self,
|
||||||
event_type: CBEventType,
|
event_type: CBEventType,
|
||||||
@@ -232,10 +297,10 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
logger.info("event_end:{} type:{} payload:{}\n".format(event_id, event_type, payload))
|
logger.info("event_end:{} type:{} payload:{}\n".format(event_id, event_type, payload))
|
||||||
|
|
||||||
#self.response = payload.get("response","")
|
#self.response = payload.get("response","")
|
||||||
args:Dict[str,Any] = self._params['ids']
|
args:Dict[str,Any] = self._ids
|
||||||
nodeID = self._nodeStack[-1]
|
nodeID = self._nodeStack[-1]
|
||||||
if nodeID == event_id:
|
if nodeID == event_id:
|
||||||
nindex = self._nodeStack.count() - 1
|
nindex = len(self._nodeStack) - 1
|
||||||
args.update(
|
args.update(
|
||||||
{
|
{
|
||||||
'nodeid':event_id,
|
'nodeid':event_id,
|
||||||
@@ -249,6 +314,8 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
self._aqueue.put_nowait(nd_event)
|
self._aqueue.put_nowait(nd_event)
|
||||||
self._nodeStack.pop()
|
self._nodeStack.pop()
|
||||||
|
|
||||||
|
if self._firstEventID is not None and self._firstEventID == event_id:
|
||||||
|
self.finished()
|
||||||
|
|
||||||
def start_trace(self, trace_id: Optional[str] = None) -> None:
|
def start_trace(self, trace_id: Optional[str] = None) -> None:
|
||||||
"""No-op."""
|
"""No-op."""
|
||||||
@@ -261,23 +328,6 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
) -> None:
|
) -> None:
|
||||||
"""No-op."""
|
"""No-op."""
|
||||||
logger.info("trace_end:{} trace_map:{}\n".format(trace_id, trace_map))
|
logger.info("trace_end:{} trace_map:{}\n".format(trace_id, trace_map))
|
||||||
data:ChatRequestData = self._params['data']
|
|
||||||
args:Dict[str,Any] = self._params['ids']
|
|
||||||
args.update(
|
|
||||||
{
|
|
||||||
'response':self._response,
|
|
||||||
'conversation_id': data.conversation_id
|
|
||||||
}
|
|
||||||
)
|
|
||||||
wf_event = ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_FINISHED,payload = args)
|
|
||||||
if wf_event.to_response() is not None:
|
|
||||||
self._aqueue.put_nowait(wf_event)
|
|
||||||
|
|
||||||
|
|
||||||
args:Dict[str,Any] = self._params['ids']
|
|
||||||
msgEnt_event = ChatCallbackEvent(event_type = ChatEventType.MESSAGE_END,payload = args)
|
|
||||||
if msgEnt_event.to_response() is not None:
|
|
||||||
self._aqueue.put_nowait(msgEnt_event)
|
|
||||||
|
|
||||||
async def async_event_gen(self) -> AsyncGenerator[ChatCallbackEvent, None]:
|
async def async_event_gen(self) -> AsyncGenerator[ChatCallbackEvent, None]:
|
||||||
while not self._aqueue.empty() or not self.is_done:
|
while not self._aqueue.empty() or not self.is_done:
|
||||||
@@ -286,6 +336,51 @@ class ChatEventCallbackHandler(BaseCallbackHandler):
|
|||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def makeWorkflow_startEvent(self)->ChatCallbackEvent:
|
||||||
|
args:Dict[str,Any] = self._ids
|
||||||
|
args.update(
|
||||||
|
{
|
||||||
|
'use_id': self._chatData.user,
|
||||||
|
'query': self._chatData.query,
|
||||||
|
'conversation_id': self._chatData.conversation_id
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_START,payload = args)
|
||||||
|
|
||||||
|
def makeWorkflow_finishedEvent(self)->ChatCallbackEvent:
|
||||||
|
args:Dict[str,Any] = self._ids
|
||||||
|
args.update(
|
||||||
|
{
|
||||||
|
'response': '',
|
||||||
|
'conversation_id': self._chatData.conversation_id
|
||||||
|
}
|
||||||
|
)
|
||||||
|
return ChatCallbackEvent(event_type = ChatEventType.WORKFLOW_FINISHED,payload = args)
|
||||||
|
|
||||||
|
def makeMessage_EndEvent(self)->ChatCallbackEvent:
|
||||||
|
args:Dict[str,Any] = self._ids
|
||||||
|
if self._response is not None:
|
||||||
|
args.update({
|
||||||
|
'source_node': self._response.source_nodes
|
||||||
|
})
|
||||||
|
msgEnt_event = ChatCallbackEvent(event_type = ChatEventType.MESSAGE_END,payload = args)
|
||||||
|
return msgEnt_event
|
||||||
|
|
||||||
|
def start(self):
|
||||||
|
#添加工作流开始事件
|
||||||
|
wf_event = self.makeWorkflow_startEvent()
|
||||||
|
if wf_event.to_response() is not None:
|
||||||
|
self._aqueue.put_nowait(wf_event)
|
||||||
|
|
||||||
|
def finished(self):
|
||||||
|
wf_event = self.makeWorkflow_finishedEvent()
|
||||||
|
if wf_event.to_response() is not None:
|
||||||
|
self._aqueue.put_nowait(wf_event)
|
||||||
|
|
||||||
|
msgEnt_event = self.makeMessage_EndEvent()
|
||||||
|
if msgEnt_event.to_response() is not None:
|
||||||
|
self._aqueue.put_nowait(msgEnt_event)
|
||||||
|
|
||||||
class IDManager:
|
class IDManager:
|
||||||
def createID(self):
|
def createID(self):
|
||||||
return {
|
return {
|
||||||
@@ -353,6 +448,7 @@ class ChatStreamResponse(StreamingResponse):
|
|||||||
|
|
||||||
# the text_generator is the leading stream, once it's finished, also finish the event stream
|
# the text_generator is the leading stream, once it's finished, also finish the event stream
|
||||||
event_handler.is_done = True
|
event_handler.is_done = True
|
||||||
|
event_handler.setResponse(response)
|
||||||
|
|
||||||
# Yield the events from the event handler
|
# Yield the events from the event handler
|
||||||
async def _event_generator():
|
async def _event_generator():
|
||||||
@@ -374,33 +470,36 @@ class ChatStreamResponse(StreamingResponse):
|
|||||||
break
|
break
|
||||||
|
|
||||||
@v.post("/chat-messages")
|
@v.post("/chat-messages")
|
||||||
async def post_conversations(request: Request, data: ChatRequestData):
|
async def post_chatmessages(request: Request, data: ChatRequestData):
|
||||||
|
global gEvent_handler
|
||||||
userMng.findNoExistCreate(data.user)
|
userMng.findNoExistCreate(data.user)
|
||||||
data.conversation_id = data.conversation_id if data.conversation_id else str(uuid.uuid4())
|
data.conversation_id = data.conversation_id if data.conversation_id else str(uuid.uuid4())
|
||||||
|
|
||||||
conversaObj = conversations()
|
conversaObj = conversations()
|
||||||
conversationinfo = conversaObj.get(data.conversation_id)
|
conversationinfo = conversaObj.get(data.conversation_id)
|
||||||
if conversationinfo is None:
|
if conversationinfo is None:
|
||||||
conversationinfo = conversaObj.add(data.conversation_id, data.user, "新建会话")
|
conversationinfo = conversaObj.add(data.conversation_id, data.user, "新建会话",inputs= data.inputs)
|
||||||
|
|
||||||
# 生成聊天参数
|
# 生成聊天参数
|
||||||
last_message_content = ChatMessage.from_str(data.query)
|
last_message_content = ChatMessage.from_str(data.query)
|
||||||
filters = None
|
filters = None
|
||||||
params = data.inputs or {}
|
params = data.inputs or {}
|
||||||
|
|
||||||
# 获取聊天引擎对象
|
|
||||||
chat_engine = get_chat_engine(filters=filters, params=params)
|
|
||||||
|
|
||||||
# 启动聊天事件监听
|
# 启动聊天事件监听
|
||||||
ids = IDManager().createID()
|
ids = IDManager().createID()
|
||||||
event_handler = ChatEventCallbackHandler(ids = ids,data = data)
|
if gEvent_handler is None:
|
||||||
chat_engine.callback_manager.handlers.append(event_handler) # type: ignore
|
gEvent_handler = ChatEventCallbackHandler()
|
||||||
|
Settings.llm.callback_manager.handlers.append(gEvent_handler)
|
||||||
|
|
||||||
|
if gEvent_handler is not None:
|
||||||
|
gEvent_handler.setInitParams(ids = ids,data = data)
|
||||||
|
|
||||||
|
# 获取聊天引擎对象
|
||||||
|
chat_engine = get_chat_engine(filters=filters, params=params)
|
||||||
# 执行异步聊天
|
# 执行异步聊天
|
||||||
response = await chat_engine.astream_chat(data.query)
|
response = await chat_engine.astream_chat(data.query)
|
||||||
|
|
||||||
# 返回异步消息回应
|
# 返回异步消息回应
|
||||||
return ChatStreamResponse(request, event_handler, response, data,ids)
|
return ChatStreamResponse(request, gEvent_handler, response, data,ids)
|
||||||
|
|
||||||
@v.get("/messages")
|
@v.get("/messages")
|
||||||
async def query_messages(user:str, conversation_id:str):
|
async def query_messages(user:str, conversation_id:str):
|
||||||
@@ -467,24 +566,51 @@ async def query_conversations(user:str, first_id:str = None, limit:str = None, p
|
|||||||
|
|
||||||
@v.get("/parameters")
|
@v.get("/parameters")
|
||||||
async def query_parameters(user:str):
|
async def query_parameters(user:str):
|
||||||
params = parameter().get(user)
|
prjObj = ProjectInfo()
|
||||||
if len(params) == 0:
|
return BaseConfig().ParamterCfg(projectInfo = prjObj.projectNames())
|
||||||
params = BaseConfig().ParamterCfg()
|
|
||||||
return params
|
|
||||||
|
|
||||||
@v.post("/messages/{message_id}/feedbacks")
|
@v.post("/messages/{message_id}/feedbacks")
|
||||||
async def post_feedbacks(request: Request,message_id:str,params:Dict[str,Any]):
|
async def post_feedbacks(request: Request,message_id:str,params:Dict[str,Any]):
|
||||||
if params['rating'] =='null':
|
if params['rating'] is None:
|
||||||
feedback().delete(message_id)
|
feedback().delete(message_id)
|
||||||
else:
|
else:
|
||||||
condition = {'id':message_id}
|
results = message().query(message_id)
|
||||||
results = message().query(**condition)
|
|
||||||
if len(results) > 0:
|
if len(results) > 0:
|
||||||
result = results[0]
|
result = results[0]
|
||||||
feedback().add(message_id=message_id,query=result['query'],
|
feedback().add(message_id=message_id,query=result['query'],
|
||||||
answer=result['answer'],rating=params['rating'])
|
answer=result['answer'],rating=params['rating'])
|
||||||
|
|
||||||
@r.post("")
|
@v.post("/files/upload")
|
||||||
def upload_file(request: ChatFileUploadRequest) -> List[str]:
|
def upload_file(request: ChatFileUploadRequest):
|
||||||
pass
|
try:
|
||||||
|
logger.info("Processing file")
|
||||||
|
resluts = ChatFileService.process_file(request.base64)
|
||||||
|
return {
|
||||||
|
'id':resluts.get('id'),
|
||||||
|
'name': resluts.get('name'),
|
||||||
|
'size': resluts.get('size'),
|
||||||
|
'extension':resluts.get('extension'),
|
||||||
|
'mime_type':resluts.get('mime_type'),
|
||||||
|
'created_by':str(uuid.uuid4()),
|
||||||
|
'created_at':int(time.time())
|
||||||
|
}
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file: {e}", exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail="Error processing file")
|
||||||
|
|
||||||
|
@v.post("/project")
|
||||||
|
def upload_file(request: ChatFileUploadRequest):
|
||||||
|
try:
|
||||||
|
logger.info("Processing file")
|
||||||
|
return PrjFileLoadService.process_file(request.base64)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error processing file: {e}", exc_info=True)
|
||||||
|
raise HTTPException(status_code=500, detail="Error processing file")
|
||||||
|
|
||||||
|
@v.post("/messages/{message_id}/suggested")
|
||||||
|
async def post_suggested(request: Request,message_id:str,user:str):
|
||||||
|
questions = await NextQuestionSuggestion.suggest_next_questions(message_id)
|
||||||
|
return {
|
||||||
|
"result": "success",
|
||||||
|
"data":questions
|
||||||
|
}
|
||||||
@@ -2,7 +2,7 @@ from datetime import datetime
|
|||||||
import uuid
|
import uuid
|
||||||
from app.api.routers.request.baseConfig import BaseConfig
|
from app.api.routers.request.baseConfig import BaseConfig
|
||||||
from app.api.routers.request.dbOrm import DBManager
|
from app.api.routers.request.dbOrm import DBManager
|
||||||
|
from typing import List
|
||||||
dbManage = DBManager()
|
dbManage = DBManager()
|
||||||
|
|
||||||
class conversations:
|
class conversations:
|
||||||
@@ -24,12 +24,13 @@ class conversations:
|
|||||||
return records[0]
|
return records[0]
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def add(self,id:str, user_id:str, name:str):
|
def add(self,id:str, user_id:str, name:str,inputs:dict):
|
||||||
template = BaseConfig().ConversationCfg()
|
template = BaseConfig().ConversationCfg()
|
||||||
template['id'] = id
|
template['id'] = id
|
||||||
template['user_id'] = user_id
|
template['user_id'] = user_id
|
||||||
template['name'] = name
|
template['name'] = name
|
||||||
template['created_at'] = 1724399038
|
template['created_at'] = 1724399038
|
||||||
|
template['inputs'] = inputs
|
||||||
dbManage.addRecord(self._tableName,template)
|
dbManage.addRecord(self._tableName,template)
|
||||||
|
|
||||||
def delete(self,id:str):
|
def delete(self,id:str):
|
||||||
@@ -122,8 +123,9 @@ class message:
|
|||||||
def delete(self,user_id:str):
|
def delete(self,user_id:str):
|
||||||
dbManage.delete(self._tableName,user_id = user_id)
|
dbManage.delete(self._tableName,user_id = user_id)
|
||||||
|
|
||||||
def query(self,**condition):
|
def query(self,id:str):
|
||||||
results = []
|
results = []
|
||||||
|
condition = {'id':id}
|
||||||
records = dbManage.query(self._tableName,**condition)
|
records = dbManage.query(self._tableName,**condition)
|
||||||
for record in records:
|
for record in records:
|
||||||
results.append(record.dict())
|
results.append(record.dict())
|
||||||
@@ -153,3 +155,35 @@ class feedback:
|
|||||||
if len(records) > 0:
|
if len(records) > 0:
|
||||||
return records[0].dict()
|
return records[0].dict()
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
class ProjectInfo:
    """Access layer for the ``projectInfos`` table (project name / project flag pairs)."""

    def __init__(self) -> None:
        # Table name handed to every DBManager call made by this class.
        self._tableName = 'projectInfos'
        dbManage.createTable(self._tableName)

    def add(self, name: str, flag: str):
        """Insert a ``(name, flag)`` record unless a record with that flag already exists."""
        existing = dbManage.query(self._tableName, prjFlag=flag)
        if len(existing) != 0:
            return
        dbManage.addRecord(self._tableName, {'prjectName': name, 'prjFlag': flag})

    def projectNames(self) -> List[str]:
        """Return the ``prjectName`` of every stored record, skipping empty-string names."""
        rows = [record.dict() for record in dbManage.query(self._tableName)]
        return [row.get('prjectName') for row in rows if row.get('prjectName') != '']

    def prjFalg(self, name: str):
        """Return the ``prjFlag`` of the first record named *name*, or ``''`` when none matches."""
        for record in dbManage.query(self._tableName):
            row: dict = record.dict()
            if row.get('prjectName') == name:
                return row['prjFlag']
        return ''
|
||||||
@@ -3,9 +3,10 @@ import os
|
|||||||
from enum import Enum
|
from enum import Enum
|
||||||
|
|
||||||
class BaseConfig(BaseModel):
|
class BaseConfig(BaseModel):
|
||||||
projectInfo:str = os.getenv("PROJECT_TITLE","您好,我是博微工程理解小助手,您可以问我有关[线路工程]工程数据的相关问题!")
|
projectInfo:str = os.getenv("PROJECT_TITLE","会话提示消息")
|
||||||
|
|
||||||
def ParamterCfg(self):
|
def ParamterCfg(self,**args):
|
||||||
|
prjItems = args.get('projectInfo')
|
||||||
questions = os.getenv("CONVERSATION_STARTERS", "dev")
|
questions = os.getenv("CONVERSATION_STARTERS", "dev")
|
||||||
return{
|
return{
|
||||||
"opening_statement": self.projectInfo,
|
"opening_statement": self.projectInfo,
|
||||||
@@ -30,7 +31,18 @@ class BaseConfig(BaseModel):
|
|||||||
"more_like_this": {
|
"more_like_this": {
|
||||||
"enabled": False
|
"enabled": False
|
||||||
},
|
},
|
||||||
"user_input_form": [],
|
"user_input_form": [
|
||||||
|
{
|
||||||
|
"select": {
|
||||||
|
"variable": "projectname",
|
||||||
|
"label": "\u5de5\u7a0b\u540d\u79f0",
|
||||||
|
"type": "select",
|
||||||
|
"max_length": 48,
|
||||||
|
"required": True,
|
||||||
|
"options": prjItems
|
||||||
|
}
|
||||||
|
}
|
||||||
|
],
|
||||||
"sensitive_word_avoidance": {
|
"sensitive_word_avoidance": {
|
||||||
"enabled": False
|
"enabled": False
|
||||||
},
|
},
|
||||||
@@ -44,8 +56,19 @@ class BaseConfig(BaseModel):
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
"system_parameters": {
|
"system_parameters": {
|
||||||
"image_file_size_limit": "10"
|
"image_file_size_limit": "10",
|
||||||
}
|
"language": "",
|
||||||
|
"voice": "",
|
||||||
|
},
|
||||||
|
"retriever_resource": {
|
||||||
|
"enabled": True
|
||||||
|
},
|
||||||
|
"annotation_reply": {
|
||||||
|
"enabled": False
|
||||||
|
},
|
||||||
|
"more_like_this": {
|
||||||
|
"enabled": False
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
def ConversationCfg(self):
|
def ConversationCfg(self):
|
||||||
|
|||||||
@@ -55,6 +55,13 @@ class FeedBackOrm(Base):
|
|||||||
answer = Column(String)
|
answer = Column(String)
|
||||||
rating = Column(String)
|
rating = Column(String)
|
||||||
|
|
||||||
|
class ProjectInfoOrm(Base):
    """ORM mapping for the ``projectInfos`` table."""
    __tablename__ = "projectInfos"

    # Project flag; declared as the table's primary key.
    prjFlag = Column(String,primary_key=True)
    # Project name. The spelling "prjectName" is the actual column name used by callers.
    prjectName = Column(String)
|
||||||
|
|
||||||
|
|
||||||
#数据结构
|
#数据结构
|
||||||
class ConversationModel(BaseModel):
|
class ConversationModel(BaseModel):
|
||||||
id: str
|
id: str
|
||||||
@@ -121,6 +128,17 @@ class FeedBackModel(BaseModel):
|
|||||||
def orm(cls):
|
def orm(cls):
|
||||||
return FeedBackOrm
|
return FeedBackOrm
|
||||||
|
|
||||||
|
class ProjectInfoModel(BaseModel):
    """Pydantic schema for a project record; field names mirror ``ProjectInfoOrm``."""
    # Project name (same spelling as the ORM column).
    prjectName:str
    # Project flag (primary key on the ORM side).
    prjFlag:str

    class Config:
        # Pydantic setting that lets the model be populated from object attributes
        # (e.g. ORM rows) instead of only from dicts.
        from_attributes=True

    @classmethod
    def orm(cls):
        """Return the ORM class that backs this schema."""
        return ProjectInfoOrm
|
||||||
|
|
||||||
class DBManager:
|
class DBManager:
|
||||||
def __init__(self) -> None:
|
def __init__(self) -> None:
|
||||||
DATABASE_URL = os.getenv("SQLITE_DATABASE_URL")
|
DATABASE_URL = os.getenv("SQLITE_DATABASE_URL")
|
||||||
@@ -160,7 +178,8 @@ class DBManager:
|
|||||||
return
|
return
|
||||||
records = session.query(ormCls).filter_by(**filter).all()
|
records = session.query(ormCls).filter_by(**filter).all()
|
||||||
if records is not None:
|
if records is not None:
|
||||||
session.delete(records)
|
for record in records:
|
||||||
|
session.delete(record)
|
||||||
session.commit()
|
session.commit()
|
||||||
|
|
||||||
def update(self,tableName:str,data:Dict[str,Any],**filter):
|
def update(self,tableName:str,data:Dict[str,Any],**filter):
|
||||||
|
|||||||
@@ -0,0 +1,134 @@
|
|||||||
|
import base64,os,mimetypes,requests,tempfile
|
||||||
|
from typing import List,Dict,Any
|
||||||
|
from uuid import uuid4
|
||||||
|
from app.settings import init_settings
|
||||||
|
from app.engine.loaders import get_document_Types, get_documents,getFileCacahePath
|
||||||
|
from app.engine.vectordb import get_vector_store
|
||||||
|
from app.engine.generate import get_doc_store,run_pipeline,persist_storage
|
||||||
|
from llama_index.core.schema import Document
|
||||||
|
from pathlib import Path
|
||||||
|
from llama_index.core.readers.file.base import (
|
||||||
|
_try_loading_included_file_formats as get_file_loaders_map,
|
||||||
|
)
|
||||||
|
from llama_index.readers.file import FlatReader
|
||||||
|
from llama_index.core.ingestion import IngestionPipeline
|
||||||
|
from llama_index.core import VectorStoreIndex
|
||||||
|
from app.engine.index import get_index
|
||||||
|
|
||||||
|
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||||
|
|
||||||
|
class PrjFileLoadService:
    """Converts an uploaded project file via the external converter service and indexes the result."""

    @staticmethod
    def store_and_parse_file(file_data):
        """Convert *file_data* remotely, download the resulting archive and unpack it.

        The converter base URL is read from the ``PRJTOJSON_URL`` environment
        variable. The downloaded zip is unpacked into
        ``<file cache path>/Projects/<uuid>``.

        Args:
            file_data: Payload posted as the ``file`` form field to the converter.

        Returns:
            ``'Projects_<uuid>'`` identifying the unpacked directory, or ``None``
            when the converter URL is not configured, either response is empty,
            or storing/unpacking the archive fails.
        """
        import logging
        import zipfile

        prjtoJson_url = os.getenv('PRJTOJSON_URL')
        if not prjtoJson_url:
            # Without the base URL neither request can be built.
            return None

        response1 = requests.post(
            url=prjtoJson_url + '/prj_convert_clt2json',
            files={'file': file_data},
        )
        if not response1.text:
            return None

        response2 = requests.post(
            url=prjtoJson_url + '/file_download',
            data=response1.text,
        )
        # The archive is binary: test the bytes payload (comparing bytes to '' never matches).
        if not response2.content:
            return None

        # os.path.join keeps the temp path valid on every platform.
        tempFilePath: str = os.path.join(tempfile.gettempdir(), f"{uuid4().hex}.zip")
        try:
            with open(tempFilePath, 'wb') as file:
                file.write(response2.content)

            prjID = str(uuid4())
            filePath = getFileCacahePath() + f'/Projects/{prjID}'
            os.makedirs(filePath)
            with zipfile.ZipFile(tempFilePath, 'r') as zip_File:
                for zip_info in zip_File.infolist():
                    # Entries without the UTF-8 flag (bit 11) were decoded by zipfile as
                    # cp437; re-decode those as GBK. Flagged entries are already correct
                    # and re-encoding them would raise.
                    if not zip_info.flag_bits & 0x800:
                        zip_info.filename = zip_info.filename.encode('cp437').decode('gbk')
                    zip_File.extract(zip_info, filePath)
            return f'Projects_{prjID}'
        except Exception:
            # Best-effort contract is kept (callers receive None), but the cause is logged.
            logging.getLogger(__name__).exception("Failed to store or unpack converted project archive")
            return None
        finally:
            # Remove the temporary archive on both success and failure.
            if os.path.exists(tempFilePath):
                os.remove(tempFilePath)

    @staticmethod
    def process_file(base64_content: str) -> str:
        """Convert the uploaded project, then build and persist its vector index.

        Args:
            base64_content: Upload payload, passed unchanged to ``store_and_parse_file``.

        Returns:
            The project flag produced by ``store_and_parse_file``, or ``None``
            when conversion failed.
        """
        prjFlag = PrjFileLoadService.store_and_parse_file(base64_content)
        if prjFlag is None:
            return None

        # Generate the vectors and persist them locally.
        documents = get_documents(prjFlag)
        for doc in documents:
            doc.metadata["private"] = "false"
        docstore = get_doc_store(prjFlag)
        vector_store = get_vector_store(prjFlag)
        _ = run_pipeline(docstore, vector_store, documents)
        persist_storage(docstore, vector_store)
        return prjFlag
|
||||||
|
|
||||||
|
class ChatFileService:
    """Stores a chat-uploaded file, parses it into documents and adds them to the index."""

    # Directory that receives uploaded files (env: CHAT_UPLOAD_FILECACHE).
    PRIVATE_STORE_PATH = os.getenv('CHAT_UPLOAD_FILECACHE','output/uploaded')
    # Metadata of the upload being processed (mime_type, extension, id, file_name, size).
    # Class-level and therefore shared between calls; process_file returns a copy.
    resluts: Dict[str, Any] = {}

    @staticmethod
    def process_file(base64_content: str) -> dict:
        """Decode, store, parse and index an uploaded file.

        Args:
            base64_content: Data URL of the form ``data:<mime>;base64,<payload>``.

        Returns:
            A dict with the upload's ``mime_type``, ``extension``, ``id``,
            ``file_name`` and ``size``.

        Raises:
            ValueError: If no reader is registered for the file's extension.
        """
        file_data, extension = ChatFileService.preprocess_base64_file(base64_content)
        documents = ChatFileService.store_and_parse_file(file_data, extension)

        # One ingestion pass is enough; running the same pipeline twice only repeated the work.
        nodes = IngestionPipeline().run(documents=documents)

        # NOTE(review): the revised app.engine.index.get_index in this changeset appears to
        # require a project flag; confirm this zero-argument call is still valid.
        current_index = get_index()
        if current_index is None:
            current_index = VectorStoreIndex(nodes=nodes)
        else:
            current_index.insert_nodes(nodes=nodes)
        current_index.storage_context.persist(
            persist_dir=os.environ.get("STORAGE_DIR", "storage")
        )

        # Return a snapshot so a later upload cannot mutate this caller's result.
        return dict(ChatFileService.resluts)

    @staticmethod
    def preprocess_base64_file(base64_content: str) -> tuple:
        """Split a data URL into decoded bytes and a guessed file extension.

        Records ``mime_type`` and ``extension`` in ``ChatFileService.resluts``.

        Returns:
            ``(decoded_bytes, extension)``; ``extension`` is ``None`` when the
            MIME type is unknown to ``mimetypes``.
        """
        header, data = base64_content.split(",", 1)
        mime_type = header.split(";")[0].split(":", 1)[1]
        extension = mimetypes.guess_extension(mime_type)
        ChatFileService.resluts['mime_type'] = mime_type
        ChatFileService.resluts['extension'] = extension
        return base64.b64decode(data), extension

    @staticmethod
    def store_and_parse_file(file_data, extension) -> "List[Document]":
        """Write the upload under ``PRIVATE_STORE_PATH`` and load it into documents.

        Records ``id``, ``file_name`` and ``size`` in ``ChatFileService.resluts``.
        Every returned document is tagged with its ``file_name`` and ``private="true"``.

        Raises:
            ValueError: If no reader is registered for *extension*.
        """
        fileID = uuid4().hex
        file_name = f"{fileID}{extension}"
        ChatFileService.resluts['id'] = fileID
        ChatFileService.resluts['file_name'] = file_name

        # Reject unsupported types before touching the disk so no orphan file is left behind.
        reader_cls = ChatFileService.default_file_loaders_map().get(extension)
        if reader_cls is None:
            raise ValueError(f"File extension {extension} is not supported")

        os.makedirs(ChatFileService.PRIVATE_STORE_PATH, exist_ok=True)
        file_path = Path(os.path.join(ChatFileService.PRIVATE_STORE_PATH, file_name))
        with open(file_path, "wb") as f:
            f.write(file_data)
        ChatFileService.resluts['size'] = os.path.getsize(file_path)

        documents = reader_cls().load_data(file_path)
        for doc in documents:
            doc.metadata["file_name"] = file_name
            doc.metadata["private"] = "true"
        return documents

    @staticmethod
    def default_file_loaders_map():
        """Return the library's extension-to-reader map with ``.txt`` mapped to ``FlatReader``."""
        default_loaders = get_file_loaders_map()
        default_loaders[".txt"] = FlatReader
        return default_loaders
|
||||||
@@ -0,0 +1,43 @@
|
|||||||
|
from typing import List
|
||||||
|
|
||||||
|
from app.api.routers.request.base import message
|
||||||
|
from llama_index.core.prompts import PromptTemplate
|
||||||
|
from llama_index.core.settings import Settings
|
||||||
|
from pydantic import BaseModel
|
||||||
|
|
||||||
|
# Prompt sent to the LLM. ``{conversation}`` and ``{number_of_questions}`` are template
# variables filled in by suggest_next_questions (a ``$name`` token would be sent verbatim).
NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
    "你是一个乐于助人的助手!你的任务是对用户可能会问的下一个问题给出建议。 "
    "\n这是对话历史记录"
    "\n---------------------\n{conversation}\n---------------------"
    "考虑到对话历史记录,仅限于现在知识库已有内容, 请给我 {number_of_questions} 个你接下来可能会问题的问题!"
)
# Default number of follow-up questions requested from the LLM.
N_QUESTION_TO_GENERATE = 3


class NextQuestions(BaseModel):
    """A list of questions that user might ask next"""

    questions: List[str]


class NextQuestionSuggestion:
    """Generates follow-up question suggestions from a stored question/answer pair."""

    @staticmethod
    async def suggest_next_questions(
        message_id: str,
        number_of_questions: int = N_QUESTION_TO_GENERATE,
    ) -> List[str]:
        """Ask the configured LLM for follow-up questions to a stored message.

        Args:
            message_id: Id of the stored message whose ``query`` and ``answer``
                form the conversation context.
            number_of_questions: How many suggestions to request.

        Returns:
            The suggested questions, or an empty list when no message with
            that id is found.
        """
        results = message().query(message_id)
        if len(results) == 0:
            return []

        last_user_message = results[0]['query']
        last_assistant_message = results[0]['answer']
        conversation: str = f"{last_user_message}\n{last_assistant_message}"
        # The keyword names must match the prompt's template variables exactly.
        output: NextQuestions = await Settings.llm.astructured_predict(
            NextQuestions,
            prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
            conversation=conversation,
            number_of_questions=number_of_questions,
        )
        return output.questions
|
||||||
@@ -6,16 +6,25 @@ from llama_index.core.tools.query_engine import QueryEngineTool
|
|||||||
|
|
||||||
from app.engine.engine import create_query_engine, create_summary_query_engine
|
from app.engine.engine import create_query_engine, create_summary_query_engine
|
||||||
from app.engine.index import get_index
|
from app.engine.index import get_index
|
||||||
|
from app.engine.prompt import ReActChatFormatter_messages, tree_summary_query_engine_tool_messages, \
|
||||||
|
query_engine_tool_messages, summary_query_tool_messages
|
||||||
#from app.engine.loaders.db import makeDescriptionByEngine
|
#from app.engine.loaders.db import makeDescriptionByEngine
|
||||||
from app.engine.tools import ToolFactory
|
from app.engine.tools import ToolFactory
|
||||||
|
from app.api.routers.request.base import ProjectInfo
|
||||||
|
from llama_index.core.response_synthesizers import ResponseMode
|
||||||
|
|
||||||
|
def getPrjFalg(params: dict = None) -> str:
    """Resolve the project flag for the project named in ``params['projectname']``.

    Returns ``''`` when *params* is ``None``; otherwise whatever
    ``ProjectInfo().prjFalg`` returns for that name.
    """
    if params is None:
        return ''
    return ProjectInfo().prjFalg(params.get('projectname'))
|
||||||
|
|
||||||
|
|
||||||
def get_chat_engine(filters=None, params=None):
|
def get_chat_engine(filters=None, params:dict=None):
|
||||||
system_prompt = os.getenv("SYSTEM_PROMPT")
|
system_prompt = os.getenv("SYSTEM_PROMPT")
|
||||||
top_k = int(os.getenv("TOP_K", "3"))
|
top_k = int(os.getenv("TOP_K", "3"))
|
||||||
use_reranker = os.getenv("RERANK_ENABLED")
|
use_reranker = os.getenv("RERANK_ENABLED")
|
||||||
tools = []
|
tools = []
|
||||||
|
|
||||||
# 创建SQL查询工具
|
# 创建SQL查询工具
|
||||||
# sql_query_engine = create_summary_query_engine(index)
|
# sql_query_engine = create_summary_query_engine(index)
|
||||||
# sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
|
# sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
|
||||||
@@ -25,31 +34,31 @@ def get_chat_engine(filters=None, params=None):
|
|||||||
#tools.append(sql_query_tool)
|
#tools.append(sql_query_tool)
|
||||||
|
|
||||||
# Add query tool if index exists
|
# Add query tool if index exists
|
||||||
index = get_index()
|
index = get_index(getPrjFalg(params))
|
||||||
if index is not None:
|
if index is not None:
|
||||||
|
|
||||||
|
|
||||||
summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
|
summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
|
||||||
summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
|
summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
|
||||||
description="适用于任何需要进行全面总结、概括的要求。",
|
description=summary_query_tool_messages,
|
||||||
)
|
)
|
||||||
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "COMPACT")
|
|
||||||
|
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = ResponseMode.TREE_SUMMARIZE)
|
||||||
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
|
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
|
||||||
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。",
|
description=query_engine_tool_messages)
|
||||||
)
|
|
||||||
|
|
||||||
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = "TREE_SUMMARIZE")
|
query_engine = create_query_engine(index,top_k,use_reranker,filters,response_mode = ResponseMode.TREE_SUMMARIZE)
|
||||||
query_engine_tool_1 = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool_1",
|
query_engine_tool_1 = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool_1",
|
||||||
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后,且在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用“zj_query_tool_1”工具。",
|
description=tree_summary_query_engine_tool_messages)
|
||||||
)
|
|
||||||
|
|
||||||
tools.append(summary_query_tool)
|
|
||||||
tools.append(query_engine_tool)
|
tools.append(query_engine_tool)
|
||||||
tools.append(query_engine_tool_1)
|
tools.append(query_engine_tool_1)
|
||||||
|
tools.append(summary_query_tool)
|
||||||
|
|
||||||
# Add additional tools
|
# Add additional tools
|
||||||
tools += ToolFactory.from_env()
|
tools += ToolFactory.from_env()
|
||||||
|
|
||||||
prefix_messages = ("""您的设计旨在帮助完成各种任务,从回答问题到提供其他类型分析的摘要。\n\n##工具\n\n你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n这可能需要将任务分解为子任务,并使用不同的工具来完成每个子任务。\n\n你可以访问以下工具:\n{tool_desc}\n\n\n##输出格式\n\n请用与问题相同的语言回答,并使用以下格式:\n\n \nThought: 用户当前的语言是:(user's language)。我需要使用工具来帮助我回答问题。\nAction: 如果使用工具,则为工具名称(one of {tool_names})。\nAction Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}})\n \n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n如果使用此格式,用户将以下面的格式进行回应:\n\n \nObservation: 工具响应\n \n\n你应该继续重复上述格式,直到你有足够的信息来回答问题而无需使用更多工具。此时,你必须使用以下两种格式之一进行回答:\n\n \nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\nAnswer: [你的答案(与用户问题相同的语言)]\n \n\n \nThought: 我无法使用提供的工具回答问题。\nAnswer: [你的答案(与用户问题相同的语言)]\n \n\n##如果从工具中得到的回应是Empty Response,那么只需要回答“我不知道”,不需要额外回答别的内容。## 当前对话\n\n以下是当前对话,由人类和助手的消息交替组成。\n""")
|
react_chat_formatter = ReActChatFormatter.from_defaults(ReActChatFormatter_messages)
|
||||||
react_chat_formatter = ReActChatFormatter.from_defaults(prefix_messages)
|
|
||||||
agentrunner = AgentRunner.from_llm(
|
agentrunner = AgentRunner.from_llm(
|
||||||
llm=Settings.llm,
|
llm=Settings.llm,
|
||||||
tools=tools,
|
tools=tools,
|
||||||
@@ -58,6 +67,7 @@ def get_chat_engine(filters=None, params=None):
|
|||||||
verbose=True,
|
verbose=True,
|
||||||
)
|
)
|
||||||
return agentrunner
|
return agentrunner
|
||||||
|
|
||||||
# create the function calling worker for reasoning
|
# create the function calling worker for reasoning
|
||||||
# worker = FunctionCallingAgentWorker.from_tools(
|
# worker = FunctionCallingAgentWorker.from_tools(
|
||||||
# tools, verbose=True
|
# tools, verbose=True
|
||||||
|
|||||||
@@ -7,10 +7,28 @@ from llama_index.core.query_engine import RetrieverQueryEngine
|
|||||||
from llama_index.core.response_synthesizers import ResponseMode
|
from llama_index.core.response_synthesizers import ResponseMode
|
||||||
from llama_index.readers.database import DatabaseReader
|
from llama_index.readers.database import DatabaseReader
|
||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
|
from util.register import *
|
||||||
from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
|
from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
|
||||||
from app.engine.retriever.HybridRetriever import HybridRetriever
|
from app.engine.retriever.HybridRetriever import HybridRetriever
|
||||||
from app.settings import get_node_postprocessors
|
from app.engine.response.treeSummResponse import CustomTreeResponse
|
||||||
|
from llama_index.core.settings import Settings
|
||||||
|
|
||||||
|
ModelPlateCategory = '模型平台'
|
||||||
|
|
||||||
|
def get_node_postprocessors():
    """Build the rerank post-processor selected through the environment.

    Reads ``RERANK_ENABLED`` and ``RERANK_PROVIDER``.

    Returns:
        ``[]`` when ``RERANK_ENABLED`` is unset or equals "false" (any casing);
        otherwise the value returned by the registered provider's ``rerank()``.

    Raises:
        ValueError: If no class is registered for ``RERANK_PROVIDER``.
    """
    rerank_enabled = os.getenv("RERANK_ENABLED")
    # Check for None before normalising: calling .title() on a missing variable raises.
    if rerank_enabled is None or rerank_enabled.title() == 'False':
        return []

    Rerank_provider = os.getenv("RERANK_PROVIDER")
    modelPaltCls = ClsRegister.get(ModelPlateCategory, Rerank_provider)
    if modelPaltCls is None:
        raise ValueError(f"Invalid rerank provider: {Rerank_provider}")
    return modelPaltCls().rerank()
|
||||||
|
|
||||||
def makeDescriptionByEngine(sql_database:SQLDatabase):
|
def makeDescriptionByEngine(sql_database:SQLDatabase):
|
||||||
reader = DatabaseReader(sql_database)
|
reader = DatabaseReader(sql_database)
|
||||||
@@ -49,6 +67,14 @@ def get_Retriever(index,**kwargs):
|
|||||||
return retriever
|
return retriever
|
||||||
|
|
||||||
|
|
||||||
|
def get_synthesizer():
    """Create a ``CustomTreeResponse`` bound to the globally configured LLM.

    The synthesizer uses ``summary_template``, runs asynchronously and does not stream.
    """
    synthesizer_options = {
        "llm": Settings.llm,
        "summary_template": summary_template,
        "use_async": True,
        "streaming": False,
    }
    return CustomTreeResponse(**synthesizer_options)
|
||||||
|
|
||||||
sql_database = None
|
sql_database = None
|
||||||
sql_obj_index = None
|
sql_obj_index = None
|
||||||
|
|
||||||
@@ -81,7 +107,7 @@ def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None
|
|||||||
summary_query_engine = summary_index.as_query_engine(
|
summary_query_engine = summary_index.as_query_engine(
|
||||||
response_mode=ResponseMode.TREE_SUMMARIZE,
|
response_mode=ResponseMode.TREE_SUMMARIZE,
|
||||||
use_async=True,
|
use_async=True,
|
||||||
streaming=True,
|
streaming=False,
|
||||||
)
|
)
|
||||||
return summary_query_engine
|
return summary_query_engine
|
||||||
|
|
||||||
@@ -102,8 +128,8 @@ def create_query_engine(index, top_k=3, use_reranker=False, filters=None, respon
|
|||||||
simple_template = simple_template,
|
simple_template = simple_template,
|
||||||
node_postprocessors=postprocess,
|
node_postprocessors=postprocess,
|
||||||
use_async=True,
|
use_async=True,
|
||||||
streaming=True,
|
streaming=False,
|
||||||
ResponseMode = response_mode
|
response_mode = response_mode
|
||||||
)
|
)
|
||||||
|
|
||||||
return query_engine
|
return query_engine
|
||||||
@@ -5,12 +5,12 @@ load_dotenv()
|
|||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
|
|
||||||
from app.engine.loaders import get_documents
|
from app.engine.loaders import get_document_Types, get_documents
|
||||||
from app.engine.vectordb import get_vector_store
|
from app.engine.vectordb import get_vector_store
|
||||||
from app.settings import init_settings
|
from app.settings import init_settings
|
||||||
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
|
||||||
from llama_index.core.ingestion import IngestionPipeline
|
from llama_index.core.ingestion import IngestionPipeline
|
||||||
from llama_index.core.node_parser import SentenceSplitter
|
from llama_index.core.node_parser import SentenceSplitter,MarkdownNodeParser
|
||||||
from llama_index.core.settings import Settings
|
from llama_index.core.settings import Settings
|
||||||
from llama_index.core.storage import StorageContext
|
from llama_index.core.storage import StorageContext
|
||||||
from llama_index.core.storage.docstore import SimpleDocumentStore
|
from llama_index.core.storage.docstore import SimpleDocumentStore
|
||||||
@@ -21,12 +21,13 @@ logger = logging.getLogger()
|
|||||||
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
|
||||||
|
|
||||||
|
|
||||||
def get_doc_store():
|
def get_doc_store(docType:str):
|
||||||
|
|
||||||
# If the storage directory is there, load the document store from it.
|
# If the storage directory is there, load the document store from it.
|
||||||
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
|
# If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
|
||||||
if os.path.exists(STORAGE_DIR):
|
storeDir = os.path.join(STORAGE_DIR,docType)
|
||||||
return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
|
if os.path.exists(storeDir):
|
||||||
|
return SimpleDocumentStore.from_persist_dir(storeDir)
|
||||||
else:
|
else:
|
||||||
return SimpleDocumentStore()
|
return SimpleDocumentStore()
|
||||||
|
|
||||||
@@ -34,10 +35,11 @@ def get_doc_store():
|
|||||||
def run_pipeline(docstore, vector_store, documents):
|
def run_pipeline(docstore, vector_store, documents):
|
||||||
pipeline = IngestionPipeline(
|
pipeline = IngestionPipeline(
|
||||||
transformations=[
|
transformations=[
|
||||||
SentenceSplitter(
|
#SentenceSplitter(
|
||||||
chunk_size=Settings.chunk_size,
|
#chunk_size=Settings.chunk_size,
|
||||||
chunk_overlap=Settings.chunk_overlap,
|
#chunk_overlap=Settings.chunk_overlap,
|
||||||
),
|
#),
|
||||||
|
#MarkdownNodeParser(),
|
||||||
Settings.embed_model,
|
Settings.embed_model,
|
||||||
],
|
],
|
||||||
docstore=docstore,
|
docstore=docstore,
|
||||||
@@ -61,8 +63,9 @@ def persist_storage(docstore, vector_store):
|
|||||||
|
|
||||||
def persist_BMRetriever(vector_store):
|
def persist_BMRetriever(vector_store):
|
||||||
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
||||||
top_k = int(os.getenv("TOP_K", "3"))
|
nodes = vector_store.get_nodes([])
|
||||||
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes=vector_store.get_nodes([]))
|
top_k = min(int(os.getenv("TOP_K", "3")),len(nodes))
|
||||||
|
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes = nodes)
|
||||||
bmRetriver.persist(STORAGE_DIR)
|
bmRetriver.persist(STORAGE_DIR)
|
||||||
|
|
||||||
|
|
||||||
@@ -71,12 +74,14 @@ def generate_datasource():
|
|||||||
logger.info("Generate index for the provided data")
|
logger.info("Generate index for the provided data")
|
||||||
|
|
||||||
# Get the stores and documents or create new ones
|
# Get the stores and documents or create new ones
|
||||||
documents = get_documents()
|
docTypes = get_document_Types()
|
||||||
|
for docType in docTypes:
|
||||||
|
documents = get_documents(docType)
|
||||||
# Set private=false to mark the document as public (required for filtering)
|
# Set private=false to mark the document as public (required for filtering)
|
||||||
for doc in documents:
|
for doc in documents:
|
||||||
doc.metadata["private"] = "false"
|
doc.metadata["private"] = "false"
|
||||||
docstore = get_doc_store()
|
docstore = get_doc_store(docType)
|
||||||
vector_store = get_vector_store()
|
vector_store = get_vector_store(docType)
|
||||||
|
|
||||||
# Run the ingestion pipeline
|
# Run the ingestion pipeline
|
||||||
_ = run_pipeline(docstore, vector_store, documents)
|
_ = run_pipeline(docstore, vector_store, documents)
|
||||||
|
|||||||
@@ -1,22 +1,15 @@
|
|||||||
import logging
|
import logging
|
||||||
from llama_index.core.indices import VectorStoreIndex
|
from llama_index.core.indices import VectorStoreIndex
|
||||||
from app.engine.vectordb import get_vector_store
|
from app.engine.vectordb import get_vector_store
|
||||||
|
from app.engine.loaders import get_document_Types
|
||||||
|
from typing import Dict,Any
|
||||||
logger = logging.getLogger("uvicorn")
|
logger = logging.getLogger("uvicorn")
|
||||||
|
|
||||||
index = None
|
def get_index(prjFlag:str):
|
||||||
|
if prjFlag is None or prjFlag == '':
|
||||||
def get_index(params=None):
|
raise ValueError('无效的工程标识')
|
||||||
global index
|
|
||||||
if index is None:
|
|
||||||
logger.info("Connecting vector store...")
|
logger.info("Connecting vector store...")
|
||||||
|
store = get_vector_store(prjFlag)
|
||||||
store = get_vector_store()
|
|
||||||
# Load the index from the vector store
|
|
||||||
# If you are using a vector store that doesn't store text,
|
|
||||||
# you must load the index from both the vector store and the document store
|
|
||||||
index = VectorStoreIndex.from_vector_store(store)
|
index = VectorStoreIndex.from_vector_store(store)
|
||||||
logger.info("Finished load index from vector store.")
|
logger.info("Finished load index from vector store.")
|
||||||
|
|
||||||
return index
|
return index
|
||||||
|
|||||||
@@ -3,17 +3,86 @@ import yaml
|
|||||||
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
|
from app.engine.loaders.db import DBLoaderConfig, get_db_documents
|
||||||
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
from app.engine.loaders.file import FileLoaderConfig, get_file_documents
|
||||||
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
from app.engine.loaders.web import WebLoaderConfig, get_web_documents
|
||||||
|
from app.engine.loaders.file import getProjectName
|
||||||
|
import os
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
def load_configs():
|
def load_configs():
|
||||||
with open("config/loaders.yaml",encoding='UTF-8') as f:
|
with open("config/loaders.yaml",encoding='utf-8') as f:
|
||||||
configs = yaml.safe_load(f)
|
configs = yaml.safe_load(f)
|
||||||
return configs
|
return configs
|
||||||
|
|
||||||
|
def path_difference(path1:str, path2:str):
    """Return the part of *path2* that lies beyond its common prefix with *path1*.

    Both paths are made absolute and split on the OS separator. The leading
    components they share are dropped from *path2* and the remainder is
    joined with ``'_'``.

    Args:
        path1: Base path.
        path2: Path to express relative to the shared prefix.

    Returns:
        The remaining components of *path2* joined by ``'_'``; ``''`` when
        nothing remains (including when *path2* is a prefix of *path1*).
    """
    import os

    path1_parts = os.path.abspath(path1).split(os.path.sep)
    path2_parts = os.path.abspath(path2).split(os.path.sep)

    # zip stops at the shorter path, so a deeper path1 can no longer index past path2.
    shared = 0
    for left, right in zip(path1_parts, path2_parts):
        if left != right:
            break
        shared += 1

    # NOTE: '_' is ambiguous when a directory name itself contains an underscore.
    return '_'.join(path2_parts[shared:])
|
||||||
|
|
||||||
|
def getFileCacahePath():
    """Return the ``data_dir`` of the ``file`` loader entry, defaulting to ``'data'``.

    The default applies when the loader configuration is missing, empty, or
    has no ``file`` entry.
    """
    default_root = 'data'
    configs = load_configs()
    if configs is None or len(configs.items()) == 0:
        return default_root
    for kind, settings in configs.items():
        if kind == "file":
            return FileLoaderConfig(**settings).data_dir
    return default_root
|
||||||
|
|
||||||
|
def get_document_Types():
    """List every leaf directory under the file cache root as a ``'_'``-joined key.

    Directories are walked depth-first with an explicit stack. A directory
    with no sub-directories counts as a leaf and is reported through
    ``path_difference(root, leaf)``.
    """
    root = getFileCacahePath()
    leaf_keys = []
    pending = [root]
    while pending:
        current = pending.pop()
        children = []
        for entry in os.listdir(current):
            candidate = os.path.join(current, entry)
            if os.path.isdir(candidate):
                children.append(candidate)
        if children:
            pending.extend(children)
        else:
            leaf_keys.append(path_difference(root, current))
    return leaf_keys
|
||||||
|
|
||||||
|
def getProjectInfos():
    """Collect ``{'flag', 'name'}`` entries for every project directory.

    The data directory is taken from the first loader entry that is not
    disabled. Each flag returned by ``get_document_Types`` is mapped back to
    a directory under it, and ``getProjectName`` is asked for the name.

    Returns:
        A list of dicts with keys ``flag`` and ``name``, or ``None`` when the
        loader configuration is missing/empty or every loader is disabled.
    """
    config = load_configs()
    if config is None or len(config.items()) == 0:
        return None

    prjDir = None
    for loader_config in config.values():
        # A loader declared with no settings is loaded as None; normalise before reading it.
        loader_config = loader_config or {}
        if loader_config.get('enable', True):
            # NOTE(review): the loader type is not checked here, whereas
            # getFileCacahePath reads the "file" entry specifically; confirm both
            # are meant to resolve to the same directory.
            prjDir = FileLoaderConfig(**loader_config).data_dir
            break
    if prjDir is None:
        return None

    prjInfos = []
    for prjFlag in get_document_Types():
        # Flags join path components with '_'; rebuild the path with the OS separator.
        fileDir = os.path.join(prjDir, prjFlag.replace('_', os.sep))
        prjInfos.append({'flag': prjFlag, 'name': getProjectName(fileDir)})
    return prjInfos
|
||||||
|
|
||||||
|
def get_documents(docType:str):
|
||||||
documents = []
|
documents = []
|
||||||
config = load_configs()
|
config = load_configs()
|
||||||
|
|
||||||
@@ -29,7 +98,7 @@ def get_documents():
|
|||||||
loader_config = loader_config or []
|
loader_config = loader_config or []
|
||||||
match loader_type:
|
match loader_type:
|
||||||
case "file":
|
case "file":
|
||||||
document = get_file_documents(FileLoaderConfig(**loader_config))
|
document = get_file_documents(FileLoaderConfig(**loader_config),docType)
|
||||||
case "web":
|
case "web":
|
||||||
document = get_web_documents(WebLoaderConfig(**loader_config))
|
document = get_web_documents(WebLoaderConfig(**loader_config))
|
||||||
case "db":
|
case "db":
|
||||||
|
|||||||
@@ -6,6 +6,9 @@ from llama_index.core.readers.base import BaseReader
|
|||||||
from llama_index.core.readers.json import JSONReader
|
from llama_index.core.readers.json import JSONReader
|
||||||
from llama_parse import LlamaParse
|
from llama_parse import LlamaParse
|
||||||
from pydantic import BaseModel, validator
|
from pydantic import BaseModel, validator
|
||||||
|
from app.engine.loaders.markdownReader import ChunkMarkdownReader
|
||||||
|
from app.engine.loaders.projectJson import ProjectJson
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -20,7 +23,6 @@ class FileLoaderConfig(BaseModel):
|
|||||||
raise ValueError(f"Directory '{v}' does not exist")
|
raise ValueError(f"Directory '{v}' does not exist")
|
||||||
return v
|
return v
|
||||||
|
|
||||||
|
|
||||||
def llama_parse_parser():
|
def llama_parse_parser():
|
||||||
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
if os.getenv("LLAMA_CLOUD_API_KEY") is None:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
@@ -35,7 +37,6 @@ def llama_parse_parser():
|
|||||||
)
|
)
|
||||||
return parser
|
return parser
|
||||||
|
|
||||||
|
|
||||||
def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
||||||
from llama_parse.utils import SUPPORTED_FILE_TYPES
|
from llama_parse.utils import SUPPORTED_FILE_TYPES
|
||||||
|
|
||||||
@@ -43,10 +44,13 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
|
|||||||
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
|
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
|
||||||
|
|
||||||
def llama_local_extractor() -> Dict[str, BaseReader]:
|
def llama_local_extractor() -> Dict[str, BaseReader]:
|
||||||
return {".json" : JSONReader(clean_json=False,levels_back=0)}
|
parser = {
|
||||||
|
".json" : JSONReader(clean_json=False,levels_back=0),
|
||||||
|
".md" : ChunkMarkdownReader(),
|
||||||
|
}
|
||||||
|
return parser
|
||||||
|
|
||||||
|
def get_file_documents(config: FileLoaderConfig,childPath: str):
|
||||||
def get_file_documents(config: FileLoaderConfig):
|
|
||||||
from llama_index.core.readers import SimpleDirectoryReader
|
from llama_index.core.readers import SimpleDirectoryReader
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -63,7 +67,7 @@ def get_file_documents(config: FileLoaderConfig):
|
|||||||
file_extractor = llama_local_extractor()
|
file_extractor = llama_local_extractor()
|
||||||
|
|
||||||
reader = SimpleDirectoryReader(
|
reader = SimpleDirectoryReader(
|
||||||
config.data_dir,
|
os.path.join(config.data_dir,childPath.replace('_','\\')),
|
||||||
recursive=True,
|
recursive=True,
|
||||||
filename_as_id=True,
|
filename_as_id=True,
|
||||||
raise_on_error=True,
|
raise_on_error=True,
|
||||||
@@ -86,3 +90,32 @@ def get_file_documents(config: FileLoaderConfig):
|
|||||||
else:
|
else:
|
||||||
# Raise the error if it is not the case of empty data dir
|
# Raise the error if it is not the case of empty data dir
|
||||||
raise e
|
raise e
|
||||||
|
|
||||||
|
def prjFileSuffix(dir:str):
    """Return the extension of the first regular file listed in ``dir``.

    "First" follows the order of ``os.listdir``. Returns ``''`` when the
    directory contains no regular file.
    """
    for entry in os.listdir(dir):
        if os.path.isfile(os.path.join(dir, entry)):
            return os.path.splitext(entry)[1]
    return ''
|
||||||
|
|
||||||
|
def getProjectName(dir:str):
    """Look up the project name stored in the project directory ``dir``.

    The storage format is chosen from the suffix reported by
    ``prjFileSuffix``: for ``.json`` the '工程属性' table is searched, for
    ``.md`` the '工程属性' markdown file is searched. Returns ``''`` when
    nothing is found or the suffix is neither of those.
    """
    suffix = prjFileSuffix(dir)

    if suffix == '.json':
        project = ProjectJson(dir)
        project.parse()
        properties = project.table('工程属性')
        for row in properties.records():
            if row.value('名称') == '工程名称':
                return row.value('值')
        return ''

    if suffix == '.md':
        for file_name in os.listdir(dir):
            if not file_name.endswith('.md'):
                continue
            if os.path.splitext(file_name)[0] != '工程属性':
                continue
            reader = ChunkMarkdownReader()
            reader.load_data(os.path.join(dir, file_name))
            return reader.findValue("名称=='工程名称'",'值')

    return ''
|
||||||
@@ -0,0 +1,64 @@
|
|||||||
|
from app.engine.loaders.projectJson import *
|
||||||
|
|
||||||
|
class MarkDown:
    """Render one parsed JSON table as a Markdown document written to ``path``."""

    # Field keys that are never exported as table columns.
    _IGNORED_FIELDS = ('_id', 'nodeType', 'relTbId')

    def __init__(self, table: "JsonTable", path: str) -> None:
        self._table = table
        self._path = path

    @staticmethod
    def _cell_text(value) -> str:
        """Return ``value`` as single-line text; ``None`` becomes ''."""
        if value is None:
            return ''
        return str(value).replace('\n', " ")

    def build(self):
        """Collect columns and rows from the table and write the Markdown file."""
        columns: list = []
        colComments: list = []
        for name, fld in self._table.fields().items():
            if name in self._IGNORED_FIELDS:
                continue
            columns.append(fld.value('name'))
            colComments.append(fld.value('alias'))

        # One cell per exported column, so rows always line up with the header.
        rowdatas = [
            [self._cell_text(record.value(col)) for col in columns]
            for record in self._table.records()
        ]

        content = self.convert(
            self._table.name(), self._table.comment(), columns, colComments, rowdatas
        )
        with open(self._path, 'w', encoding='utf-8') as file:
            file.write(content)

    def convert(self,tableName:str,tableComment:str,columns:list,colComments:list,rowdatas:list):
        """Build the Markdown text: title, optional comments, then the table.

        Args:
            tableName: text of the level-1 heading.
            tableComment: table remark; the remark line is emitted only when
                this is non-empty (previously the table *name* was tested).
            columns: column names, in output order.
            colComments: per-column remarks, parallel to ``columns``.
            rowdatas: rows, each a sequence of cell values.

        Returns:
            The complete Markdown document as one string.
        """
        parts = ["# " + tableName + '\n']
        if tableComment:
            parts.append(f"备注:{tableComment}" + '\n')

        for column, comment in zip(columns, colComments):
            parts.append(f"- 字段名称:{column}" + '\n')
            if comment:
                parts.append(f" - 备注:{comment}" + '\n')

        # Blank line between the description and the table.
        parts.append("\n")
        # Column titles.
        parts.append("|" + "|".join(str(c) for c in columns) + "|\n")
        # Separator row.
        parts.append("|" + "|".join('---' for _ in columns) + "|\n")
        # Data rows; str() keeps non-string cells from breaking the join.
        for row in rowdatas:
            parts.append("|" + "|".join(str(cell) for cell in row) + "|\n")
        return "".join(parts)
|
||||||
|
|
||||||
|
|
||||||
|
# Conversion script: parse every JSON table of the project directory and
# write each one out as a Markdown file.
prjSon = ProjectJson('')
prjSon.parse()
tables = prjSon.tables()
for name in tables:
    table = tables[name]
    mdObj = MarkDown(table,f'')
    mdObj.build()
|
||||||
@@ -0,0 +1,89 @@
|
|||||||
|
from llama_index.readers.file.markdown import MarkdownReader
|
||||||
|
from typing import Any, Dict, List, Optional, Tuple
|
||||||
|
import re
|
||||||
|
from llama_index.core.utils import get_tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
class ChunkMarkdownReader(MarkdownReader):
    """Markdown reader that cuts a table document into token-bounded chunks.

    While parsing, it also remembers the table's column-header line and the
    lines seen after the header so that ``findValue`` can query them.
    """

    def __init__(
        self,
        *args: Any,
        chunkSize:int = 2048,
        **kwargs: Any,
    ) -> None:
        """Store the chunk size (in tokens) and forward the rest to the base reader."""
        self._chunkSize = chunkSize
        self._tokenizer = get_tokenizer()
        self._colheader = ''
        self._rows = []
        super().__init__(*args,**kwargs)

    def markdown_to_tups(self, markdown_text: str) -> List[Tuple[Optional[str], str]]:
        """Split ``markdown_text`` into ``(header, body)`` tuples.

        Lines are accumulated until the running token count exceeds the chunk
        size, then emitted as one chunk. The first chunk is keyed by title
        plus table header, later chunks by the table header alone.

        Raises:
            ValueError: if the title section alone exceeds the chunk size.
        """
        markdown_tups: List[Tuple[Optional[str], str]] = []

        # Start from a clean slate so rows of a previously parsed document
        # are not mixed with this document's column header.
        self._colheader = ''
        self._rows = []

        strTitle = ''
        strheader:str = ''
        headerSize:int = 0
        tokensNum:int = 0
        current_lines = []

        def emit_chunk() -> None:
            # Only the very first chunk carries the title in its key.
            key = strTitle + strheader if len(markdown_tups) == 0 else strheader
            markdown_tups.append((key, "\n".join(current_lines)))

        for line in markdown_text.split("\n"):
            tokensNum += self._token_size(line)
            if tokensNum > self._chunkSize and len(current_lines) > 0:
                emit_chunk()
                tokensNum = headerSize
                current_lines.clear()
            current_lines.append(line)
            if strTitle!='' and strheader!='':
                self._rows.append(line)

            # NOTE(review): after split("\n") a line can never equal '\n', and
            # a blank line is '' unless the text uses CRLF endings - so the
            # title is only detected for CRLF input. Confirm this is intended.
            if line == '\n' or line == '\r':
                if tokensNum > self._chunkSize:
                    raise ValueError('标题Token数大于chunkSize大小')
                strTitle = "\n".join(current_lines)
                current_lines.clear()

            if line.startswith("|---"):
                # Separator row reached: the buffered lines form the table header.
                self._colheader = current_lines[0]
                strheader = "\n".join(current_lines)
                headerSize= headerSize + self._token_size(strheader)
                current_lines.clear()

        if len(current_lines) > 0:
            emit_chunk()

        return [
            (
                key if key is None else re.sub(r"#", "", key).strip(),
                re.sub(r"<.*?>", "", value),
            )
            for key, value in markdown_tups
        ]

    def _token_size(self, text: str) -> int:
        """Return the number of tokens the tokenizer produces for ``text``."""
        return len(self._tokenizer(text))

    def findValue(self,expression:str,Field:str):
        """Return column ``Field`` of the first remembered row matching ``expression``.

        Args:
            expression: Python expression evaluated with the row's cells bound
                to their column names, e.g. ``"名称=='工程名称'"``.
            Field: name of the column whose value is returned.

        Returns:
            The matching cell text, or ``''`` when no row matches.
        """
        cols = [item for item in self._colheader.split('|') if item]

        for row in self._rows:
            # Drop empty cells and bare line-ending residue. The previous
            # "!= '\r' or != '\n'" test was always true and filtered nothing.
            rowdatas = [
                item for item in row.split('|')
                if item and item != '\r' and item != '\n'
            ]
            if len(rowdatas) == 0:
                continue
            gData = dict(zip(cols, rowdatas))
            # SECURITY(review): eval() executes arbitrary Python. Only pass
            # expressions written by the application, never user input.
            if eval(expression,gData):
                return gData[Field]
        return ''
|
||||||
|
|
||||||
|
|
||||||
@@ -0,0 +1,94 @@
|
|||||||
|
from typing import Dict,List,Any
|
||||||
|
import json,os
|
||||||
|
|
||||||
|
class Record:
    """Thin read-only wrapper around one row (a dict) of a JSON table."""

    def __init__(self,datas:Dict[str,Any]) -> None:
        self._datas:Dict[str,Any] = datas

    def value(self,key:str):
        """Return the value stored under ``key``, or ``''`` if the key is absent."""
        return self._datas.get(key, '')
|
||||||
|
|
||||||
|
class Field:
    """Thin read-only wrapper around one field description (a dict)."""

    def __init__(self,datas:Dict[str,Any]) -> None:
        self._datas:Dict[str,Any] = datas

    def value(self,key:str):
        """Return the attribute stored under ``key``, or ``''`` if it is absent."""
        return self._datas.get(key, '')
|
||||||
|
|
||||||
|
class JsonTable:
    """One table loaded from a JSON file with a top-level ``table`` object."""

    def __init__(self,filePth:str) -> None:
        self._filePth = filePth
        self._fields:Dict[str,Field] = {}
        self._records:List[Record] = []
        # File name without directory and extension.
        self._fileName = os.path.splitext(os.path.basename(filePth))[0]
        self._name = ''
        self._comment = ''

    def parse(self):
        """Load the file and (re)build the field and record collections.

        Missing ``table``, ``fields`` or ``records`` entries are treated as
        empty instead of failing. Calling ``parse`` again replaces the
        previous content rather than appending to it.
        """
        with open(self._filePth, 'r',encoding='utf-8') as file:
            jsObj = json.load(file)

        data:dict = jsObj.get('table') or {}
        self._name = data.get('name')
        self._comment = data.get('comment')

        fields:Dict[str,Field] = {}
        for jsfield in data.get('fields') or []:
            field = Field(jsfield)
            fields[field.value('name')] = field
        self._fields = fields

        self._records = [Record(jsRecord) for jsRecord in data.get('records') or []]

    def records(self):
        """Return the list of parsed records."""
        return self._records

    def fields(self):
        """Return the parsed fields keyed by field name."""
        return self._fields

    def name(self):
        """Return the table name, taken from the file name.

        NOTE(review): the ``name`` value read from the JSON is stored in
        ``_name`` but not returned here - confirm that is intended.
        """
        return self._fileName

    def comment(self):
        """Return the table comment read from the JSON (``''`` before parsing)."""
        return self._comment
|
||||||
|
|
||||||
|
|
||||||
|
class ProjectJson:
    """All JSON tables found directly inside one project directory."""

    def __init__(self,dir:str) -> None:
        self._dir = dir
        self._tables:Dict[str,JsonTable] = {}

    def parse(self):
        """Parse every ``*.json`` file in the directory into a table.

        Tables are registered under their file name without extension.
        """
        for entry in os.listdir(self._dir):
            if not entry.endswith('.json'):
                continue
            parsed = JsonTable(os.path.join(self._dir, entry))
            parsed.parse()
            self._tables[os.path.splitext(entry)[0]] = parsed

    def table(self,tableName:str):
        """Return the table registered as ``tableName`` (``KeyError`` if unknown)."""
        return self._tables[tableName]

    def tables(self):
        """Return the mapping of table name to table."""
        return self._tables
|
||||||
|
|
||||||
|
def getProjectName(dir:str):
    """Derive the project name from the last component of the path ``dir``.

    Both ``\\`` and ``/`` are accepted as separators and trailing separators
    are ignored. When no component is left, the placeholder
    "未知工程名称" is returned.

    The former table lookup that followed the unconditional return could
    never run and has been removed.
    """
    normalized = dir.replace('\\', '/').rstrip('/')
    name = normalized.rsplit('/', 1)[-1]
    return name if name else "未知工程名称"
|
||||||
|
|
||||||
@@ -2,39 +2,31 @@ from llama_index.core import PromptTemplate
|
|||||||
|
|
||||||
text_qa_template_str = (
|
text_qa_template_str = (
|
||||||
"# 角色\n"
|
"# 角色\n"
|
||||||
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
|
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
|
||||||
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
|
"\n"
|
||||||
"如同直接从文件中提取的内容。\n"
|
"# 任务描述:\n"
|
||||||
"知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n"
|
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
|
||||||
"例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n"
|
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符。\n"
|
||||||
|
"如果答案与事实不符,直接给出答案,不要做解释。\n"
|
||||||
"## 技能\n"
|
"\n"
|
||||||
"### 技能 1: 数据查询与提供\n"
|
"# 回答规则:\n"
|
||||||
"- 准确回答所有关于电力工程造价的相关问题。\n"
|
"- 请使用与文档材料相同的语言回答问题。\n"
|
||||||
"- 提供具体数据,如成本估算、材料清单、劳动力需求等。\n"
|
"- 评估文档是否含有足够信息回答问题。无关时不要回答。\n"
|
||||||
"- 确保提供的信息严格基于工程文档中的记录。\n"
|
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释。\n"
|
||||||
|
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁。\n"
|
||||||
"### 技能 2: 技术性解释\n"
|
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答。\n"
|
||||||
"- 解释造价工程中的技术术语和概念。\n"
|
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”。\n"
|
||||||
"- 为复杂的工程细节提供清晰易懂的说明。\n"
|
"\n"
|
||||||
|
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复。\n"
|
||||||
"## 约束\n"
|
|
||||||
"- 仅回答与电力工程造价文件相关的具体问题。\n"
|
|
||||||
"- 不进行任何超出文件内容的猜测或假设。\n"
|
|
||||||
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
|
|
||||||
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
|
|
||||||
"以下为上下文信息\n"
|
|
||||||
"---------------------\n"
|
"---------------------\n"
|
||||||
"{context_str}\n"
|
"{context_str}\n"
|
||||||
"---------------------\n"
|
"---------------------\n"
|
||||||
"请根据上下文信息而非先前知识回答我的问题或回复我的指令。前面的上下文信息可能有用,也可能没用,你需要从我给出的上下文信息中选出与我的问题最相关的那些,来为你的回答提供依据。回答一定要忠于原文,简洁但不丢信息,不要胡乱编造。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。我的问题或指令是什么语种,你就用什么语种回复。\n"
|
"鉴于来自多个来源的文档片段而非先验知识,回答查询。\n"
|
||||||
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
||||||
|
"Query: {query_str}\n"
|
||||||
"问题:{query_str}\n"
|
"Answer: "
|
||||||
"你的回复: "
|
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
text_qa_template = PromptTemplate(text_qa_template_str)
|
text_qa_template = PromptTemplate(text_qa_template_str)
|
||||||
|
|
||||||
refine_template_str = (
|
refine_template_str = (
|
||||||
@@ -58,31 +50,26 @@ refine_template = PromptTemplate(refine_template_str)
|
|||||||
|
|
||||||
summary_template_str = (
|
summary_template_str = (
|
||||||
"# 角色\n"
|
"# 角色\n"
|
||||||
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
|
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
|
||||||
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
|
"\n"
|
||||||
"如同直接从文件中提取的内容。\n"
|
"# 任务描述:\n"
|
||||||
|
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
|
||||||
"## 技能\n"
|
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符。\n"
|
||||||
"### 技能 1: 数据查询与提供\n"
|
"如果答案与事实不符,直接给出答案,不要做解释。\n"
|
||||||
"- 准确回答所有关于电力工程造价的相关问题。\n"
|
"\n"
|
||||||
"- 提供具体数据,如成本估算、材料清单、劳动力需求等。\n"
|
"# 回答规则:\n"
|
||||||
"- 确保提供的信息严格基于工程文档中的记录。\n"
|
"- 请使用与文档材料相同的语言回答问题。\n"
|
||||||
|
"- 评估文档是否含有足够信息回答问题。无关时不要回答。\n"
|
||||||
"### 技能 2: 技术性解释\n"
|
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释。\n"
|
||||||
"- 解释造价工程中的技术术语和概念。\n"
|
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁。\n"
|
||||||
"- 为复杂的工程细节提供清晰易懂的说明。\n"
|
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答。\n"
|
||||||
|
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”。\n"
|
||||||
"## 约束\n"
|
"\n"
|
||||||
"- 仅回答与电力工程造价文件相关的具体问题。\n"
|
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复。\n"
|
||||||
"- 不进行任何超出文件内容的猜测或假设。\n"
|
|
||||||
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
|
|
||||||
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
|
|
||||||
"来自多个来源的上下文信息如下。\n"
|
|
||||||
"---------------------\n"
|
"---------------------\n"
|
||||||
"{context_str}\n"
|
"{context_str}\n"
|
||||||
"---------------------\n"
|
"---------------------\n"
|
||||||
"鉴于来自多个来源的信息而非先验知识, "
|
"鉴于来自多个来源的文档片段而非先验知识,回答查询。\n"
|
||||||
"回答查询。\n"
|
|
||||||
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
|
||||||
"Query: {query_str}\n"
|
"Query: {query_str}\n"
|
||||||
"Answer: "
|
"Answer: "
|
||||||
@@ -93,3 +80,40 @@ simple_template_str = (
|
|||||||
"{query_str}"
|
"{query_str}"
|
||||||
)
|
)
|
||||||
simple_template = PromptTemplate(simple_template_str)
|
simple_template = PromptTemplate(simple_template_str)
|
||||||
|
|
||||||
|
ReActChatFormatter_messages = (
|
||||||
|
"您的设计旨在帮助完成各种任务,从回答问题到提供其他类型分析的摘要。\n\n"
|
||||||
|
"##工具\n\n"
|
||||||
|
"你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n"
|
||||||
|
"这可能需要将任务分解为子任务,并使用不同的工具来完成每个子任务。\n\n"
|
||||||
|
"你可以访问以下工具:\n"
|
||||||
|
"{tool_desc}\n\n\n"
|
||||||
|
"##输出格式\n\n"
|
||||||
|
"请用与问题相同的语言回答,并使用以下格式:\n\n"
|
||||||
|
"'''\n"
|
||||||
|
"Thought: 用户当前的语言是:(user's language)。我需要使用工具来帮助我回答问题。\n"
|
||||||
|
"Action: 如果使用工具,则为工具名称(one of {tool_names})。\n"
|
||||||
|
"Action Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}})\n"
|
||||||
|
"'''\n\n"
|
||||||
|
"请始终以Thought开始。\n\n"
|
||||||
|
"切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n"
|
||||||
|
"请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n"
|
||||||
|
"如果使用此格式,用户将以下面的格式进行回应:\n\n"
|
||||||
|
"'''\n"
|
||||||
|
"Observation: 工具响应\n"
|
||||||
|
"'''\n\n"
|
||||||
|
"你应该继续重复上述格式,直到你有足够的信息来回答问题而无需使用更多工具。此时,你必须使用以下两种格式之一进行回答:\n\n"
|
||||||
|
"'''\nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\n"
|
||||||
|
"Answer: [你的答案(与用户问题相同的语言)]\n"
|
||||||
|
"'''\n\n"
|
||||||
|
"'''\n"
|
||||||
|
"Thought: 我无法使用提供的工具回答问题。\n"
|
||||||
|
"Answer: [你的答案(与用户问题相同的语言)]\n"
|
||||||
|
"'''\n\n##如果从工具中得到的回应是Empty Response,那么只需要回答“我不知道”,不需要额外回答别的内容。## 当前对话\n\n"
|
||||||
|
"以下是当前对话,由人类和助手的消息交替组成。\n"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
summary_query_tool_messages = "适用于任何需要进行全面总结、概括的要求。"
|
||||||
|
query_engine_tool_messages = "适用于回答任何问题。"
|
||||||
|
tree_summary_query_engine_tool_messages = "在询问工程中单位的具体数值,例如用量,费率,合计,金额等的时候建议使用本工具。"
|
||||||
|
|||||||
@@ -0,0 +1,70 @@
|
|||||||
|
from typing import Any, List, Optional
|
||||||
|
from llama_index.core.postprocessor import SentenceTransformerRerank
|
||||||
|
from llama_index.core.schema import MetadataMode, NodeWithScore, QueryBundle
|
||||||
|
from llama_index.core.callbacks import CBEventType, EventPayload
|
||||||
|
from llama_index.core.bridge.pydantic import PrivateAttr
|
||||||
|
|
||||||
|
class OllamaRerank(SentenceTransformerRerank):
    """Cross-encoder reranker that also drops nodes scoring below a threshold."""

    # Minimum rerank score a node needs to be kept.
    _score_threshold: float = PrivateAttr()

    def __init__(
        self,
        top_n: int = 2,
        model: str = "cross-encoder/stsb-distilroberta-base",
        device: Optional[str] = None,
        keep_retrieval_score: Optional[bool] = False,
        score_threshold:float = 0.3
    ):
        """Create the reranker.

        Args:
            top_n: maximum number of nodes returned.
            model: cross-encoder model name handed to the base class.
            device: device handed to the base class.
            keep_retrieval_score: keep the original score in node metadata.
            score_threshold: nodes whose rerank score is below this are dropped.
        """
        super().__init__(
            top_n=top_n,
            model=model,
            device=device,
            keep_retrieval_score=keep_retrieval_score,
        )
        # Assigned after the base initialiser so the private attribute is set
        # on a fully initialised model.
        self._score_threshold = score_threshold

    @classmethod
    def class_name(cls) -> str:
        return "OllamaRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rescore ``nodes`` against the query, filter by threshold, keep the top N.

        Raises:
            ValueError: if ``query_bundle`` is missing.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        query_and_nodes = [
            (
                query_bundle.query_str,
                node.node.get_content(metadata_mode=MetadataMode.EMBED),
            )
            for node in nodes
        ]

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self.model,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: self.top_n,
            },
        ) as event:
            scores = self._model.predict(query_and_nodes)

            assert len(scores) == len(nodes)

            for node, score in zip(nodes, scores):
                if self.keep_retrieval_score:
                    node.node.metadata["retrieval_score"] = node.score
                node.score = score

            # Build a filtered copy instead of calling nodes.remove(): the
            # caller's list stays intact and nothing is removed by equality.
            kept = [node for node in nodes if not (node.score < self._score_threshold)]

            new_nodes = sorted(kept, key=lambda x: -x.score if x.score else 0)[
                : self.top_n
            ]
            event.on_end(payload={EventPayload.NODES: new_nodes})

        return new_nodes
|
||||||
@@ -0,0 +1,234 @@
|
|||||||
|
from llama_index.core.response_synthesizers.tree_summarize import TreeSummarize
|
||||||
|
from typing import Any, Optional, Sequence,List
|
||||||
|
import asyncio
|
||||||
|
from llama_index.core.callbacks.base import CallbackManager
|
||||||
|
from llama_index.core.indices.prompt_helper import PromptHelper
|
||||||
|
from llama_index.core.prompts import BasePromptTemplate
|
||||||
|
from llama_index.core.service_context import ServiceContext
|
||||||
|
from llama_index.core.service_context_elements.llm_predictor import LLMPredictorType
|
||||||
|
from llama_index.core.types import BaseModel,RESPONSE_TEXT_TYPE
|
||||||
|
from llama_index.core.async_utils import run_async_tasks
|
||||||
|
from llama_index.core.utils import get_tokenizer
|
||||||
|
from llama_index.core.prompts.prompt_utils import get_empty_prompt_txt
|
||||||
|
|
||||||
|
class CustomTreeResponse(TreeSummarize):
    """Tree summarizer that packs whole text chunks into the context window.

    Chunks are never split: ``repack`` groups consecutive chunks into batches
    whose token total fits the available context, each batch is answered
    separately, and the answers are summarised again until one batch remains.
    """

    def __init__(
        self,
        llm: Optional[LLMPredictorType] = None,
        callback_manager: Optional[CallbackManager] = None,
        prompt_helper: Optional[PromptHelper] = None,
        summary_template: Optional[BasePromptTemplate] = None,
        output_cls: Optional[BaseModel] = None,
        streaming: bool = False,
        use_async: bool = False,
        verbose: bool = False,
        service_context: Optional[ServiceContext] = None,
    ) -> None:
        """Create the tokenizer and forward all arguments to ``TreeSummarize``."""
        self._tokenizer = get_tokenizer()
        super().__init__(llm,callback_manager,prompt_helper,summary_template,output_cls
                         ,streaming,use_async,verbose,service_context)

    async def aget_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Get tree summarize response."""
        summary_template = self._summary_template.partial_format(query_str=query_str)

        text_chunks = self.repack(text_chunks=text_chunks)

        if self._verbose:
            print(f"{len(text_chunks)} text chunks after repacking")

        # give final response if there is only one chunk
        if len(text_chunks) == 1:
            response: RESPONSE_TEXT_TYPE
            if self._streaming:
                response = await self._llm.astream(
                    summary_template, context_str=text_chunks[0], **response_kwargs
                )
            else:
                if self._output_cls is None:
                    response = await self._llm.apredict(
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )
                else:
                    response = await self._llm.astructured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )

            # return pydantic object if output_cls is specified
            return response

        else:
            # summarize each chunk
            if self._output_cls is None:
                tasks = [
                    self._llm.apredict(
                        summary_template,
                        context_str=text_chunk,
                        **response_kwargs,
                    )
                    for text_chunk in text_chunks
                ]
            else:
                tasks = [
                    self._llm.astructured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunk,
                        **response_kwargs,
                    )
                    for text_chunk in text_chunks
                ]

            summary_responses = await asyncio.gather(*tasks)
            if self._output_cls is not None:
                summaries = [summary.json() for summary in summary_responses]
            else:
                summaries = summary_responses

            # recursively summarize the summaries
            return await self.aget_response(
                query_str=query_str,
                text_chunks=summaries,
                **response_kwargs,
            )

    def get_response(
        self,
        query_str: str,
        text_chunks: Sequence[str],
        **response_kwargs: Any,
    ) -> RESPONSE_TEXT_TYPE:
        """Get tree summarize response."""
        summary_template = self._summary_template.partial_format(query_str=query_str)
        text_chunks = self.repack(text_chunks=text_chunks)

        if self._verbose:
            print(f"{len(text_chunks)} text chunks after repacking")

        # give final response if there is only one chunk
        if len(text_chunks) == 1:
            response: RESPONSE_TEXT_TYPE
            if self._streaming:
                response = self._llm.stream(
                    summary_template, context_str=text_chunks[0], **response_kwargs
                )
            else:
                if self._output_cls is None:
                    response = self._llm.predict(
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )
                else:
                    response = self._llm.structured_predict(
                        self._output_cls,
                        summary_template,
                        context_str=text_chunks[0],
                        **response_kwargs,
                    )

            return response

        else:
            # summarize each chunk
            if self._use_async:
                if self._output_cls is None:
                    tasks = [
                        self._llm.apredict(
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                else:
                    tasks = [
                        self._llm.astructured_predict(
                            self._output_cls,
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]

                summary_responses = run_async_tasks(tasks)

                if self._output_cls is not None:
                    summaries = [summary.json() for summary in summary_responses]
                else:
                    summaries = summary_responses
            else:
                if self._output_cls is None:
                    summaries = [
                        self._llm.predict(
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                else:
                    summaries = [
                        self._llm.structured_predict(
                            self._output_cls,
                            summary_template,
                            context_str=text_chunk,
                            **response_kwargs,
                        )
                        for text_chunk in text_chunks
                    ]
                    summaries = [summary.json() for summary in summaries]

            # recursively summarize the summaries
            return self.get_response(
                query_str=query_str, text_chunks=summaries, **response_kwargs
            )

    def repack( self,text_chunks: Sequence[str],) ->List[str]:
        """Group consecutive chunks into batches that fit the context window.

        Args:
            text_chunks: chunks to pack, in order.

        Returns:
            One merged string per batch.

        Raises:
            ValueError: if a single chunk is larger than the available context.
        """
        prompt_str = get_empty_prompt_txt(self._summary_template)
        num_prompt_tokens = self._token_size(prompt_str)
        avaliableSize = self._get_available_context_size(num_prompt_tokens)

        results = []
        batch = []
        batch_size = 0
        for text_chunk in text_chunks:
            one_chunk_size = self._token_size(text_chunk)
            if one_chunk_size > avaliableSize:
                raise ValueError("文本块大小大于可用上下文大小")
            if batch and batch_size + one_chunk_size > avaliableSize:
                results.append(self._merge_chunks(batch))
                batch = []
                batch_size = 0
            batch.append(text_chunk)
            # The chunk that opens a new batch counts toward that batch;
            # previously its size was dropped, so the batch could overflow.
            batch_size += one_chunk_size
        if len(batch) > 0:
            results.append(self._merge_chunks(batch))
        return results

    def _get_available_context_size(self, num_prompt_tokens: int) -> int:
        """Return the tokens left after the prompt and the reserved output.

        Raises:
            ValueError: if the result would be negative.
        """
        llm_metadata = self._llm.metadata
        context_size_tokens = llm_metadata.context_window - num_prompt_tokens - llm_metadata.num_output
        if context_size_tokens < 0:
            raise ValueError(
                f"Calculated available context size {context_size_tokens} was"
                " not non-negative."
            )
        return context_size_tokens

    def _token_size(self, text: str) -> int:
        """Return the number of tokens the tokenizer produces for ``text``."""
        return len(self._tokenizer(text))

    def _merge_chunks(self,ava_chunks:list):
        """Join the stripped, non-empty chunks with blank lines."""
        return "\n\n".join([c.strip() for c in ava_chunks if c.strip()])
|
||||||
@@ -24,13 +24,15 @@ class HybridRetriever(BaseRetriever):
|
|||||||
self._vecRetriever = vector_index.as_retriever(
|
self._vecRetriever = vector_index.as_retriever(
|
||||||
similarity_top_k=similarity_top_k,filters = filters
|
similarity_top_k=similarity_top_k,filters = filters
|
||||||
)
|
)
|
||||||
|
self._bm25Retriever = None
|
||||||
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
|
||||||
if os.path.exists(STORAGE_DIR) and len(os.listdir(STORAGE_DIR)) > 0:
|
if os.path.exists(STORAGE_DIR) and len(os.listdir(STORAGE_DIR)) > 0:
|
||||||
self._bm25Retriever = CHBM25Retriever.from_persist_dir(STORAGE_DIR)
|
self._bm25Retriever = CHBM25Retriever.from_persist_dir(STORAGE_DIR)
|
||||||
else:
|
else:
|
||||||
bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=self._vector_index.vector_store.get_nodes(None))
|
nodes = self._vector_index.vector_store.get_nodes(None)
|
||||||
bmRetriver.persist(STORAGE_DIR)
|
similarity_top_k = min(len(nodes),similarity_top_k)
|
||||||
|
self._bm25Retriever = CHBM25Retriever.from_defaults(similarity_top_k=similarity_top_k,nodes=nodes)
|
||||||
|
self._bm25Retriever.persist(STORAGE_DIR)
|
||||||
self._alpha = alpha
|
self._alpha = alpha
|
||||||
|
|
||||||
|
|
||||||
@@ -43,6 +45,16 @@ class HybridRetriever(BaseRetriever):
|
|||||||
for node in bmNodes:
|
for node in bmNodes:
|
||||||
bmDic[node.node_id] = node
|
bmDic[node.node_id] = node
|
||||||
|
|
||||||
|
vecScores = [node_with_score.score for node_with_score in vecNodes]
|
||||||
|
bmSores = [node_with_score.score for node_with_score in bmNodes]
|
||||||
|
|
||||||
|
vec_min_score = min(vecScores) if len(vecScores) > 0 else 0
|
||||||
|
vec_max_score = max(vecScores) if len(vecScores) > 0 else 0
|
||||||
|
|
||||||
|
bm_min_score = min(bmSores) if len(bmSores) > 0 else 0
|
||||||
|
bm_max_score = max(bmSores) if len(bmSores) > 0 else 0
|
||||||
|
|
||||||
|
|
||||||
result_tups = []
|
result_tups = []
|
||||||
for i in range(len(vecNodes)):
|
for i in range(len(vecNodes)):
|
||||||
node = vecNodes[i]
|
node = vecNodes[i]
|
||||||
@@ -52,7 +64,11 @@ class HybridRetriever(BaseRetriever):
|
|||||||
bmDic.pop(node.node_id)
|
bmDic.pop(node.node_id)
|
||||||
else:
|
else:
|
||||||
bmScore = 0.0
|
bmScore = 0.0
|
||||||
full_similarity = (self._alpha * node.score) + (
|
|
||||||
|
bmScore = self.normal_score(bmScore,bm_min_score,bm_max_score)
|
||||||
|
vecScore = self.normal_score(node.score,vec_min_score,vec_max_score)
|
||||||
|
|
||||||
|
full_similarity = (self._alpha * vecScore) + (
|
||||||
(1 - self._alpha) * bmScore
|
(1 - self._alpha) * bmScore
|
||||||
)
|
)
|
||||||
result_tups.append((full_similarity, node))
|
result_tups.append((full_similarity, node))
|
||||||
@@ -65,3 +81,9 @@ class HybridRetriever(BaseRetriever):
|
|||||||
for full_score, node in result_tups:
|
for full_score, node in result_tups:
|
||||||
node.score = full_score
|
node.score = full_score
|
||||||
return [n for _, n in result_tups][:self._out_top_k]
|
return [n for _, n in result_tups][:self._out_top_k]
|
||||||
|
|
||||||
|
def normal_score(self, score, min, max):
    """Min-max normalise ``score`` into the closed interval [0.0, 1.0].

    Args:
        score: The raw score to normalise.
        min: Smallest raw score observed in the result set.
        max: Largest raw score observed in the result set.

    Returns:
        ``(score - min) / (max - min)`` clamped to [0.0, 1.0]. When
        ``min == max`` there is no range to scale against, so the result is
        1.0 for a positive score and 0.0 otherwise.
    """
    # NOTE: the parameter names shadow the ``min``/``max`` builtins; they are
    # kept because they are part of the call signature, so the clamp below is
    # written with plain comparisons.
    if min == max:
        return 1.0 if score > 0 else 0.0

    ratio = (score - min) / (max - min)
    # A score outside [min, max] (e.g. the 0.0 placeholder used for a node
    # the other retriever did not return) would otherwise leave the
    # normalised range and skew the weighted sum.
    if ratio < 0.0:
        return 0.0
    if ratio > 1.0:
        return 1.0
    return ratio
|
||||||
@@ -5,12 +5,13 @@ from qdrant_client import qdrant_client
|
|||||||
|
|
||||||
qclient = None
|
qclient = None
|
||||||
|
|
||||||
def get_qdrant_vector_store():
|
def get_qdrant_vector_store(docType:str):
|
||||||
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
collection_name = docType
|
||||||
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
||||||
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
|
host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
|
||||||
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
|
port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
|
||||||
|
|
||||||
|
vector_store_path =os.path.join(vector_store_path,docType)
|
||||||
if not vector_store_path or not host:
|
if not vector_store_path or not host:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
|
"Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
|
||||||
@@ -32,9 +33,9 @@ def get_qdrant_vector_store():
|
|||||||
vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
|
vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
|
||||||
return vector_store
|
return vector_store
|
||||||
|
|
||||||
def get_chroma_vector_store():
|
def get_chroma_vector_store(docType:str):
|
||||||
collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
|
collection_name = docType
|
||||||
vector_store_path = os.getenv("VECTOR_STORE_PATH")
|
vector_store_path =os.path.join(os.getenv("VECTOR_STORE_PATH"),docType)
|
||||||
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
|
# if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
|
||||||
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
|
# otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
|
||||||
if vector_store_path:
|
if vector_store_path:
|
||||||
@@ -55,16 +56,16 @@ def get_chroma_vector_store():
|
|||||||
)
|
)
|
||||||
return store
|
return store
|
||||||
|
|
||||||
def get_vector_store():
|
def get_vector_store(docType:str):
|
||||||
store_type=os.getenv("VECTOR_STORE_TYPE")
|
store_type=os.getenv("VECTOR_STORE_TYPE")
|
||||||
|
|
||||||
store = None
|
store = None
|
||||||
|
|
||||||
match store_type:
|
match store_type:
|
||||||
case "chroma":
|
case "chroma":
|
||||||
store = get_chroma_vector_store()
|
store = get_chroma_vector_store(docType)
|
||||||
case "qdrant":
|
case "qdrant":
|
||||||
store = get_qdrant_vector_store()
|
store = get_qdrant_vector_store(docType)
|
||||||
case _:
|
case _:
|
||||||
raise ValueError(f"Invalid vector store type: {store_type}")
|
raise ValueError(f"Invalid vector store type: {store_type}")
|
||||||
|
|
||||||
|
|||||||
+215
-114
@@ -1,18 +1,114 @@
|
|||||||
import os
|
import os
|
||||||
from typing import Dict
|
from typing import Dict
|
||||||
|
from abc import abstractmethod
|
||||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||||
from llama_index.core.settings import Settings
|
from llama_index.core.settings import Settings
|
||||||
from app.xinference.base import XinferenceEmbedding, XinferenceRerank
|
from llama_index.embeddings.xinference import XinferenceEmbedding
|
||||||
from llama_index.llms.xinference import Xinference
|
from llama_index.llms.xinference import Xinference
|
||||||
|
#from llama_index.embeddings.xinference import XinferenceEmbedding
|
||||||
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
|
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
|
||||||
|
from llama_index.postprocessor.xinference_rerank import XinferenceRerank
|
||||||
|
|
||||||
|
from app.engine.loaders import getProjectInfos
|
||||||
|
from app.api.routers.request.base import ProjectInfo
|
||||||
|
from modelProvide.customDashScope import CustomDashScope
|
||||||
|
from util.register import *
|
||||||
|
from llama_index.core.callbacks import CallbackManager
|
||||||
|
|
||||||
|
|
||||||
def get_node_postprocessors():
|
ModelPlateCategory = '模型平台'
|
||||||
rerank_enabled = os.getenv("RERANK_ENABLED").title()
|
|
||||||
if rerank_enabled is None or rerank_enabled == 'False':
|
|
||||||
return []
|
|
||||||
|
|
||||||
|
def init_settings():
    """Configure the global llama-index ``Settings`` from environment variables.

    Reads ``MODEL_PROVIDER`` and ``EMBEDDING_PROVIDER``, looks each one up in
    the class register under ``ModelPlateCategory``, instantiates the
    platform class and installs its LLM / embedding model on ``Settings``.
    Afterwards the LLM gets a fresh ``CallbackManager`` and the chunking
    parameters are read from ``CHUNK_SIZE`` (default 1024) and
    ``CHUNK_OVERLAP`` (default 20).

    Raises:
        ValueError: If either provider name has no registered platform class.
    """
    # NOTE(review): relies on ClsRegister.get returning None for an unknown
    # key, as the None checks below assume -- confirm against util.register.
    model_provider = os.getenv("MODEL_PROVIDER")
    llm_platform_cls = ClsRegister.get(ModelPlateCategory, model_provider)
    if llm_platform_cls is None:
        raise ValueError(f"Invalid model provider: {model_provider}")
    Settings.llm = llm_platform_cls().model()

    embedding_provider = os.getenv("EMBEDDING_PROVIDER")
    embedding_platform_cls = ClsRegister.get(ModelPlateCategory, embedding_provider)
    # The lookup result itself must be checked here; testing the previously
    # created LLM platform instance would always pass and then try to call
    # None for an unknown embedding provider.
    if embedding_platform_cls is None:
        raise ValueError(f"Invalid embedding provider: {embedding_provider}")
    Settings.embed_model = embedding_platform_cls().embedding()

    Settings.llm.callback_manager = CallbackManager()
    Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
    Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
|
||||||
|
|
||||||
|
class ModelPlatform:
    """Common interface of the model platforms stored in the class register.

    A platform exposes three factory methods: ``model`` for the LLM,
    ``embedding`` for the embedding model and ``rerank`` for the rerank
    post-processors. The base implementations return ``None``.

    The methods are tagged with ``abstractmethod``, but the class does not
    derive from ``abc.ABC``, so the tag is informational only and the class
    (and its subclasses) can still be instantiated.
    """

    @abstractmethod
    def model(self):
        """Return the platform's LLM; the base implementation returns None."""
        return None

    @abstractmethod
    def embedding(self):
        """Return the platform's embedding model; the base returns None."""
        return None

    @abstractmethod
    def rerank(self):
        """Return the platform's rerank post-processors; the base returns None."""
        return None
|
||||||
|
|
||||||
|
@register(ModelPlateCategory, 'ollama')
class OllamaPlatform(ModelPlatform):
    """Model platform backed by a local or remote Ollama server."""

    def model(self):
        """Build the Ollama LLM from the environment and return it.

        Reads ``OLLAMA_BASE_URL`` (default ``http://127.0.0.1:11434``),
        ``OLLAMA_REQUEST_TIMEOUT`` (default: the library's
        ``DEFAULT_REQUEST_TIMEOUT``) and ``MODEL``.

        Returns:
            The configured ``Ollama`` instance. It is also assigned to
            ``Settings.llm``, as before.
        """
        from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama

        base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
        request_timeout = float(
            os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
        )
        llm = Ollama(
            base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
        )
        Settings.llm = llm
        # The instance must be returned: a caller that assigns this method's
        # result to Settings.llm would otherwise replace the LLM with None.
        return llm

    def embedding(self):
        """Return the embedding model; currently disabled, so returns None."""
        # Disabled reference implementation:
        # from llama_index.embeddings.ollama import OllamaEmbedding
        # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
        # Settings.embed_model = OllamaEmbedding(
        #     base_url=base_url,
        #     model_name=os.getenv("EMBEDDING_MODEL"),
        # )
        pass

    def rerank(self):
        """Build the rerank post-processor list for this platform.

        Reads ``RERANK_MODEL`` (appended to the current working directory),
        ``RERANK_TOP_N`` (default 5) and ``RERANK_THRESHOLD`` (default 0.3).

        Returns:
            A one-element list holding the configured ``OllamaRerank``.

        Raises:
            ValueError: If ``RERANK_MODEL`` is not set.
        """
        from app.engine.rerank.ollamRerank import OllamaRerank

        rerank_model = os.getenv('RERANK_MODEL')
        if rerank_model is None:
            raise ValueError("RERANK_MODEL must be set to use the Ollama reranker")
        # Plain concatenation is kept on purpose: RERANK_MODEL is expected to
        # begin with a path separator, and os.path.join would discard the
        # working directory for such a value.
        modelpath = os.getcwd() + rerank_model
        # Environment values arrive as strings, so convert explicitly.
        top_n = int(os.getenv('RERANK_TOP_N', 5))
        threshold = float(os.getenv('RERANK_THRESHOLD', 0.3))
        rerank = OllamaRerank(
            model=modelpath,
            top_n=top_n,
            device="cpu",
            score_threshold=threshold,
        )
        return [rerank]
|
||||||
|
|
||||||
|
|
||||||
|
@register(ModelPlateCategory,'xinference')
|
||||||
|
class XinferencePlatform(ModelPlatform):
|
||||||
|
def model(self):
    """Build the Xinference LLM from environment variables.

    Reads ``BASE_URL``, ``MODEL``, ``LLM_MAX_TOKENS`` (optional; left as
    ``None`` when unset) and ``LLM_TEMPERATURE`` (default
    ``DEFAULT_XINFERENCE_TEMP``).
    """
    endpoint = os.getenv("BASE_URL")
    model_uid = os.getenv("MODEL")

    raw_max_tokens = os.getenv("LLM_MAX_TOKENS")
    if raw_max_tokens is None:
        max_tokens = None
    else:
        max_tokens = int(raw_max_tokens)

    temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
    return Xinference(model_uid, endpoint, temperature, max_tokens)
|
||||||
|
|
||||||
|
def embedding(self):
    """Build the Xinference embedding model from environment variables.

    Uses ``EMBEDDING_BASE_URL`` as the endpoint, falling back to
    ``BASE_URL`` when it is unset or empty, and ``EMBEDDING_MODEL`` as the
    model identifier.

    ``EMBEDDING_DIM`` is no longer parsed here: the value was never handed
    to ``XinferenceEmbedding``, yet a malformed value could still raise.
    """
    embedding_base_url = os.getenv("EMBEDDING_BASE_URL") or os.getenv("BASE_URL")
    embed_model_name = os.getenv("EMBEDDING_MODEL")
    return XinferenceEmbedding(embed_model_name, embedding_base_url)
|
||||||
|
|
||||||
|
def rerank(self):
|
||||||
rerank_model = os.getenv("RERANK_MODEL")
|
rerank_model = os.getenv("RERANK_MODEL")
|
||||||
rerank_url = os.getenv("RERANK_BASE_URL")
|
rerank_url = os.getenv("RERANK_BASE_URL")
|
||||||
rerank_top_n = os.getenv("RERANK_TOP_N")
|
rerank_top_n = os.getenv("RERANK_TOP_N")
|
||||||
@@ -22,73 +118,10 @@ def get_node_postprocessors():
|
|||||||
postprocess = [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)]
|
postprocess = [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)]
|
||||||
return postprocess
|
return postprocess
|
||||||
|
|
||||||
def init_settings():
|
@register(ModelPlateCategory,'openai')
|
||||||
model_provider = os.getenv("MODEL_PROVIDER")
|
class OpenAIPlatform(ModelPlatform):
|
||||||
match model_provider:
|
def model(self):
|
||||||
case "openai":
|
|
||||||
init_openai()
|
|
||||||
case "dashscope":
|
|
||||||
init_dashscope()
|
|
||||||
case "groq":
|
|
||||||
init_groq()
|
|
||||||
case "ollama":
|
|
||||||
init_ollama()
|
|
||||||
case "anthropic":
|
|
||||||
init_anthropic()
|
|
||||||
case "gemini":
|
|
||||||
init_gemini()
|
|
||||||
case "mistral":
|
|
||||||
init_mistral()
|
|
||||||
case "azure-openai":
|
|
||||||
init_azure_openai()
|
|
||||||
case "t-systems":
|
|
||||||
from .llmhub import init_llmhub
|
|
||||||
init_llmhub()
|
|
||||||
case "xinference":
|
|
||||||
init_xinference()
|
|
||||||
case _:
|
|
||||||
raise ValueError(f"Invalid model provider: {model_provider}")
|
|
||||||
|
|
||||||
Settings.chunk_size = int(os.getenv("CHUNK_SIZE", "1024"))
|
|
||||||
Settings.chunk_overlap = int(os.getenv("CHUNK_OVERLAP", "20"))
|
|
||||||
|
|
||||||
|
|
||||||
def init_ollama():
|
|
||||||
# from llama_index.embeddings.ollama import OllamaEmbedding
|
|
||||||
# from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama
|
|
||||||
#
|
|
||||||
# base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
|
|
||||||
# request_timeout = float(
|
|
||||||
# os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
|
|
||||||
# )
|
|
||||||
# Settings.embed_model = OllamaEmbedding(
|
|
||||||
# base_url=base_url,
|
|
||||||
# model_name=os.getenv("EMBEDDING_MODEL"),
|
|
||||||
# )
|
|
||||||
# Settings.llm = Ollama(
|
|
||||||
# base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
|
|
||||||
# )
|
|
||||||
pass
|
|
||||||
|
|
||||||
def init_xinference():
|
|
||||||
base_url = os.getenv("BASE_URL")
|
|
||||||
model = os.getenv("MODEL")
|
|
||||||
max_tokens = int(os.getenv("LLM_MAX_TOKENS")) if os.getenv("LLM_MAX_TOKENS") is not None else None
|
|
||||||
temperature = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
|
|
||||||
|
|
||||||
Settings.llm = Xinference(model, base_url, temperature, max_tokens)
|
|
||||||
|
|
||||||
embedding_base_url = os.getenv("EMBEDDING_BASE_URL")
|
|
||||||
embedding_base_url = embedding_base_url if embedding_base_url != None and embedding_base_url != "" else base_url
|
|
||||||
|
|
||||||
embed_model_name = os.getenv("EMBEDDING_MODEL")
|
|
||||||
dimensions = os.getenv("EMBEDDING_DIM")
|
|
||||||
dimensions = int(dimensions) if dimensions is not None else None
|
|
||||||
Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions)
|
|
||||||
|
|
||||||
def init_openai():
|
|
||||||
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||||
from llama_index.embeddings.openai import OpenAIEmbedding
|
|
||||||
from llama_index.llms.openai import OpenAI
|
from llama_index.llms.openai import OpenAI
|
||||||
|
|
||||||
max_tokens = os.getenv("LLM_MAX_TOKENS")
|
max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||||
@@ -97,39 +130,41 @@ def init_openai():
|
|||||||
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
|
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
|
||||||
"max_tokens": int(max_tokens) if max_tokens is not None else None,
|
"max_tokens": int(max_tokens) if max_tokens is not None else None,
|
||||||
}
|
}
|
||||||
Settings.llm = OpenAI(**config)
|
return OpenAI(**config)
|
||||||
|
|
||||||
|
def embedding(self):
|
||||||
|
from llama_index.embeddings.openai import OpenAIEmbedding
|
||||||
dimensions = os.getenv("EMBEDDING_DIM")
|
dimensions = os.getenv("EMBEDDING_DIM")
|
||||||
config = {
|
config = {
|
||||||
"model": os.getenv("EMBEDDING_MODEL"),
|
"model": os.getenv("EMBEDDING_MODEL"),
|
||||||
"dimensions": int(dimensions) if dimensions is not None else None,
|
"dimensions": int(dimensions) if dimensions is not None else None,
|
||||||
}
|
}
|
||||||
Settings.embed_model = OpenAIEmbedding(**config)
|
return OpenAIEmbedding(**config)
|
||||||
|
|
||||||
def init_dashscope():
|
def rerank(self):
|
||||||
from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels
|
pass
|
||||||
from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeBatchTextEmbeddingModels,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels
|
|
||||||
|
|
||||||
max_tokens = os.getenv("LLM_MAX_TOKENS")
|
@register(ModelPlateCategory,'dashscope')
|
||||||
config = {
|
class DashscopePlatform(ModelPlatform):
|
||||||
"model": os.getenv("MODEL"),
|
def model(self):
|
||||||
"temperature": float(os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)),
|
apikey = os.getenv('DASHSCOPE_API_KEY')
|
||||||
"max_tokens": int(max_tokens) if max_tokens is not None else None,
|
modelName = os.getenv('MODEL')
|
||||||
}
|
return CustomDashScope(model_name=modelName,api_key = apikey)
|
||||||
Settings.llm = llm = DashScope(model_name=DashScopeGenerationModels.QWEN_MAX)
|
|
||||||
|
|
||||||
dimensions = os.getenv("EMBEDDING_DIM")
|
def embedding(self):
|
||||||
config = {
|
from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels
|
||||||
"model": os.getenv("EMBEDDING_MODEL"),
|
api_key = os.getenv('DASHSCOPE_API_KEY')
|
||||||
"dimensions": int(dimensions) if dimensions is not None else None,
|
modelName = os.getenv('EMBEDDING_MODEL')
|
||||||
}
|
return DashScopeEmbedding(model_name=modelName,
|
||||||
Settings.embed_model = DashScopeEmbedding(model_name=DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
|
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY,api_key = api_key)
|
||||||
text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY)
|
|
||||||
|
|
||||||
|
def rerank(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def init_azure_openai():
|
@register(ModelPlateCategory,'azure-openai')
|
||||||
|
class AzureOpenaiPlatform(ModelPlatform):
|
||||||
|
def model(self):
|
||||||
# from llama_index.core.constants import DEFAULT_TEMPERATURE
|
# from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||||
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
|
|
||||||
# from llama_index.llms.azure_openai import AzureOpenAI
|
# from llama_index.llms.azure_openai import AzureOpenAI
|
||||||
#
|
#
|
||||||
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
|
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
|
||||||
@@ -145,15 +180,32 @@ def init_azure_openai():
|
|||||||
# or os.getenv("OPENAI_API_VERSION"),
|
# or os.getenv("OPENAI_API_VERSION"),
|
||||||
# }
|
# }
|
||||||
#
|
#
|
||||||
# Settings.llm = AzureOpenAI(
|
# return AzureOpenAI(
|
||||||
# model=os.getenv("MODEL"),
|
# model=os.getenv("MODEL"),
|
||||||
# max_tokens=int(max_tokens) if max_tokens is not None else None,
|
# max_tokens=int(max_tokens) if max_tokens is not None else None,
|
||||||
# temperature=float(temperature),
|
# temperature=float(temperature),
|
||||||
# deployment_name=llm_deployment,
|
# deployment_name=llm_deployment,
|
||||||
# **azure_config,
|
# **azure_config,
|
||||||
# )
|
# )
|
||||||
|
pass
|
||||||
|
|
||||||
|
def embedding(self):
|
||||||
|
# from llama_index.core.constants import DEFAULT_TEMPERATURE
|
||||||
|
# from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
|
||||||
#
|
#
|
||||||
# Settings.embed_model = AzureOpenAIEmbedding(
|
# llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
|
||||||
|
# embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
|
||||||
|
# max_tokens = os.getenv("LLM_MAX_TOKENS")
|
||||||
|
# temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
|
||||||
|
# dimensions = os.getenv("EMBEDDING_DIM")
|
||||||
|
#
|
||||||
|
# azure_config = {
|
||||||
|
# "api_key": os.environ["AZURE_OPENAI_KEY"],
|
||||||
|
# "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
|
||||||
|
# "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
|
||||||
|
# or os.getenv("OPENAI_API_VERSION"),
|
||||||
|
# }
|
||||||
|
# return AzureOpenAIEmbedding(
|
||||||
# model=os.getenv("EMBEDDING_MODEL"),
|
# model=os.getenv("EMBEDDING_MODEL"),
|
||||||
# dimensions=int(dimensions) if dimensions is not None else None,
|
# dimensions=int(dimensions) if dimensions is not None else None,
|
||||||
# deployment_name=embedding_deployment,
|
# deployment_name=embedding_deployment,
|
||||||
@@ -161,11 +213,17 @@ def init_azure_openai():
|
|||||||
# )
|
# )
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def rerank(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def init_fastembed():
|
@register(ModelPlateCategory,'fastembed')
|
||||||
"""
|
class FastembedPlatform(ModelPlatform):
|
||||||
Use Qdrant Fastembed as the local embedding provider.
|
@abstractmethod
|
||||||
"""
|
def model(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def embedding(self):
|
||||||
# from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
# from llama_index.embeddings.fastembed import FastEmbedEmbedding
|
||||||
#
|
#
|
||||||
# embed_model_map: Dict[str, str] = {
|
# embed_model_map: Dict[str, str] = {
|
||||||
@@ -181,8 +239,14 @@ def init_fastembed():
|
|||||||
# )
|
# )
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def rerank(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def init_groq():
|
@register(ModelPlateCategory,'groq')
|
||||||
|
class GroqPlatform(ModelPlatform):
|
||||||
|
@abstractmethod
|
||||||
|
def model(self):
|
||||||
# from llama_index.llms.groq import Groq
|
# from llama_index.llms.groq import Groq
|
||||||
#
|
#
|
||||||
# model_map: Dict[str, str] = {
|
# model_map: Dict[str, str] = {
|
||||||
@@ -196,8 +260,17 @@ def init_groq():
|
|||||||
# init_fastembed()
|
# init_fastembed()
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
@abstractmethod
|
||||||
|
def embedding(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def init_anthropic():
|
@abstractmethod
|
||||||
|
def rerank(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@register(ModelPlateCategory,'anthropic')
|
||||||
|
class AnthropicPlatform(ModelPlatform):
|
||||||
|
def model(self):
|
||||||
# from llama_index.llms.anthropic import Anthropic
|
# from llama_index.llms.anthropic import Anthropic
|
||||||
#
|
#
|
||||||
# model_map: Dict[str, str] = {
|
# model_map: Dict[str, str] = {
|
||||||
@@ -213,22 +286,50 @@ def init_anthropic():
|
|||||||
# init_fastembed()
|
# init_fastembed()
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def embedding(self):
|
||||||
|
pass
|
||||||
|
|
||||||
def init_gemini():
|
def rerank(self):
|
||||||
# from llama_index.embeddings.gemini import GeminiEmbedding
|
pass
|
||||||
|
|
||||||
|
@register(ModelPlateCategory,'gemini')
|
||||||
|
class GeminiPlatform(ModelPlatform):
|
||||||
|
def model(self):
|
||||||
# from llama_index.llms.gemini import Gemini
|
# from llama_index.llms.gemini import Gemini
|
||||||
#
|
|
||||||
# model_name = f"models/{os.getenv('MODEL')}"
|
# model_name = f"models/{os.getenv('MODEL')}"
|
||||||
# embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
|
# return Gemini(model=model_name)
|
||||||
#
|
|
||||||
# Settings.llm = Gemini(model=model_name)
|
|
||||||
# Settings.embed_model = GeminiEmbedding(model_name=embed_model_name)
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def init_mistral():
|
def embedding(self):
|
||||||
# from llama_index.embeddings.mistralai import MistralAIEmbedding
|
# from llama_index.embeddings.gemini import GeminiEmbedding
|
||||||
# from llama_index.llms.mistralai import MistralAI
|
# embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
|
||||||
#
|
# return GeminiEmbedding(model_name=embed_model_name)
|
||||||
# Settings.llm = MistralAI(model=os.getenv("MODEL"))
|
|
||||||
# Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def rerank(self):
|
||||||
|
pass
|
||||||
|
|
||||||
|
@register(ModelPlateCategory, 'mistral')
class MistralPlatform(ModelPlatform):
    """Placeholder platform registered under the name 'mistral'.

    Every factory currently returns ``None``; the Mistral-specific code is
    kept below as disabled reference snippets.
    """

    def model(self):
        """Return the LLM; disabled for now, so this returns None."""
        # Disabled reference implementation:
        # from llama_index.llms.mistralai import MistralAI
        # return MistralAI(model=os.getenv("MODEL"))
        return None

    def embedding(self):
        """Return the embedding model; disabled for now, so this returns None."""
        # Disabled reference implementation:
        # from llama_index.embeddings.mistralai import MistralAIEmbedding
        # return MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
        return None

    def rerank(self):
        """Return rerank post-processors; none are defined, so this returns None."""
        return None
|
||||||
|
|
||||||
|
def init_ProjectInfo():
    """Load every project record and add its name/flag pair to ProjectInfo.

    Each record returned by ``getProjectInfos()`` is indexed by the keys
    ``'name'`` and ``'flag'``.
    """
    # NOTE(review): the ProjectInfo instance is not returned or stored here,
    # so add() presumably writes to shared state -- confirm in ProjectInfo.
    registry = ProjectInfo()
    for record in getProjectInfos():
        registry.add(record['name'], record['flag'])
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,272 +0,0 @@
|
|||||||
"""Xinference embeddings file."""
|
|
||||||
|
|
||||||
import logging
|
|
||||||
from enum import Enum
|
|
||||||
from http import HTTPStatus
|
|
||||||
from typing import Any, Dict, List, Optional, Union, Tuple
|
|
||||||
|
|
||||||
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher
|
|
||||||
from llama_index.core.bridge.pydantic import PrivateAttr
|
|
||||||
from llama_index.core.callbacks import CBEventType, EventPayload
|
|
||||||
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
|
|
||||||
from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent
|
|
||||||
from llama_index.core.postprocessor.types import BaseNodePostprocessor
|
|
||||||
from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle
|
|
||||||
from pydantic import Field
|
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
|
||||||
|
|
||||||
|
|
||||||
EMBED_MAX_INPUT_LENGTH = 2048
|
|
||||||
EMBED_MAX_BATCH_SIZE = 1
|
|
||||||
|
|
||||||
|
|
||||||
class XinferenceEmbedding(BaseEmbedding):
    """Text-embedding model served by an Xinference endpoint."""

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    _generator: Any = PrivateAttr()
    # NOTE(review): the two declarations below are never assigned in this
    # class; they are left exactly as they were.
    _model_uid: str = Field(description="The Xinference model to use.")
    _endpoint: str = Field(description="The Xinference endpoint URL to use.")

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
        dimensions: Optional[int] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        max_retries: int = 10,
        **kwargs: Any,
    ) -> None:
        """Connect to the endpoint and initialise the embedding model.

        Args:
            model_uid: Identifier of the model on the Xinference endpoint.
            endpoint: URL of the Xinference endpoint.
            embed_batch_size: Accepted for compatibility; replaced by the
                ``replica`` value of the model description.
            dimensions: Accepted for compatibility; replaced by the
                ``dimensions`` value of the model description.
            additional_kwargs: Forwarded to the base class.
            api_key: Forwarded to the base class.
            api_base: Forwarded to the base class.
            api_version: Forwarded to the base class.
            max_retries: Forwarded to the base class.
            **kwargs: Forwarded to the base class.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description, embed_batch_size, dimensions = self.load_model(
            model_uid, endpoint
        )
        super().__init__(
            embed_batch_size=embed_batch_size,
            dimensions=dimensions,
            model_name=model_uid,
            # model_description is declared as a field without a default, so
            # it has to be supplied to the base initialiser.
            model_description=model_description,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            **kwargs,
        )
        # Private attributes are assigned only after the base initialiser has
        # set up the instance.
        self._generator = generator

    def load_model(
        self, model_uid: str, endpoint: str
    ) -> Tuple[Any, Dict[str, Any], Any, Any]:
        """Fetch the model handle and its description from the endpoint.

        Args:
            model_uid: Identifier of the model on the Xinference endpoint.
            endpoint: URL of the Xinference endpoint.

        Returns:
            A 4-tuple ``(generator, model_description, replica, dimensions)``
            where the last two values are read from the model description.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the endpoint does not provide the model or its
                description.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as err:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from err

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # .get() rather than indexing, so a missing model reaches the
        # RuntimeError below instead of surfacing as a bare KeyError.
        model_description = client.list_models().get(model_uid)
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )

        replica = model_description['replica']
        dimensions = model_description['dimensions']
        return generator, model_description, replica, dimensions

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string."""
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """Embed ``text`` synchronously with the remote model."""
        assert self._generator is not None

        response = self._generator.create_embedding(input=text)
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` synchronously; identical to embedding plain text."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Embed ``query``; delegates to the synchronous implementation."""
        return self._get_query_embedding(query)
|
|
||||||
|
|
||||||
class XinferenceRerank(BaseNodePostprocessor):
    """Node postprocessor that reranks nodes with a Xinference rerank model.

    The model handle is obtained once, at construction time, from the
    Xinference endpoint. ``_postprocess_nodes`` sends the node texts and the
    query string to that handle, rebuilds ``NodeWithScore`` objects from the
    returned ``index`` / ``relevance_score`` pairs, optionally drops nodes
    scoring below ``threshold`` and keeps at most ``top_n`` nodes.
    """

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    model: str = Field(description="Xinference rerank model name.")
    # Both limits are optional: ``None`` disables the corresponding filter.
    top_n: Optional[int] = Field(default=None, description="Top N nodes to return.")
    threshold: Optional[float] = Field(
        default=None, description="threshold nodes to return."
    )

    # Underscore-prefixed names are private attributes, not validated fields.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        top_n: Optional[int] = None,
        threshold: Optional[float] = None,
        return_documents: bool = False,
    ):
        """Connect to the endpoint and initialise the postprocessor.

        Args:
            model_uid: UID of the launched Xinference rerank model.
            endpoint: URL of the Xinference endpoint.
            top_n: Maximum number of nodes to return; ``None`` keeps all.
            threshold: Minimum relevance score a node needs to be kept;
                ``None`` disables score filtering.
            return_documents: Accepted for backward compatibility; it is not
                used by this class.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description = self.load_model(model_uid, endpoint)

        super().__init__(
            top_n=top_n,
            model=model_uid,
            model_description=model_description,
            threshold=threshold,
        )

        # Private attributes are assigned only after the base initialiser has
        # run, so the model's private-attribute storage already exists.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    @classmethod
    def class_name(cls) -> str:
        """Return the identifier of this postprocessor class."""
        return "XinferenceRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rerank ``nodes`` against the query and apply threshold / top-n.

        Args:
            nodes: Candidate nodes to rerank.
            query_bundle: Query whose ``query_str`` is sent to the reranker.

        Returns:
            Reranked nodes, best score first, filtered by ``threshold`` and
            truncated to ``top_n`` when those limits are set.

        Raises:
            ValueError: If ``query_bundle`` is ``None``.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if not nodes:
            return []

        # With no explicit limit every candidate may be returned, so report
        # the candidate count as the effective top-n in the emitted events.
        top_n = len(nodes) if self.top_n is None else self.top_n

        dispatcher.event(
            ReRankStartEvent(
                nodes=nodes,
                top_n=top_n,
                query=query_bundle,
                model_name=self.model,
            )
        )

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self._model_uid,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: top_n,
            },
        ) as event:
            texts = [node.node.get_content() for node in nodes]
            response = self._generator.rerank(texts, query_bundle.query_str)

            # Each result refers back to its input node through ``index``.
            new_nodes = [
                NodeWithScore(
                    node=nodes[result["index"]].node,
                    score=result["relevance_score"],
                )
                for result in response["results"]
            ]

            # A missing threshold means "keep everything", not "keep nothing".
            if self.threshold is not None:
                new_nodes = [
                    node for node in new_nodes if node.score >= self.threshold
                ]

            # Order explicitly so the truncation below keeps the best-scoring
            # nodes without relying on the order the server returned them in.
            new_nodes.sort(key=lambda node: node.score, reverse=True)
            if self.top_n is not None:
                new_nodes = new_nodes[: self.top_n]

            event.on_end(payload={EventPayload.NODES: new_nodes})

        dispatcher.event(ReRankEndEvent(nodes=new_nodes))
        return new_nodes

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """Fetch the model handle and its description from the endpoint.

        Args:
            model_uid: UID of the launched Xinference model.
            endpoint: URL of the Xinference endpoint.

        Returns:
            A ``(generator, model_description)`` pair: the model handle from
            ``client.get_model`` and the entry for ``model_uid`` from
            ``client.list_models()``.

        Raises:
            ImportError: If the ``xinference`` package is not installed.
            RuntimeError: If the handle or the description is unavailable.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as err:
            raise ImportError(
                "Could not import Xinference library. "
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from err

        client = RESTfulClient(endpoint)

        generator = client.get_model(model_uid)
        # ``.get`` lets an unknown UID reach the RuntimeError below instead of
        # surfacing as a bare KeyError.
        model_description = client.list_models().get(model_uid)

        # Explicit checks instead of ``assert``, which is stripped under -O.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint. "
                "Please make sure Xinference endpoint is running at the correct port."
            )

        return generator, model_description
|
|
||||||
+32
-32
@@ -3,46 +3,46 @@ file:
|
|||||||
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
|
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
|
||||||
use_llama_parse: false
|
use_llama_parse: false
|
||||||
|
|
||||||
db:
|
#db:
|
||||||
# The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
|
# The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
|
||||||
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
|
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
|
||||||
# query: The query to fetch data from the database. E.g.: SELECT * FROM table
|
# query: The query to fetch data from the database. E.g.: SELECT * FROM table
|
||||||
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
#- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
|
||||||
enable: true # 添加 enable 字段
|
#enable: false # 添加 enable 字段
|
||||||
queries:
|
#queries:
|
||||||
- sql: select * from ProjectProperties;
|
#- sql: select * from ProjectProperties;
|
||||||
explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
|
#explanation: "工程属性表数据,层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
|
||||||
|
|
||||||
- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
|
#- sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
|
||||||
explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
|
#explanation: "总算表数据,层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
|
||||||
|
|
||||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
|
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '线路';
|
||||||
explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
#explanation: "专业类型为线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
|
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '余物清理';
|
||||||
explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
#explanation: "专业类型为余物清理的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||||
- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
|
#- sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where ProfessionalType = '拆除线路';
|
||||||
explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
#explanation: "专业类型为拆除线路的项目划分表数据,层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
|
||||||
|
|
||||||
- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
|
#- sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
|
||||||
explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
|
#explanation: "其他费用表数据,层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中"
|
||||||
|
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表'
|
||||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(调试工程)aa'
|
||||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '大型土石方取费表'
|
||||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)'
|
||||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(余物清理)(1)'
|
||||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from FeeCollectionTable where FeeCollection_Table_Name = '线路取费表(拆除)'
|
||||||
explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
#explanation: "取费表名称为线路取费表的取费表数据,层级关系包含在博微电力造价工程文件格式_FeeCollectionTable.json文件中"
|
||||||
|
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from ProjectQuantities where Professional_Type = '线路'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from ProjectQuantities where Professional_Type = '线路'
|
||||||
explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
#explanation: "专业类型为线路的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
||||||
- sql: select Name, Code, Calculation_Formula, Rate, from ProjectQuantities where Professional_Type = '余物清理'
|
#- sql: select Name, Code, Calculation_Formula, Rate, from ProjectQuantities where Professional_Type = '余物清理'
|
||||||
explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
#explanation: "专业类型为余物清理的工程量表数据,层级关系包含在博微电力造价工程文件格式_ProjectQuantities.json文件中"
|
||||||
#web:
|
#web:
|
||||||
# driver_arguments:
|
# driver_arguments:
|
||||||
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
|
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
|
||||||
|
|||||||
@@ -1,71 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "FeeCollectionTable",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "取费表是取费设置中各取费表明细。查询示例: SELECT Rate FROM FeeCollectionTable WHERE Name = 'findname'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "FeeCollectionTableName",
|
|
||||||
"alias": "取费表名称,取费名称,取费名",
|
|
||||||
"comment": "取费表名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Name",
|
|
||||||
"alias": "名称,费用名,项目名",
|
|
||||||
"comment": "费用名称,项目名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SerialNumber",
|
|
||||||
"alias": "序号,序列号,费用序号",
|
|
||||||
"comment": "费用表序号",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Code",
|
|
||||||
"alias": "编号,代号,代码",
|
|
||||||
"comment": "费用代码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "CalculationFormula",
|
|
||||||
"alias": "公式,表达式,计算式",
|
|
||||||
"comment": "取费基数",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Rate",
|
|
||||||
"alias": "费用利率,费率",
|
|
||||||
"comment": "取费费率",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Remarks",
|
|
||||||
"alias": "说明,备注",
|
|
||||||
"comment": "费用项备注说明",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Major",
|
|
||||||
"alias": "专业",
|
|
||||||
"comment": "取费表专业",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Type",
|
|
||||||
"alias": "类型,取费类型",
|
|
||||||
"comment": "取费表类型",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Path",
|
|
||||||
"alias": "路径,费用全路径",
|
|
||||||
"comment": "费用项层级全路径",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,89 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "OtherFee",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "其他费用表被称为“工程费用中其他费用明细”。其他费用是指为完成工程项目建设所必需的,但不属于建筑工程费、安装工程费、设备购置费、基本预备费的其他相关费用。包括建设场地征用及清理费、项目建设管理费、项目建设技术服务费、生产准备费、大件运输措施费、专业爆破服务费等。查询示例: SELECT Rate FROM OtherFee WHERE Name = 'findname'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Id",
|
|
||||||
"alias": "项目id,id,费用id",
|
|
||||||
"comment": "费用项目id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ParentId",
|
|
||||||
"alias": "父级id,父id",
|
|
||||||
"comment": "费用项目父级id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Level",
|
|
||||||
"alias": "层号,层级,层编号",
|
|
||||||
"comment": "层级编号,从1开始",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Name",
|
|
||||||
"alias": "名称,费用名,项目名",
|
|
||||||
"comment": "费用名称,项目名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SerialNumber",
|
|
||||||
"alias": "序号,序列号",
|
|
||||||
"comment": "费用表序号",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Code",
|
|
||||||
"alias": "编号,代号,代码",
|
|
||||||
"comment": "费用代码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "CalculationFormula",
|
|
||||||
"alias": "公式,表达式,计算式",
|
|
||||||
"comment": "取费基数",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Rate",
|
|
||||||
"alias": "费用利率,费率",
|
|
||||||
"comment": "取费费率",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount",
|
|
||||||
"alias": "金额,价格",
|
|
||||||
"comment": "金额、合计、费用,\n单位为元",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Remarks",
|
|
||||||
"alias": "说明,备注",
|
|
||||||
"comment": "费用项备注说明",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Compilation_Basis",
|
|
||||||
"alias": "编制依据,编制来源",
|
|
||||||
"comment": "费用项编制依据",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "WBS_Code",
|
|
||||||
"alias": "WBS编号,WBS编码",
|
|
||||||
"comment": "费用项WBS编码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Path",
|
|
||||||
"alias": "路径,费用全路径",
|
|
||||||
"comment": "费用项层级全路径",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,149 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "ProjectDivision",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "项目划分表是用于存储工程项目划分树状数据。内部包含安装工程项目划分,建筑工程项目划分,线路项目划分,工程分部分项。查询示例: SELECT Sum_Price FROM ProjectDivision WHERE Name = 'findname'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Id",
|
|
||||||
"alias": "项目id,id,费用id",
|
|
||||||
"comment": "项目划分id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ParentId",
|
|
||||||
"alias": "父级id,父id",
|
|
||||||
"comment": "项目划分父级id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Level",
|
|
||||||
"alias": "层号,层级,层编号",
|
|
||||||
"comment": "层级编号,从1开始",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Quantity",
|
|
||||||
"alias": "个数,数量,数目",
|
|
||||||
"comment": "项目划分数量",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SerialNumber",
|
|
||||||
"alias": "序号,序列号,项目序号",
|
|
||||||
"comment": "项目划分序号",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Name",
|
|
||||||
"alias": "名称,项目名",
|
|
||||||
"comment": "项目名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Encoding",
|
|
||||||
"alias": "编码,译码",
|
|
||||||
"comment": "项目划分编码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Sum_Price",
|
|
||||||
"alias": "合计,合价",
|
|
||||||
"comment": "项目划分合价,分部分项费用",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "FeeCollectionTableName",
|
|
||||||
"alias": "取费表",
|
|
||||||
"comment": "项目划分的取费表,此项目划分选用的取费表",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Remarks",
|
|
||||||
"alias": "说明,备注",
|
|
||||||
"comment": "备注",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "WBS_Code",
|
|
||||||
"alias": "WBS编号,WBS编码",
|
|
||||||
"comment": "WBS编码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Manual_Adjustment_Coefficient",
|
|
||||||
"alias": "人工调差系数",
|
|
||||||
"comment": "此项目划分下人工调差系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Material_Adjustment_Coefficient",
|
|
||||||
"alias": "材料调差系数",
|
|
||||||
"comment": "此项目划分下材料调差系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Mechanical_Adjustment_Coefficient",
|
|
||||||
"alias": "机械调差系数",
|
|
||||||
"comment": "此项目划分下机械调差系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Demolition_Manual_Adjustment_Coefficient",
|
|
||||||
"alias": "拆除人工调差系数",
|
|
||||||
"comment": "此项目划分下拆除人工调差系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Demolition_Material_Adjustment_Coefficient",
|
|
||||||
"alias": "拆除材料调差系数",
|
|
||||||
"comment": "此项目划分下拆除材料调差系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Demolition_Mechanical_Adjustment_Coefficient",
|
|
||||||
"alias": "拆除机械调差系数",
|
|
||||||
"comment": "此项目划分下拆除机械调差系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ProfessionalType",
|
|
||||||
"alias": "专业类型",
|
|
||||||
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Unit",
|
|
||||||
"alias": "单位",
|
|
||||||
"comment": "项目划分单位",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "CalculationFormula",
|
|
||||||
"alias": "公式,表达式,计算式",
|
|
||||||
"comment": "项目划分计算式",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Rate",
|
|
||||||
"alias": "费用利率,费率",
|
|
||||||
"comment": "项目划分费率",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Code",
|
|
||||||
"alias": "编号,代号,代码",
|
|
||||||
"comment": "项目划分代码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Path",
|
|
||||||
"alias": "路径,项目全路径",
|
|
||||||
"comment": "项目划分层级全路径",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,239 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "ProjectDivisions_CostPreview",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "项目划分_费用预览表也被称为“项目划分费用预览”、“项目划分取费费用”。其中包含项目划分合价、直接费、间接费、利润、税金、主材费等。查询示例: SELECT Total FROM ProjectDivisions_CostPreview WHERE Id = '15'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Id",
|
|
||||||
"alias": "id,项目id",
|
|
||||||
"comment": "项目划分id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ParentId",
|
|
||||||
"alias": "父级id,父id",
|
|
||||||
"comment": "项目划分父级id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Level",
|
|
||||||
"alias": "层号,层级,层编号",
|
|
||||||
"comment": "层级编号,从1开始",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ProfessionalType",
|
|
||||||
"alias": "专业类型",
|
|
||||||
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "FeeCollectionTableName",
|
|
||||||
"alias": "取费表",
|
|
||||||
"comment": "项目划分的取费表,此项目划分选用的取费表",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Direct_Cost",
|
|
||||||
"alias": "直接费",
|
|
||||||
"comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Direct_Project_Cost",
|
|
||||||
"alias": "直接工程费",
|
|
||||||
"comment": "直接工程费是指按照正常的施工条件,在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Quota_Direct_Cost",
|
|
||||||
"alias": "定额直接费",
|
|
||||||
"comment": "定额直接费,包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Labor_Cost",
|
|
||||||
"alias": "人工费",
|
|
||||||
"comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Material_Cost",
|
|
||||||
"alias": "材料费",
|
|
||||||
"comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Construction_Machinery_Cost",
|
|
||||||
"alias": "施工机械使用费",
|
|
||||||
"comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Installation_Material_Cost",
|
|
||||||
"alias": "装置性材料费",
|
|
||||||
"comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料,也称主要材料费。装置性材料通常在概算或预算定额中未计价,也称未计价材料,也称主材。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "A_Supply_Installation_Material_Cost",
|
|
||||||
"alias": "甲供装置性材料费",
|
|
||||||
"comment": "供货方为甲供的装置性材料费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "B_Supply_Installation_Material_Cost",
|
|
||||||
"alias": "乙供装置性材料费",
|
|
||||||
"comment": "供货方为乙供的装置性材料费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Measure_Cost",
|
|
||||||
"alias": "措施费",
|
|
||||||
"comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "WinterRainySeasons_Additional_Construction_Cost",
|
|
||||||
"alias": "冬雨季施工增加费",
|
|
||||||
"comment": "冬雨季施工增加费是指按照合理的工期要求,建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Night_Additional_Construction_Cost",
|
|
||||||
"alias": "夜间施工增加费",
|
|
||||||
"comment": "夜间施工增加费是指按照规程要求,工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Construction_Tool_Usage_Cost",
|
|
||||||
"alias": "施工工具用具使用费",
|
|
||||||
"comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Special_Areas_Additional_Construction_Cost",
|
|
||||||
"alias": "特殊地区施工增加费",
|
|
||||||
"comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工:因特殊自然条件影响而需额外增加的施工费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Temporary_Facility_Cost",
|
|
||||||
"alias": "临时设施费",
|
|
||||||
"comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用,以及维修、拆除、折旧及摊销费,或临时设施的租赁费等。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Construction_Organization_Relocation_Cost",
|
|
||||||
"alias": "施工机构迁移费",
|
|
||||||
"comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资,以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Safe_Civilized_Construction_Cost",
|
|
||||||
"alias": "安全文明施工费",
|
|
||||||
"comment": "安全文明施工费,包括安全生产费、文明施工费、环境保护费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Indirect_Cost",
|
|
||||||
"alias": "间接费",
|
|
||||||
"comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Regulatory_Cost",
|
|
||||||
"alias": "规费",
|
|
||||||
"comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Social_Insurance_Premiums",
|
|
||||||
"alias": "社会保险费",
|
|
||||||
"comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Housing_Provident_Fund",
|
|
||||||
"alias": "住房公积金",
|
|
||||||
"comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Enterprise_Management_Cost",
|
|
||||||
"alias": "企业管理费",
|
|
||||||
"comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Construction_Enterprise_Cooperation_Debugging_Cost",
|
|
||||||
"alias": "施工企业配合调试费",
|
|
||||||
"comment": "施工企业配合调试费是指在工程整套启动试运阶段,施工企业安装专业配合调试所发生的费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Profit",
|
|
||||||
"alias": "利润",
|
|
||||||
"comment": "利润是指施工企业完成所承包工程获得的盈利。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Taxes",
|
|
||||||
"alias": "税金",
|
|
||||||
"comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Equipment_Cost",
|
|
||||||
"alias": "设备费",
|
|
||||||
"comment": "设备购置费是指为项目建设而购置或自制各种设备,并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "B_Supply_Equipment_Excluding_Tax_Price",
|
|
||||||
"alias": "乙供设备不含税价",
|
|
||||||
"comment": "设备费中,供货方为乙供设备,不含税价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "A_Supply_Equipment_Tax_Price",
|
|
||||||
"alias": "甲供设备含税价",
|
|
||||||
"comment": "设备费中,供货方为甲供设备,含税价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Installation_Cost",
|
|
||||||
"alias": "安装费",
|
|
||||||
"comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Main_Material_Cost",
|
|
||||||
"alias": "主材费",
|
|
||||||
"comment": "主材费指装置性材料费",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Total",
|
|
||||||
"alias": "总价,总计,总体费用,总的费用",
|
|
||||||
"comment": "总计包含安装费、主材费、设备费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Sum",
|
|
||||||
"alias": "合计,合价",
|
|
||||||
"comment": "项目划分合价,分部分项费用,项目划分费用。合计包含安装费和主材费。",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Path",
|
|
||||||
"alias": "路径,项目划分全路径",
|
|
||||||
"comment": "项目划分层级全路径",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "ProjectProperties",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "工程属性表是用于存储整个工程的重要属性,访问该表都是为了通过属性名查找属性值。通常属性值有工程信息、工程属性、技经参数,表中包含工程总投资、工程总费用,工程主要费用,工程技经参数等。查询示例: SELECT Value FROM ProjectProperties WHERE Name = 'findname'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Name",
|
|
||||||
"alias": "名称、属性、属性名称、字段、字段名称、变量、参数,属性名",
|
|
||||||
"comment": "属性的唯一标识",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Value",
|
|
||||||
"alias": "值、变量值、参数值、数值,属性值",
|
|
||||||
"comment": "属性对应的实际值",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Type",
|
|
||||||
"alias": "类型、变量类型、数值类型,属性类型",
|
|
||||||
"comment": "属性变量的类型",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Unit",
|
|
||||||
"alias": "单位",
|
|
||||||
"comment": "单位",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,335 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "ProjectQuantities",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "工程量表是项目划分下工程量,包含定额、主材、设备、一笔性费用。查询示例: SELECT BudgetPrice FROM ProjectQuantities WHERE Name = 'findname'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Id",
|
|
||||||
"alias": "id",
|
|
||||||
"comment": "消耗量id,工程量id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ParentId",
|
|
||||||
"alias": "父级id,父id",
|
|
||||||
"comment": "父级id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ProjectDivisionId",
|
|
||||||
"alias": "项目划分id,项目id",
|
|
||||||
"comment": "父级项目划分id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Quantity",
|
|
||||||
"alias": "个数,数量,数目",
|
|
||||||
"comment": "数量,消耗量数量,工程量数量,主材数量,定额数量,设备数量,项目划分单位",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "FeatureSegment",
|
|
||||||
"alias": "特征段",
|
|
||||||
"comment": "线路特征段",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ParentQuantity",
|
|
||||||
"alias": "父级个数,父级数量",
|
|
||||||
"comment": "父级id的数量",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Name",
|
|
||||||
"alias": "名称",
|
|
||||||
"comment": "项目名称,工程量名称,消耗量名称,主材名称,定额名称,设备名称,材料名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Encoding",
|
|
||||||
"alias": "编码,译码",
|
|
||||||
"comment": "编码,定额编码,主材编码,设备编码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SpecificationModel",
|
|
||||||
"alias": "规格型号",
|
|
||||||
"comment": "规格型号,主材规格型号,设备规格型号",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Unit",
|
|
||||||
"alias": "单位",
|
|
||||||
"comment": "单位,主材单位,定额单位,设备单位,项目划分单位",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "BasePrice",
|
|
||||||
"alias": "基价",
|
|
||||||
"comment": "定额基价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "LaborCost",
|
|
||||||
"alias": "人工费",
|
|
||||||
"comment": "定额人工费",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MaterialCost",
|
|
||||||
"alias": "材料费",
|
|
||||||
"comment": "定额材料费",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MachineryCost",
|
|
||||||
"alias": "机械费",
|
|
||||||
"comment": "定额机械费",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "QuotaCoefficient",
|
|
||||||
"alias": "定额系数",
|
|
||||||
"comment": "定额系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "LaborCoefficient",
|
|
||||||
"alias": "人工系数",
|
|
||||||
"comment": "定额人工系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MaterialCoefficient",
|
|
||||||
"alias": "材料系数",
|
|
||||||
"comment": "定额材料系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MechanicalCoefficient",
|
|
||||||
"alias": "机械系数",
|
|
||||||
"comment": "定额机械系数",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ExpenseType",
|
|
||||||
"alias": "费用类型",
|
|
||||||
"comment": "费用类型,取值为取费、不取费",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "BudgetPrice",
|
|
||||||
"alias": "预算价",
|
|
||||||
"comment": "预算价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "MarketPrice",
|
|
||||||
"alias": "市场价",
|
|
||||||
"comment": "市场价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Supplier",
|
|
||||||
"alias": "供货方",
|
|
||||||
"comment": "供货方,设备供货方,主材供货方,取值为甲供、乙供",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Type",
|
|
||||||
"alias": "类型",
|
|
||||||
"comment": "工程量类型,取值定额、主材、设备、一笔性费用",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "QuotaRange",
|
|
||||||
"alias": "定额范围",
|
|
||||||
"comment": "定额范围,取值概算、预算",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "A_Supply_Material_Cost_Excluding_Tax",
|
|
||||||
"alias": "甲供材料费不含税",
|
|
||||||
"comment": "甲供材料费不含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "A_Supply_Material_Cost_Including_Tax",
|
|
||||||
"alias": "甲供材料费含税",
|
|
||||||
"comment": "甲供材料费含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "B_Supply_Material_Cost_Excluding_Tax",
|
|
||||||
"alias": "乙供材料费不含税",
|
|
||||||
"comment": "乙供材料费不含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "B_Supply_Material_Cost_Including_Tax",
|
|
||||||
"alias": "乙供材料费含税",
|
|
||||||
"comment": "乙供材料费含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ScaffoldCalculation",
|
|
||||||
"alias": "脚手架计取",
|
|
||||||
"comment": "脚手架计取,取值计取、不计取",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Remarks",
|
|
||||||
"alias": "说明,备注",
|
|
||||||
"comment": "备注,说明",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "FeeCollectionTableName",
|
|
||||||
"alias": "取费表",
|
|
||||||
"comment": "项目划分的取费表,工程量的取费表",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Quota_Section_Name",
|
|
||||||
"alias": "定额章节名称",
|
|
||||||
"comment": "定额章节名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ProfessionalType",
|
|
||||||
"alias": "专业类型",
|
|
||||||
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "split",
|
|
||||||
"alias": "拆分",
|
|
||||||
"comment": "是否为拆分材料,取值1为拆分,取值0为不拆分",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Loss",
|
|
||||||
"alias": "损耗",
|
|
||||||
"comment": "损耗率,主材损耗率",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SingleWeight",
|
|
||||||
"alias": "单重",
|
|
||||||
"comment": "单重,主材单重",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "LineWeight",
|
|
||||||
"alias": "线重",
|
|
||||||
"comment": "线重,主材线重",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SupervisedMaterials",
|
|
||||||
"alias": "监造物料",
|
|
||||||
"comment": "监造物料,取值1为监造物料,取值0为非监造物料",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "EquipmentMaterials",
|
|
||||||
"alias": "设备性材料",
|
|
||||||
"comment": "设备性材料,取值1为设备性材料,取值0为主材",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "GrossWeight",
|
|
||||||
"alias": "毛重",
|
|
||||||
"comment": "毛重,主材毛重",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "TransportationType",
|
|
||||||
"alias": "运输类型",
|
|
||||||
"comment": "运输类型,主材运输类型",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "TransportationMiscellaneous",
|
|
||||||
"alias": "运杂费率",
|
|
||||||
"comment": "运杂费率,设备运杂费率",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "EquipmentType",
|
|
||||||
"alias": "设备类型",
|
|
||||||
"comment": "设备类型,取值为主要设备、普通设备",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "UnitPrice",
|
|
||||||
"alias": "单价",
|
|
||||||
"comment": "单价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Market_Price_Excluding_Tax",
|
|
||||||
"alias": "市场价不含税",
|
|
||||||
"comment": "市场价不含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Market_Price_Including_Tax",
|
|
||||||
"alias": "市场价含税",
|
|
||||||
"comment": "市场价含税,设备含税价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Budget_Price_Excluding_Tax",
|
|
||||||
"alias": "预算价不含税",
|
|
||||||
"comment": "预算价不含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Budget_Price_Including_Tax",
|
|
||||||
"alias": "预算价含税",
|
|
||||||
"comment": "预算价含税",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Unit_Price_Excluding_Tax",
|
|
||||||
"alias": "单价不含税",
|
|
||||||
"comment": "单价不含税,设备不含税价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "GroupPrice",
|
|
||||||
"alias": "分组合价",
|
|
||||||
"comment": "分组合价",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Pump_Truck_Pouring",
|
|
||||||
"alias": "泵车浇制",
|
|
||||||
"comment": "泵车浇制,取值1为泵车浇制,取值0为非泵车浇制",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "On_Site_Preparation",
|
|
||||||
"alias": "现场制备",
|
|
||||||
"comment": "现场制备,取值1为现场制备,取值0为非现场制备",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Clear_Water_Concrete",
|
|
||||||
"alias": "清水混凝土",
|
|
||||||
"comment": "清水混凝土,取值1为清水混凝土,取值0为非清水混凝土",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Debugging_Fee_Calculation",
|
|
||||||
"alias": "调试费计取",
|
|
||||||
"comment": "调试费计取,取值计取、不计取",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
@@ -1,101 +0,0 @@
|
|||||||
{
|
|
||||||
"Table": [
|
|
||||||
{
|
|
||||||
"name": "TotalCalculateTable",
|
|
||||||
"alias": "",
|
|
||||||
"comment": "总算表也被称为“工程总费用”、“工程费用”。其中包含本地工程、辅助设施工程、编制基准期价差、设备购置费、其他费用、基本预备费、特殊费用、工程静态投资、动态费用、价差预备费、建设期贷款利息、工程动态投资、可抵扣增值税额。查询示例: SELECT Amount FROM TotalCalculateTable WHERE Name = 'findname'。",
|
|
||||||
"fields": [
|
|
||||||
{
|
|
||||||
"name": "Id",
|
|
||||||
"alias": "项目id,id,费用id",
|
|
||||||
"comment": "费用项目id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "ParentId",
|
|
||||||
"alias": "父级id,父id",
|
|
||||||
"comment": "费用项目父级id",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Level",
|
|
||||||
"alias": "层号,层级,层编号",
|
|
||||||
"comment": "层级编号,从1开始",
|
|
||||||
"type": "INT"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Name",
|
|
||||||
"alias": "名称,费用名,项目名",
|
|
||||||
"comment": "费用名称,项目名称",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "SerialNumber",
|
|
||||||
"alias": "序号",
|
|
||||||
"comment": "工程费用序号",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Code",
|
|
||||||
"alias": "编号,代号,代码",
|
|
||||||
"comment": "费用代码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Rate",
|
|
||||||
"alias": "费用利率,费率",
|
|
||||||
"comment": "费率",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount",
|
|
||||||
"alias": "金额,价格",
|
|
||||||
"comment": "合计费",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "WBS_Code",
|
|
||||||
"alias": "WBS编号,WBS编码",
|
|
||||||
"comment": "费用编码",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Path",
|
|
||||||
"alias": "路径,费用全路径",
|
|
||||||
"comment": "费用名称全路径",
|
|
||||||
"type": "VARCHAR"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount_InstallationCost",
|
|
||||||
"alias": "安装金额,金额_安装费,安装价格",
|
|
||||||
"comment": "安装费金额",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount_EquipmentCost",
|
|
||||||
"alias": "金额_设备费,设备金额,设备价格",
|
|
||||||
"comment": "设备费金额",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount_OtherCost",
|
|
||||||
"alias": "其他费用金额,金额_其他费,其他费用价格",
|
|
||||||
"comment": "其他费金额",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount_Total",
|
|
||||||
"alias": "总的金额,金额_占总计,总体金额",
|
|
||||||
"comment": "合计费占总计",
|
|
||||||
"type": "REAL"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"name": "Amount_UnitInvestment",
|
|
||||||
"alias": "金额_单位投资,合计投资金额",
|
|
||||||
"comment": "合计费单位投资",
|
|
||||||
"type": "REAL"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
+5
-3
@@ -11,22 +11,24 @@ from fastapi.responses import RedirectResponse
|
|||||||
from app.api.routers.chat import chat_router
|
from app.api.routers.chat import chat_router
|
||||||
from app.api.routers.upload import file_upload_router
|
from app.api.routers.upload import file_upload_router
|
||||||
from app.api.routers.app import v1_router
|
from app.api.routers.app import v1_router
|
||||||
from app.settings import init_settings
|
|
||||||
|
from app.settings import init_settings,init_ProjectInfo
|
||||||
from app.observability import init_observability
|
from app.observability import init_observability
|
||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from phoenix.trace import using_project
|
from phoenix.trace import using_project
|
||||||
|
|
||||||
|
|
||||||
logger = logging.getLogger("uvicorn")
|
logger = logging.getLogger("uvicorn")
|
||||||
|
|
||||||
|
|
||||||
usPrj = using_project(os.getenv("PHOENIX_PROJECT_NAME"))
|
usPrj = using_project(os.getenv("PHOENIX_PROJECT_NAME"))
|
||||||
usPrj.__enter__()
|
usPrj.__enter__()
|
||||||
|
|
||||||
|
import nest_asyncio
|
||||||
|
nest_asyncio.apply()
|
||||||
|
|
||||||
init_settings()
|
init_settings()
|
||||||
init_observability()
|
init_observability()
|
||||||
|
init_ProjectInfo()
|
||||||
app = FastAPI()
|
app = FastAPI()
|
||||||
|
|
||||||
environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set
|
environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set
|
||||||
|
|||||||
@@ -0,0 +1,64 @@
|
|||||||
|
from llama_index.llms.dashscope import DashScope
|
||||||
|
from llama_index.core.base.llms.types import LLMMetadata
|
||||||
|
|
||||||
|
class DashScopeGenerationModels:
    """DashScope Qwen serial models.

    Each attribute holds a model identifier as a plain string. The same
    attributes are used as the keys of ``DASHSCOPE_MODEL_META`` in this
    module, so every value must be a ``str``.
    """

    QWEN_TURBO = "qwen-turbo"
    QWEN_PLUS = "qwen-plus"
    QWEN_MAX = "qwen-max"
    QWEN_MAX_1201 = "qwen-max-1201"
    QWEN_MAX_LONGCONTEXT = "qwen-max-longcontext"
    # No trailing comma: with one, this constant becomes a 1-tuple and can
    # never compare equal to (or be looked up by) a model-name string.
    QWEN2_MATH_72B_INSTRUCT = 'qwen2-math-72b-instruct'
    QWEN2_72B = 'qwen2-72b-instruct'
|
||||||
|
|
||||||
|
# Per-model settings, keyed by the DashScopeGenerationModels constants.
# Each entry is expanded as keyword arguments into LLMMetadata by
# CustomDashScope.metadata. The two numbers are presumably token limits
# (context size, maximum output) -- TODO confirm against DashScope docs.
DASHSCOPE_MODEL_META = {
    model: {
        "context_window": context_window,
        "num_output": num_output,
        "is_chat_model": True,
    }
    for model, context_window, num_output in (
        (DashScopeGenerationModels.QWEN_TURBO, 1024 * 8, 1024 * 8),
        (DashScopeGenerationModels.QWEN_PLUS, 1024 * 32, 1024 * 32),
        (DashScopeGenerationModels.QWEN_MAX, 1024 * 8, 1024 * 8),
        (DashScopeGenerationModels.QWEN_MAX_1201, 1024 * 8, 1024 * 8),
        (DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT, 1024 * 30, 1024 * 30),
        (DashScopeGenerationModels.QWEN2_MATH_72B_INSTRUCT, 1024 * 2, 1024 * 8),
        (DashScopeGenerationModels.QWEN2_72B, 1024 * 2, 1024 * 8),
    )
}
|
||||||
|
|
||||||
|
|
||||||
|
class CustomDashScope(DashScope):
    """DashScope LLM whose metadata comes from this module's DASHSCOPE_MODEL_META."""

    @property
    def metadata(self) -> LLMMetadata:
        """Build the LLMMetadata for ``self.model_name``.

        The entry for the model is read from ``DASHSCOPE_MODEL_META``; when
        ``self.max_tokens`` is set (truthy) it replaces the entry's
        ``num_output`` for this call only.

        Returns:
            LLMMetadata populated with the model name and the table entry.

        Raises:
            KeyError: if ``self.model_name`` has no entry in
                ``DASHSCOPE_MODEL_META``.
        """
        # Work on a copy: writing max_tokens back into the shared module-level
        # table would leak one instance's setting into every later instance
        # that uses the same model without max_tokens.
        model_meta = dict(DASHSCOPE_MODEL_META[self.model_name])
        if self.max_tokens:
            model_meta["num_output"] = self.max_tokens
        return LLMMetadata(model_name=self.model_name, **model_meta)
|
||||||
|
|
||||||
|
|
||||||
Generated
+5323
-963
File diff suppressed because it is too large
Load Diff
+54
-29
@@ -10,44 +10,54 @@ readme = "README.md"
|
|||||||
generate = "app.engine.generate:generate_datasource"
|
generate = "app.engine.generate:generate_datasource"
|
||||||
|
|
||||||
[tool.poetry.dependencies]
|
[tool.poetry.dependencies]
|
||||||
python = "^3.11,<3.12"
|
python = "^3.11,<3.13"
|
||||||
fastapi = "^0.110.3"
|
fastapi = "0.110.3"
|
||||||
python-dotenv = "^1.0.0"
|
python-dotenv = "^1.0.1"
|
||||||
aiostream = "^0.6.2"
|
aiostream = "^0.6.2"
|
||||||
llama-index = "0.10.63"
|
cachetools = "^5.5.0"
|
||||||
cachetools = "^5.3.3"
|
|
||||||
protobuf = "4.25.4"
|
protobuf = "4.25.4"
|
||||||
nltk = "^3.9.1"
|
nltk = "^3.9.1"
|
||||||
jieba = "^0.42.1"
|
jieba = "^0.42.1"
|
||||||
|
|
||||||
#arize-phoenix = "^4.12.0"
|
transformers = "^4.43.0"
|
||||||
openinference-instrumentation-llama-index="2.2.3"
|
|
||||||
llama-index-callbacks-arize-phoenix = "^0.1.4"
|
|
||||||
llama-index-llms-dashscope = "^0.1.2"
|
|
||||||
llama-index-embeddings-dashscope = "^0.1.4"
|
|
||||||
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
|
|
||||||
xinference = "^0.14.1"
|
|
||||||
xinference-client = "^0.14.1"
|
|
||||||
llama-index-llms-xinference = "^0.1.2"
|
|
||||||
qdrant-client="^1.10.1"
|
|
||||||
llama-index-vector-stores-qdrant = "^0.2.14"
|
|
||||||
chroma="^0.2.0"
|
|
||||||
llama-index-vector-stores-chroma = "^0.1.10"
|
|
||||||
llama-index-readers-json = "^0.1.5"
|
|
||||||
llama-index-retrievers-bm25 = "^0.2.2"
|
|
||||||
llama-index-experimental = "^0.2.0"
|
|
||||||
|
|
||||||
duckduckgo_search = "^6.2.6"
|
#arize-phoenix = "^4.12.0"
|
||||||
|
openinference-instrumentation-llama-index="^3.0.2"
|
||||||
|
llama-index = "^0.11.7"
|
||||||
|
llama-index-core = "^0.11.7"
|
||||||
|
llama-index-callbacks-arize-phoenix = "^0.2.1"
|
||||||
|
llama-index-llms-dashscope = "^0.2.0"
|
||||||
|
llama-index-embeddings-dashscope = "^0.2.1"
|
||||||
|
#llama-index-postprocessor-dashscope-rerank = "^0.2.0"
|
||||||
|
|
||||||
|
llama-index-llms-ollama = "^0.3.1"
|
||||||
|
llama-index-embeddings-ollama = "^0.3.0"
|
||||||
|
|
||||||
|
xinference = "^0.15.0"
|
||||||
|
xinference-client = "^0.15.0"
|
||||||
|
llama-index-llms-xinference = "^0.2.1"
|
||||||
|
llama-index-embeddings-xinference = "^0.1.0"
|
||||||
|
llama-index-postprocessor-xinference-rerank = "^0.1.0"
|
||||||
|
qdrant-client="^1.11.0"
|
||||||
|
llama-index-vector-stores-qdrant = "^0.3.0"
|
||||||
|
chroma="^0.2.0"
|
||||||
|
llama-index-vector-stores-chroma = "^0.2.0"
|
||||||
|
llama-index-readers-json = "^0.2.0"
|
||||||
|
llama-index-retrievers-bm25 = "^0.3.0"
|
||||||
|
llama-index-experimental = "^0.3.0"
|
||||||
|
|
||||||
|
|
||||||
|
duckduckgo_search = "^6.2.10"
|
||||||
|
|
||||||
[tool.poetry.dependencies.uvicorn]
|
[tool.poetry.dependencies.uvicorn]
|
||||||
extras = [ "standard" ]
|
extras = [ "standard" ]
|
||||||
version = "^0.23.2"
|
version = "^0.30.6"
|
||||||
|
|
||||||
[tool.poetry.dependencies.llama-index-readers-database]
|
[tool.poetry.dependencies.llama-index-readers-database]
|
||||||
version = "^0.1.3"
|
version = "^0.2.0"
|
||||||
|
|
||||||
[tool.poetry.dependencies.pymysql]
|
[tool.poetry.dependencies.pymysql]
|
||||||
version = "^1.1.0"
|
version = "^1.1.1"
|
||||||
extras = [ "rsa" ]
|
extras = [ "rsa" ]
|
||||||
|
|
||||||
#[tool.poetry.dependencies.psycopg2]
|
#[tool.poetry.dependencies.psycopg2]
|
||||||
@@ -60,15 +70,30 @@ extras = [ "rsa" ]
|
|||||||
version = "^0.8"
|
version = "^0.8"
|
||||||
|
|
||||||
[tool.poetry.dependencies.e2b_code_interpreter]
|
[tool.poetry.dependencies.e2b_code_interpreter]
|
||||||
version = "0.0.7"
|
version = "^0.0.7"
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
[[tool.poetry.source]]
|
[[tool.poetry.source]]
|
||||||
name = "mirrors"
|
name = "ali"
|
||||||
url = "https://pypi.tuna.tsinghua.edu.cn/simple/"
|
url = "https://mirrors.aliyun.com/pypi/simple/"
|
||||||
priority = "default"
|
priority = "primary"
|
||||||
|
|
||||||
|
|
||||||
|
[[tool.poetry.source]]
|
||||||
|
name = "tencent"
|
||||||
|
url = "https://mirrors.cloud.tencent.com/pypi/simple/"
|
||||||
|
priority = "primary"
|
||||||
|
|
||||||
|
|
||||||
|
[[tool.poetry.source]]
|
||||||
|
name = "tsinghua"
|
||||||
|
url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/"
|
||||||
|
priority = "primary"
|
||||||
|
|
||||||
|
|
||||||
[build-system]
|
[build-system]
|
||||||
requires = [ "poetry-core" ]
|
requires = [ "poetry-core" ]
|
||||||
build-backend = "poetry.core.masonry.api"
|
build-backend = "poetry.core.masonry.api"
|
||||||
|
|
||||||
|
tiktoken
|
||||||
@@ -8,7 +8,8 @@ from llama_index.core import VectorStoreIndex, SQLDatabase
|
|||||||
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
|
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
|
||||||
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
|
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
|
||||||
from app.api.routers.chat import generate_filters
|
from app.api.routers.chat import generate_filters
|
||||||
from app.engine import get_index, makeDescriptionByEngine
|
from app.engine import get_index
|
||||||
|
from app.engine.engine import makeDescriptionByEngine
|
||||||
from app.engine.loaders.db import CustomDatabaseReader
|
from app.engine.loaders.db import CustomDatabaseReader
|
||||||
from app.engine.vectordb import get_vector_store
|
from app.engine.vectordb import get_vector_store
|
||||||
from app.observability import init_observability
|
from app.observability import init_observability
|
||||||
|
|||||||
@@ -7,7 +7,8 @@ from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
|
|||||||
from sqlalchemy import create_engine
|
from sqlalchemy import create_engine
|
||||||
|
|
||||||
from app.api.routers.chat import generate_filters
|
from app.api.routers.chat import generate_filters
|
||||||
from app.engine import get_index, makeDescriptionByEngine
|
from app.engine import get_index
|
||||||
|
from app.engine.engine import makeDescriptionByEngine
|
||||||
from app.engine.vectordb import get_vector_store
|
from app.engine.vectordb import get_vector_store
|
||||||
from app.observability import init_observability
|
from app.observability import init_observability
|
||||||
from app.settings import init_settings
|
from app.settings import init_settings
|
||||||
|
|||||||
@@ -0,0 +1,43 @@
|
|||||||
|
from typing import Any, Dict, List, Optional
|
||||||
|
|
||||||
|
class ClsRegister:
    """Class-level registry of objects grouped by catalog and keyed by name.

    All state lives in the ``clsLst`` class attribute, so every caller
    shares the same registry.
    """

    # catalog -> {name -> registered object}
    clsLst: Dict[str, Dict[str, Any]] = {}

    @classmethod
    def add(cls, catalog, name, obj) -> None:
        """Register ``obj`` under ``name`` in ``catalog``.

        The catalog is created on first use; an existing entry with the
        same name is replaced.
        """
        cls.clsLst.setdefault(catalog, {})[name] = obj

    @classmethod
    def get(cls, catalog, name, fuzzy: bool = False) -> Optional[Any]:
        """Look up an object registered in ``catalog``.

        Args:
            catalog: Catalog to search.
            name: Name to look up.
            fuzzy: When False, ``name`` must equal a registered name. When
                True, the first registered name (in registration order) that
                is contained in ``name`` wins.

        Returns:
            The registered object, or None when the catalog is unknown or
            nothing matches.
        """
        registry = cls.clsLst.get(catalog)
        if registry is None:
            return None
        if not fuzzy:
            return registry.get(name)
        for key, value in registry.items():
            # Substring test: the registered key must occur inside ``name``.
            if key in name:
                return value
        return None

    @classmethod
    def getClsList(cls, catalog) -> List[Any]:
        """Return all objects registered in ``catalog``, in registration order.

        An unknown catalog yields an empty list.
        """
        return list(cls.clsLst.get(catalog, {}).values())
|
||||||
|
|
||||||
|
|
||||||
|
def register(catalog, name):
    """Return a decorator that records its target in ``ClsRegister``.

    The decorated object is stored under ``name`` in ``catalog`` and then
    handed back unchanged, so the decorator has no effect on the object
    itself.
    """

    def _record(target):
        ClsRegister.add(catalog, name, target)
        return target

    return _record
|
||||||
Reference in New Issue
Block a user