Compare commits

7 Commits

7 changed files with 1148 additions and 1250 deletions
+15
View File
@@ -55,6 +55,7 @@ class JsonTable:
def comment(self):
    """Return the value of the ``_comment`` attribute."""
    stored_comment = self._comment
    return stored_comment
class ProjectJson:
def __init__(self,dir:str) -> None:
self._dir = dir
@@ -75,5 +76,19 @@ class ProjectJson:
def tables(self):
    """Return the value of the ``_tables`` attribute."""
    current_tables = self._tables
    return current_tables
def getProjectName(dir: str):
    """Return the project name, taken from the last component of a path.

    Args:
        dir: Path of the project directory. Backslashes and forward slashes
            are both accepted as separators and trailing separators are
            ignored. (The name shadows the ``dir`` builtin; it is kept so
            that existing keyword callers continue to work.)

    Returns:
        The final non-empty path component, or ``"未知工程名称"`` when the
        path has no usable component (for example an empty string).
    """
    # NOTE(review): the statements that used to follow the fallback return
    # (a lookup of '工程名称' in the '工程属性' table) could never execute and
    # were removed; reintroduce them deliberately if that lookup is wanted.
    #
    # Normalise to a single separator and drop empty pieces so that a
    # trailing separator does not produce an empty project name.
    components = [part for part in dir.replace('/', '\\').split('\\') if part]
    if components:
        return components[-1]
    return "未知工程名称"
+36 -51
View File
@@ -2,36 +2,29 @@ from llama_index.core import PromptTemplate
text_qa_template_str = (
"# 角色\n"
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
"如同直接从文件中提取的内容。\n"
"知识库中已经导入一个工程的全部数据,请你站在当前工程的角度回答用户关于工程文件的问题。\n"
"例如:询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称\n"
"## 技能\n"
"### 技能 1: 数据查询与提供\n"
"- 准确回答所有关于电力工程造价的相关问题。\n"
"- 提供具体数据,如成本估算、材料清单、劳动力需求等\n"
"- 确保提供的信息严格基于工程文档中的记录\n"
"### 技能 2: 技术性解释\n"
"- 解释造价工程中的技术术语和概念\n"
"- 为复杂的工程细节提供清晰易懂的说明。\n"
"## 约束\n"
"- 仅回答与电力工程造价文件相关的具体问题。\n"
"- 不进行任何超出文件内容的猜测或假设。\n"
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
"以下为上下文信息\n"
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
"\n"
"# 任务描述:\n"
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符\n"
"如果答案与事实不符,直接给出答案,不要做解释。\n"
"\n"
"# 回答规则:\n"
"- 请使用与文档材料相同的语言回答问题。\n"
"- 评估文档是否含有足够信息回答问题。无关时不要回答\n"
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释\n"
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁。\n"
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答。\n"
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”\n"
"\n"
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复。\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"请根据上下文信息而非先知识回答我的问题或回复我的指令。前面的上下文信息可能有用,也可能没用,你需要从我给出的上下文信息中选出与我的问题最相关的那些,来为你的回答提供依据。回答一定要忠于原文,简洁但不丢信息,不要胡乱编造。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。我的问题或指令是什么语种,你就用什么语种回复\n"
"鉴于来自多个来源的文档片段而非先知识,回答查询\n"
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
"问题:{query_str}\n"
"你的回复: "
"Query: {query_str}\n"
"Answer: "
)
text_qa_template = PromptTemplate(text_qa_template_str)
@@ -57,31 +50,26 @@ refine_template = PromptTemplate(refine_template_str)
summary_template_str = (
"# 角色\n"
"你是一名博微造价工程数据查询助手,专精于电力工程文件中的信息。"
"你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答,"
"如同直接从文件中提取的内容。\n"
"## 技能\n"
"### 技能 1: 数据查询与提供\n"
"- 准确回答所有关于电力工程造价的相关问题。\n"
"- 提供具体数据,如成本估算、材料清单、劳动力需求等。\n"
"- 确保提供的信息严格基于工程文档中的记录\n"
"### 技能 2: 技术性解释\n"
"- 解释造价工程中的技术术语和概念\n"
"- 为复杂的工程细节提供清晰易懂的说明\n"
"## 约束\n"
"- 仅回答与电力工程造价文件相关的具体问题\n"
"- 不进行任何超出文件内容的猜测或假设。\n"
"- 所有回答均基于文件内容,采用客观和技术性的语言。\n"
"- 请基于这些信息回答问题。如果无法找到相关信息,请不要额外发散回答,不要回答多余的信息,只需要回答“我不知道这个问题的答案”。\n"
"来自多个来源的上下文信息如下。\n"
"你是一名擅长博微造价工程数据问答的专家,可以根据电力工程文件中的内容回答用户问题。\n"
"\n"
"# 任务描述:\n"
"请仔细阅读所给的文档片段,并根据其内容回答问题。\n"
"您需要判断文档的内容是否可以回答问题,不要强行回答。如果可以回答,答案必须严格遵循文档内容,即使与事实不符。\n"
"如果答案与事实不符,直接给出答案,不要做解释。\n"
"\n"
"# 回答规则:\n"
"- 请使用与文档材料相同的语言回答问题\n"
"- 评估文档是否含有足够信息回答问题。无关时不要回答。\n"
"- 如果问题能被回答,你的回答必须严格遵循文档内容,即使与事实不符。一定不要做多余解释\n"
"- 如果问题能被回答,直接引用文档的相关信息保证答案准确、完整,并追求简洁\n"
"- 当文档中只有少量信息与问题相关时,重点关注这部分信息,这种情况下一定回答\n"
"- 当文档中信息与问题无关时,请不要额外发散回答,只需要回答“我不知道这个问题的答案”。\n"
"\n"
"来自多个来源的文档片段如下,请充分理解以下参考资料内容,组织出满足用户提问的条理清晰的回复\n"
"---------------------\n"
"{context_str}\n"
"---------------------\n"
"鉴于来自多个来源的信息而非先验知识, "
"回答查询。\n"
"鉴于来自多个来源的文档片段而非先验知识,回答查询。\n"
"如果是表结构或者是数据库的相关内容,只用于推导问题,不需要告诉用户数据库或表结构等物理信息。\n"
"Query: {query_str}\n"
"Answer: "
@@ -108,9 +96,6 @@ ReActChatFormatter_messages = (
"Action Input: 输入给工具的内容,使用JSON格式表示kwargs(例如{{\"input\": \"hello world\", \"num_beams\": 5}}\n"
"'''\n\n"
"请始终以Thought开始。\n\n"
"请始终以Thought开始。\n\n"
"请始终以Thought开始。\n\n"
"请始终以Thought开始。\n\n"
"切勿用Markdown代码标记包围你的响应。如果需要,可以在响应中使用代码标记。\n\n"
"请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n"
"如果使用此格式,用户将以下面的格式进行回应:\n\n"
+6 -3
View File
@@ -3,15 +3,18 @@ from typing import Dict
from abc import abstractmethod
from llama_index.core.constants import DEFAULT_TEMPERATURE
from llama_index.core.settings import Settings
from llama_index.embeddings.xinference import XinferenceEmbedding
from llama_index.llms.xinference import Xinference
#from llama_index.embeddings.xinference import XinferenceEmbedding
from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
from llama_index.postprocessor.xinference_rerank import XinferenceRerank
from app.xinference.base import XinferenceEmbedding, XinferenceRerank
from app.engine.loaders import getProjectInfos
from app.api.routers.request.base import ProjectInfo
from modelProvide.customDashScope import CustomDashScope
from util.register import *
from llama_index.core.callbacks import CallbackManager
from modelProvide.customDashScope import CustomDashScope
ModelPlateCategory = '模型平台'
@@ -103,7 +106,7 @@ class XinferencePlatform(ModelPlatform):
embed_model_name = os.getenv("EMBEDDING_MODEL")
dimensions = os.getenv("EMBEDDING_DIM")
dimensions = int(dimensions) if dimensions is not None else None
return XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions)
return XinferenceEmbedding(embed_model_name, embedding_base_url)
def rerank(self):
rerank_model = os.getenv("RERANK_MODEL")
View File
-272
View File
@@ -1,272 +0,0 @@
"""Xinference embeddings file."""
import logging
from enum import Enum
from http import HTTPStatus
from typing import Any, Dict, List, Optional, Union, Tuple
from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher
from llama_index.core.bridge.pydantic import PrivateAttr
from llama_index.core.callbacks import CBEventType, EventPayload
from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent
from llama_index.core.postprocessor.types import BaseNodePostprocessor
from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle
from pydantic import Field
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# NOTE(review): not referenced in the visible part of this module — confirm
# whether it is still needed.
EMBED_MAX_INPUT_LENGTH = 2048
# Default value of the ``embed_batch_size`` parameter of
# ``XinferenceEmbedding.__init__``.
EMBED_MAX_BATCH_SIZE = 1
class XinferenceEmbedding(BaseEmbedding):
    """Text embedding model served by a Xinference endpoint.

    The constructor connects to the endpoint, resolves a handle for
    ``model_uid`` and uses that handle for every embedding request.
    """

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Handle returned by ``RESTfulClient.get_model``; used to create embeddings.
    _generator: Any = PrivateAttr()
    # Declared as private attributes (not fields) so that they can actually be
    # assigned on the instance once the base class has been initialised.
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
        dimensions: Optional[int] = None,
        additional_kwargs: Optional[Dict[str, Any]] = None,
        api_key: Optional[str] = None,
        api_base: Optional[str] = None,
        api_version: Optional[str] = None,
        max_retries: int = 10,
        **kwargs: Any,
    ) -> None:
        """Connect to ``endpoint`` and bind this instance to ``model_uid``.

        Args:
            model_uid: UID of the model on the Xinference endpoint; also
                forwarded to the base class as ``model_name``.
            endpoint: URL of the Xinference endpoint.
            embed_batch_size: Accepted for signature compatibility. The value
                forwarded to the base class is the ``replica`` entry of the
                model description, as in the previous implementation.
            dimensions: Embedding dimensionality. When ``None`` the
                ``dimensions`` entry of the model description is used.
            additional_kwargs: Forwarded unchanged to the base class.
            api_key: Forwarded unchanged to the base class.
            api_base: Forwarded unchanged to the base class.
            api_version: Forwarded unchanged to the base class.
            max_retries: Forwarded unchanged to the base class.
            **kwargs: Forwarded unchanged to the base class.

        Raises:
            ImportError: If the ``xinference`` client library is missing.
            RuntimeError: If the model cannot be resolved on the endpoint.
        """
        generator, model_description, replica, model_dimensions = self.load_model(
            model_uid, endpoint
        )
        # Honour an explicit caller value; fall back to what the model reports.
        if dimensions is None:
            dimensions = model_dimensions
        # NOTE(review): the batch size is taken from the model's replica count
        # rather than from ``embed_batch_size`` — confirm this is intended.
        super().__init__(
            embed_batch_size=replica,
            dimensions=dimensions,
            model_name=model_uid,
            model_description=model_description,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            **kwargs,
        )
        # Private attributes are assigned only after the pydantic base class
        # has finished initialising the instance.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    def load_model(
        self, model_uid: str, endpoint: str
    ) -> Tuple[Any, Dict[str, Any], int, int]:
        """Resolve the model handle and its description from the endpoint.

        Args:
            model_uid: UID of the model to look up.
            endpoint: URL of the Xinference endpoint.

        Returns:
            A tuple ``(generator, model_description, replica, dimensions)``
            where the last two items are read from ``model_description``.

        Raises:
            ImportError: If the ``xinference`` client library is missing.
            RuntimeError: If the endpoint returns no handle or no description
                for ``model_uid``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as err:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from err

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``get`` instead of indexing so that an unknown UID reaches the
        # RuntimeError below instead of surfacing as a bare KeyError.
        model_description = client.list_models().get(model_uid)
        # Explicit check instead of ``assert``, which is stripped under ``-O``.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )

        replica = model_description['replica']
        dimensions = model_description['dimensions']
        return generator, model_description, replica, dimensions

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string ``"XinferenceEmbedding"``."""
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """Embed ``text`` synchronously through the Xinference model handle.

        Returns:
            The ``embedding`` entry of the first item in the response's
            ``data`` list.
        """
        response = self._generator.create_embedding(input=text)
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` synchronously; delegates to the text embedding."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Embed ``query``; this coroutine calls the synchronous implementation."""
        return self._get_query_embedding(query)
class XinferenceRerank(BaseNodePostprocessor):
    """Node postprocessor that reranks nodes with a Xinference rerank model.

    The constructor connects to the endpoint and resolves a handle for
    ``model_uid``; ``_postprocess_nodes`` sends the node texts and the query
    to that handle and rebuilds the node list from the returned scores.
    """

    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Handle returned by ``RESTfulClient.get_model``; used to call ``rerank``.
    _generator: Any = PrivateAttr()
    # Declared as private attributes (not fields) so that they can actually be
    # assigned on the instance once the base class has been initialised.
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()
    model: str = Field(description="Xinference rerank model name.")
    # Both limits are optional: ``None`` disables the corresponding filter.
    top_n: Optional[int] = Field(default=None, description="Top N nodes to return.")
    threshold: Optional[float] = Field(
        default=None, description="threshold nodes to return."
    )

    def __init__(
        self,
        model_uid: str,
        endpoint: str,
        top_n: Optional[int] = None,
        threshold: Optional[float] = None,
        return_documents: bool = False
    ):
        """Connect to ``endpoint`` and bind this instance to ``model_uid``.

        Args:
            model_uid: UID of the rerank model on the Xinference endpoint.
            endpoint: URL of the Xinference endpoint.
            top_n: Maximum number of nodes to return; ``None`` means no limit.
            threshold: Minimum relevance score a node needs to be returned;
                ``None`` means no score filtering.
            return_documents: Forwarded to the base class initialiser; it is
                not used anywhere else in this class.

        Raises:
            ImportError: If the ``xinference`` client library is missing.
            RuntimeError: If the model cannot be resolved on the endpoint.
        """
        generator, model_description = self.load_model(model_uid, endpoint)
        super().__init__(
            top_n=top_n,
            model=model_uid,
            model_uid=model_uid,
            threshold=threshold,
            return_documents=return_documents,
            model_description=model_description,
        )
        # Private attributes are assigned only after the pydantic base class
        # has finished initialising the instance.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string ``"XinferenceRerank"``."""
        return "XinferenceRerank"

    def _postprocess_nodes(
        self,
        nodes: List[NodeWithScore],
        query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rerank ``nodes`` against the query and apply the configured limits.

        Args:
            nodes: Candidate nodes to rerank.
            query_bundle: Query whose ``query_str`` is sent to the model.

        Returns:
            New ``NodeWithScore`` objects carrying the model's relevance
            scores, in the order the service returned them, restricted to
            scores ``>= threshold`` (when set) and to at most ``top_n`` items
            (when set).

        Raises:
            ValueError: If ``query_bundle`` is ``None``.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        dispatcher.event(
            ReRankStartEvent(
                nodes=nodes,
                top_n=self.top_n,
                query=query_bundle,
                model_name=self.model
            )
        )

        with self.callback_manager.event(
            CBEventType.RERANKING,
            payload={
                EventPayload.NODES: nodes,
                EventPayload.MODEL_NAME: self._model_uid,
                EventPayload.QUERY_STR: query_bundle.query_str,
                EventPayload.TOP_K: self.top_n,
            },
        ) as event:
            texts = [node.node.get_content() for node in nodes]
            response = self._generator.rerank(texts, query_bundle.query_str)

            new_nodes = []
            for result in response['results']:
                new_node_with_score = NodeWithScore(
                    node=nodes[result['index']].node, score=result['relevance_score']
                )
                # Without a threshold every result is kept; previously an
                # unset threshold dropped all results.
                if self.threshold is None or new_node_with_score.score >= self.threshold:
                    new_nodes.append(new_node_with_score)

            # Slice instead of removing items one by one: the old loop skipped
            # the final element and therefore returned ``top_n + 1`` nodes.
            # NOTE(review): assumes the service returns results ordered by
            # relevance — confirm against the Xinference rerank API.
            if self.top_n is not None:
                new_nodes = new_nodes[:self.top_n]

            event.on_end(payload={EventPayload.NODES: new_nodes})

        dispatcher.event(
            ReRankEndEvent(
                nodes=new_nodes
            )
        )
        return new_nodes

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """Resolve the model handle and its description from the endpoint.

        Args:
            model_uid: UID of the model to look up.
            endpoint: URL of the Xinference endpoint.

        Returns:
            A tuple ``(generator, model_description)``.

        Raises:
            ImportError: If the ``xinference`` client library is missing.
            RuntimeError: If the endpoint returns no handle or no description
                for ``model_uid``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as err:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from err

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``get`` instead of indexing so that an unknown UID reaches the
        # RuntimeError below instead of surfacing as a bare KeyError.
        model_description = client.list_models().get(model_uid)
        # Explicit check instead of ``assert``, which is stripped under ``-O``.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )
        return generator, model_description
+1053 -896
View File
File diff suppressed because it is too large Load Diff
+38 -28
View File
@@ -10,46 +10,54 @@ readme = "README.md"
generate = "app.engine.generate:generate_datasource"
[tool.poetry.dependencies]
python = "^3.11,<3.12"
fastapi = "^0.110.3"
python-dotenv = "^1.0.0"
python = "^3.11,<3.13"
fastapi = "0.110.3"
python-dotenv = "^1.0.1"
aiostream = "^0.6.2"
llama-index = "0.10.63"
cachetools = "^5.3.3"
cachetools = "^5.5.0"
protobuf = "4.25.4"
nltk = "^3.9.1"
jieba = "^0.42.1"
#arize-phoenix = "^4.12.0"
openinference-instrumentation-llama-index="2.2.3"
llama-index-callbacks-arize-phoenix = "^0.1.4"
llama-index-llms-dashscope = "^0.1.2"
llama-index-embeddings-dashscope = "^0.1.4"
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
xinference = "^0.14.1"
xinference-client = "^0.14.1"
llama-index-llms-xinference = "^0.1.2"
qdrant-client="^1.10.1"
llama-index-vector-stores-qdrant = "^0.2.14"
chroma="^0.2.0"
llama-index-vector-stores-chroma = "^0.1.10"
llama-index-readers-json = "^0.1.5"
llama-index-retrievers-bm25 = "^0.2.2"
llama-index-experimental = "^0.1.4"
llama-index-llms-ollama = "^0.1.6"
llama-index-embeddings-ollama = "^0.1.3"
transformers = "^4.43.0"
duckduckgo_search = "^6.2.6"
#arize-phoenix = "^4.12.0"
openinference-instrumentation-llama-index="^3.0.2"
llama-index = "^0.11.7"
llama-index-core = "^0.11.7"
llama-index-callbacks-arize-phoenix = "^0.2.1"
llama-index-llms-dashscope = "^0.2.0"
llama-index-embeddings-dashscope = "^0.2.1"
#llama-index-postprocessor-dashscope-rerank = "^0.2.0"
llama-index-llms-ollama = "^0.3.1"
llama-index-embeddings-ollama = "^0.3.0"
xinference = "^0.15.0"
xinference-client = "^0.15.0"
llama-index-llms-xinference = "^0.2.1"
llama-index-embeddings-xinference = "^0.1.0"
llama-index-postprocessor-xinference-rerank = "^0.1.0"
qdrant-client="^1.11.0"
llama-index-vector-stores-qdrant = "^0.3.0"
chroma="^0.2.0"
llama-index-vector-stores-chroma = "^0.2.0"
llama-index-readers-json = "^0.2.0"
llama-index-retrievers-bm25 = "^0.3.0"
llama-index-experimental = "^0.3.0"
duckduckgo_search = "^6.2.10"
[tool.poetry.dependencies.uvicorn]
extras = [ "standard" ]
version = "^0.23.2"
version = "^0.30.6"
[tool.poetry.dependencies.llama-index-readers-database]
version = "^0.1.3"
version = "^0.2.0"
[tool.poetry.dependencies.pymysql]
version = "^1.1.0"
version = "^1.1.1"
extras = [ "rsa" ]
#[tool.poetry.dependencies.psycopg2]
@@ -62,7 +70,8 @@ extras = [ "rsa" ]
version = "^0.8"
[tool.poetry.dependencies.e2b_code_interpreter]
version = "0.0.7"
version = "^0.0.7"
[[tool.poetry.source]]
@@ -82,6 +91,7 @@ name = "tsinghua"
url = "https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple/"
priority = "primary"
[build-system]
requires = [ "poetry-core" ]
build-backend = "poetry.core.masonry.api"