初始化提交

This commit is contained in:
2024-08-13 09:37:23 +08:00
parent 4923337038
commit e112fa4e44
50 changed files with 1649 additions and 259 deletions
+4 -4
View File
@@ -20,9 +20,9 @@ class CallbackEvent(BaseModel):
if self.payload: if self.payload:
nodes = self.payload.get("nodes") nodes = self.payload.get("nodes")
if nodes: if nodes:
msg = f"Retrieved {len(nodes)} sources to use as context for the query" msg = f"根据查询检索到 {len(nodes)} 源文件"
else: else:
msg = f"Retrieving context for query: '{self.payload.get('query_str')}'" msg = f"查询检索中: '{self.payload.get('query_str')}'"
return { return {
"type": "events", "type": "events",
"data": {"title": msg}, "data": {"title": msg},
@@ -37,7 +37,7 @@ class CallbackEvent(BaseModel):
return { return {
"type": "events", "type": "events",
"data": { "data": {
"title": f"Calling tool: {tool.name} with inputs: {func_call_args}", "title": f"调用工具 {tool.name} ,参数: {func_call_args}",
}, },
} }
@@ -87,7 +87,7 @@ class CallbackEvent(BaseModel):
case _: case _:
return None return None
except Exception as e: except Exception as e:
logger.error(f"Error in converting event to response: {e}") logger.error(f"转换回应时间时发生错误,原因: {e}")
return None return None
+2 -2
View File
@@ -173,12 +173,12 @@ class SourceNodes(BaseModel):
def from_source_node(cls, source_node: NodeWithScore): def from_source_node(cls, source_node: NodeWithScore):
metadata = source_node.node.metadata metadata = source_node.node.metadata
url = cls.get_url_from_metadata(metadata) url = cls.get_url_from_metadata(metadata)
text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
return cls( return cls(
id=source_node.node.node_id, id=source_node.node.node_id,
metadata=metadata, metadata=metadata,
score=source_node.score, score=source_node.score,
text=source_node.node.text, # type: ignore text=text, # type: ignore
url=url, url=url,
) )
+54 -4
View File
@@ -1,24 +1,67 @@
import os import os
from llama_index.core import SQLDatabase, SummaryIndex, VectorStoreIndex
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
from llama_index.core.settings import Settings from llama_index.core.settings import Settings
from llama_index.core.agent import AgentRunner from llama_index.core.agent import AgentRunner, StructuredPlannerAgent, FunctionCallingAgentWorker
from llama_index.core.tools.query_engine import QueryEngineTool from llama_index.core.tools.query_engine import QueryEngineTool
from sqlalchemy import create_engine, Engine
from app.engine.loaders.db import makeDescriptionByEngine
from app.engine.tools import ToolFactory from app.engine.tools import ToolFactory
from app.engine.index import get_index from app.engine.index import get_index
sql_database = None
sql_obj_index = None
def get_chat_engine(filters=None, params=None): def get_chat_engine(filters=None, params=None):
system_prompt = os.getenv("SYSTEM_PROMPT") system_prompt = os.getenv("SYSTEM_PROMPT")
top_k = os.getenv("TOP_K", "3") top_k = int(os.getenv("TOP_K", "3"))
tools = [] tools = []
global sql_obj_index
global sql_database
if sql_obj_index is None:
sqlengine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
sql_database = SQLDatabase(sqlengine)
table_schema_objs = makeDescriptionByEngine(sql_database)
table_node_mapping = SQLTableNodeMapping(sql_database)
sql_obj_index = ObjectIndex.from_objects(
table_schema_objs,
table_node_mapping,
index_cls=VectorStoreIndex,
)
# 创建SQL查询工具
sql_query_engine = SQLTableRetrieverQueryEngine(sql_database,
sql_obj_index.as_retriever(similarity_top_k=top_k),
verbose=True,)
sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
name="zjdata_query_tool",
description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具")
# Add query tool if index exists # Add query tool if index exists
index = get_index() index = get_index()
if index is not None: if index is not None:
summary_index = SummaryIndex(index.vector_store.get_nodes(node_ids=None))
summary_query_engine = summary_index.as_query_engine()
summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
description="适用于任何需要进行全面总结、概括的要求。",
#description="适用于任何需要对所有内容进行全面总结的请求。有关电力造价领域更具体部分的问题,请使用zj_query_engine_tool",
)
# 创建向量检索查询工具
query_engine = index.as_query_engine( query_engine = index.as_query_engine(
similarity_top_k=int(top_k), filters=filters similarity_top_k=top_k, filters=filters
) )
query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine) query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。",
)
tools.append(summary_query_tool)
tools.append(query_engine_tool) tools.append(query_engine_tool)
#tools.append(sql_query_tool)
# Add additional tools # Add additional tools
tools += ToolFactory.from_env() tools += ToolFactory.from_env()
@@ -29,3 +72,10 @@ def get_chat_engine(filters=None, params=None):
system_prompt=system_prompt, system_prompt=system_prompt,
verbose=True, verbose=True,
) )
# create the function calling worker for reasoning
# worker = FunctionCallingAgentWorker.from_tools(
# tools, verbose=True
# )
#
# # wrap the worker in the top-level planner
# return StructuredPlannerAgent(worker, tools)
+1
View File
@@ -0,0 +1 @@
STORAGE_DIR = "storage" # directory to cache the generated index
+60 -26
View File
@@ -2,50 +2,84 @@ from dotenv import load_dotenv
load_dotenv() load_dotenv()
import os
import logging import logging
from app.settings import init_settings import os
from app.engine.loaders import get_documents
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex
from app.engine.loaders import get_documents
from app.engine.vectordb import get_vector_store
from app.settings import init_settings
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.settings import Settings
from llama_index.core.storage import StorageContext
from llama_index.core.storage.docstore import SimpleDocumentStore
logging.basicConfig(level=logging.INFO) logging.basicConfig(level=logging.INFO)
logger = logging.getLogger() logger = logging.getLogger()
STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
def get_doc_store():
    """Return the document store used by the ingestion pipeline.

    When STORAGE_DIR exists on disk the store persisted there is loaded;
    otherwise a fresh in-memory SimpleDocumentStore is returned, because
    there is nothing to load from yet.
    """
    if not os.path.exists(STORAGE_DIR):
        return SimpleDocumentStore()
    return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
def run_pipeline(docstore, vector_store, documents):
    """Split, embed and store ``documents``; return the nodes produced.

    Args:
        docstore: document store handed to the IngestionPipeline.
        vector_store: vector store the pipeline writes to.
        documents: documents to ingest.

    Returns:
        Whatever ``IngestionPipeline.run`` returns for this run.
    """
    # Chunking parameters come from the global LlamaIndex settings.
    splitter = SentenceSplitter(
        chunk_size=Settings.chunk_size,
        chunk_overlap=Settings.chunk_overlap,
    )
    ingestion = IngestionPipeline(
        transformations=[splitter, Settings.embed_model],
        docstore=docstore,
        docstore_strategy="upserts_and_delete",
        vector_store=vector_store,
    )
    return ingestion.run(show_progress=True, documents=documents)
def persist_storage(docstore, vector_store):
    """Persist the given document store and vector store under STORAGE_DIR."""
    context = StorageContext.from_defaults(
        vector_store=vector_store,
        docstore=docstore,
    )
    context.persist(STORAGE_DIR)
def generate_datasource(): def generate_datasource():
init_settings() init_settings()
logger.info("Generate index for the provided data") logger.info("Generate index for the provided data")
name = os.getenv("LLAMA_CLOUD_INDEX_NAME") # Get the stores and documents or create new ones
project_name = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
api_key = os.getenv("LLAMA_CLOUD_API_KEY")
base_url = os.getenv("LLAMA_CLOUD_BASE_URL")
organization_id = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID")
if name is None or project_name is None or api_key is None:
raise ValueError(
"Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY"
" to your environment variables or config them in .env file"
)
documents = get_documents() documents = get_documents()
# Set private=false to mark the document as public (required for filtering) # Set private=false to mark the document as public (required for filtering)
for doc in documents: for doc in documents:
doc.metadata["private"] = "false" doc.metadata["private"] = "false"
docstore = get_doc_store()
vector_store = get_vector_store()
LlamaCloudIndex.from_documents( # Run the ingestion pipeline
documents=documents, _ = run_pipeline(docstore, vector_store, documents)
name=name,
project_name=project_name, # Build the index and persist storage
api_key=api_key, persist_storage(docstore, vector_store)
base_url=base_url,
organization_id=organization_id
)
logger.info("Finished generating the index") logger.info("Finished generating the index")
if __name__ == "__main__": if __name__ == "__main__":
generate_datasource() from phoenix.trace import using_project
with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
generate_datasource()
+13 -22
View File
@@ -1,31 +1,22 @@
import logging import logging
import os from llama_index.core.indices import VectorStoreIndex
from llama_index.indices.managed.llama_cloud import LlamaCloudIndex from app.engine.vectordb import get_vector_store
logger = logging.getLogger("uvicorn") logger = logging.getLogger("uvicorn")
index = None
def get_index(params=None): def get_index(params=None):
configParams = params or {} global index
pipelineConfig = configParams.get("llamaCloudPipeline", {}) if index is None:
name = pipelineConfig.get("pipeline", os.getenv("LLAMA_CLOUD_INDEX_NAME")) logger.info("Connecting vector store...")
project_name = pipelineConfig.get("project", os.getenv("LLAMA_CLOUD_PROJECT_NAME"))
api_key = os.getenv("LLAMA_CLOUD_API_KEY")
base_url = os.getenv("LLAMA_CLOUD_BASE_URL")
organization_id = os.getenv("LLAMA_CLOUD_ORGANIZATION_ID")
if name is None or project_name is None or api_key is None: store = get_vector_store()
raise ValueError( # Load the index from the vector store
"Please set LLAMA_CLOUD_INDEX_NAME, LLAMA_CLOUD_PROJECT_NAME and LLAMA_CLOUD_API_KEY" # If you are using a vector store that doesn't store text,
" to your environment variables or config them in .env file" # you must load the index from both the vector store and the document store
) index = VectorStoreIndex.from_vector_store(store)
logger.info("Finished load index from vector store.")
index = LlamaCloudIndex(
name=name,
project_name=project_name,
api_key=api_key,
base_url=base_url,
organization_id=organization_id
)
return index return index
+6 -3
View File
@@ -17,19 +17,22 @@ def load_configs():
def get_documents(): def get_documents():
documents = [] documents = []
config = load_configs() config = load_configs()
if config is None or len(config.items()) == 0:
return documents
for loader_type, loader_config in config.items(): for loader_type, loader_config in config.items():
logger.info( logger.info(
f"Loading documents from loader: {loader_type}, config: {loader_config}" f"Loading documents from loader: {loader_type}, config: {loader_config}"
) )
loader_config = loader_config or []
match loader_type: match loader_type:
case "file": case "file":
document = get_file_documents(FileLoaderConfig(**loader_config)) document = get_file_documents(FileLoaderConfig(**loader_config))
case "web": case "web":
document = get_web_documents(WebLoaderConfig(**loader_config)) document = get_web_documents(WebLoaderConfig(**loader_config))
case "db": case "db":
document = get_db_documents( document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config])
configs=[DBLoaderConfig(**cfg) for cfg in loader_config]
)
case _: case _:
raise ValueError(f"Invalid loader type: {loader_type}") raise ValueError(f"Invalid loader type: {loader_type}")
documents.extend(document) documents.extend(document)
+167 -6
View File
@@ -1,26 +1,187 @@
import os import os
import logging import logging
from typing import List from typing import List
from typing import Any, List, Optional
from llama_index.core.readers.base import BaseReader
from llama_index.core.schema import Document
from llama_index.core.utilities.sql_wrapper import SQLDatabase
from sqlalchemy import text
from sqlalchemy.engine import Engine
from llama_index.core import SQLDatabase, Document
from llama_index.core.objects import SQLTableSchema, SQLTableNodeMapping
from llama_index.core.readers.base import BaseReader
from llama_index.readers.database import DatabaseReader
from pydantic import BaseModel, validator from pydantic import BaseModel, validator
from llama_index.core.indices.vector_store import VectorStoreIndex from llama_index.core.indices.vector_store import VectorStoreIndex
from sqlalchemy import create_engine
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
class CustomDatabaseReader(BaseReader):
    """Database reader that turns the result of one SQL query into one Document.

    The connection is taken from the first of these that is provided:

    Args:
        sql_database (Optional[SQLDatabase]): SQLDatabase wrapper to use as is.
        engine (Optional[Engine]): SQLAlchemy engine; wrapped in a SQLDatabase.
        uri (Optional[str]): connection URI passed to ``SQLDatabase.from_uri``.
        scheme (Optional[str]): scheme of the database connection.
        host (Optional[str]): host of the database connection.
        port (Optional[str]): port of the database connection.
        user (Optional[str]): user of the database connection.
        password (Optional[str]): password of the database connection.
        dbname (Optional[str]): dbname of the database connection.
        *args, **kwargs: forwarded to ``SQLDatabase`` / ``SQLDatabase.from_uri``
            (unused when ``sql_database`` is given).

    Raises:
        ValueError: if no usable connection information is supplied.
    """

    def __init__(
        self,
        sql_database: Optional[SQLDatabase] = None,
        engine: Optional[Engine] = None,
        uri: Optional[str] = None,
        scheme: Optional[str] = None,
        host: Optional[str] = None,
        port: Optional[str] = None,
        user: Optional[str] = None,
        password: Optional[str] = None,
        dbname: Optional[str] = None,
        *args: Any,
        **kwargs: Any,
    ) -> None:
        """Initialize with parameters."""
        if sql_database:
            self.sql_database = sql_database
        elif engine:
            self.sql_database = SQLDatabase(engine, *args, **kwargs)
        elif uri:
            self.uri = uri
            self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
        elif scheme and host and port and user and password and dbname:
            # All six credential parts are required to assemble a URI.
            uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}"
            self.uri = uri
            self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
        else:
            raise ValueError(
                "You must provide either a SQLDatabase, "
                "a SQL Alchemy Engine, a valid connection URI, or a valid "
                "set of credentials."
            )

    def load_data(self, query: str) -> List[Document]:
        """Run ``query`` and return its result as a single Document.

        The document text is a comma-separated header line with the column
        names, followed by one comma-separated line per result row.

        Args:
            query (str): SQL statement to execute.

        Returns:
            List[Document]: a one-element list holding the rendered result.

        Raises:
            ValueError: if ``query`` is None.
        """
        # Validate before opening a connection so a bad call costs nothing.
        if query is None:
            raise ValueError("A query parameter is necessary to filter the data")

        with self.sql_database.engine.connect() as connection:
            result = connection.execute(text(query))
            lines = [", ".join(f"{column}" for column in result.keys())]
            lines.extend(
                ", ".join(f"{value}" for value in row) for row in result.fetchall()
            )

        # Every line, including the header, is newline-terminated; previously
        # the header ran straight into the first row.
        doc = Document(text="\n".join(lines) + "\n")
        doc.metadata["name"] = query
        doc.metadata["context"] = query
        doc.metadata["file_type"] = "application/vnd.ms-excel"
        return [doc]
class DBLoaderConfig(BaseModel): class DBLoaderConfig(BaseModel):
uri: str uri: str
queries: List[str] queries: List[str]
def makeDescriptionByEngine(sql_database: SQLDatabase):
    """Build one SQLTableSchema per usable table of ``sql_database``.

    Tables with more than 150 columns are skipped. For the table named
    ``gongchengshuxing`` the context string additionally lists values read
    from its ``name`` column (rows 2 to 30 of the query result; the first
    returned document is skipped). All other tables get an empty context.

    Returns:
        list of SQLTableSchema objects, in the order the tables are reported.
    """
    reader = DatabaseReader(sql_database)
    schemas = []
    for name in sql_database.get_usable_table_names():
        if len(sql_database.get_table_columns(name)) > 150:
            continue

        context = ""
        if name == 'gongchengshuxing':
            rows = reader.load_data(query='select name from gongchengshuxing')
            # Each document text is expected to look like "<column>:<value>";
            # only the part after the first colon is kept.
            context = '该表中有以下属性:' + ','.join(
                row.text.split(':')[1] for row in rows[1:30]
            )

        schemas.append(SQLTableSchema(table_name=name, context_str=context))
    return schemas
def get_db_documents(configs: list[DBLoaderConfig]): def get_db_documents(configs: list[DBLoaderConfig]):
from llama_index.readers.database import DatabaseReader
docs = [] docs = []
if len(configs) == 0 or configs[0].uri == "":
logger.warning(
f"Failed to load database, error message: uri is empty. Return as empty document list."
)
return docs
metadata = {
#'file_name':'',
'file_type':'application/booway.document.zj',
#'file_path':'',
#'file_size':'',
#'creation_date':'',
#'last_modified_date':'',
}
#from llama_index.readers.database import DatabaseReader
for entry in configs: for entry in configs:
loader = DatabaseReader(uri=entry.uri) engine = create_engine(entry.uri)
for query in entry.queries: sql_database = SQLDatabase(engine)
table_schema_objs = makeDescriptionByEngine(sql_database)
table_node_mapping = SQLTableNodeMapping(sql_database)
nodes = table_node_mapping.to_nodes(table_schema_objs)
for node in nodes:
node.metadata.update(metadata)
docs.extend(nodes)
queries = entry.queries or []
loader = CustomDatabaseReader(sql_database)
for query in queries:
logger.info(f"Loading data from database with query: {query}") logger.info(f"Loading data from database with query: {query}")
documents = loader.load_data(query=query) documents = loader.load_data(query=query)
docs.extend(documents)
return documents docs.extend(documents)
return docs
+9
View File
@@ -1,6 +1,9 @@
import os import os
import logging import logging
from typing import Dict from typing import Dict
from llama_index.core.readers.base import BaseReader
from llama_index.core.readers.json import JSONReader
from llama_parse import LlamaParse from llama_parse import LlamaParse
from pydantic import BaseModel, validator from pydantic import BaseModel, validator
@@ -39,6 +42,9 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
parser = llama_parse_parser() parser = llama_parse_parser()
return {file_type: parser for file_type in SUPPORTED_FILE_TYPES} return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
def llama_local_extractor() -> Dict[str, BaseReader]:
    """Return the file extractors used when LlamaParse is disabled.

    The mapping is keyed by file suffix including the leading dot and holds
    reader *instances*, which is the shape ``SimpleDirectoryReader`` expects
    for its ``file_extractor`` argument (it looks up ``Path.suffix`` and calls
    ``load_data`` on the value).
    """
    return {".json": JSONReader()}
def get_file_documents(config: FileLoaderConfig): def get_file_documents(config: FileLoaderConfig):
from llama_index.core.readers import SimpleDirectoryReader from llama_index.core.readers import SimpleDirectoryReader
@@ -53,6 +59,9 @@ def get_file_documents(config: FileLoaderConfig):
nest_asyncio.apply() nest_asyncio.apply()
file_extractor = llama_parse_extractor() file_extractor = llama_parse_extractor()
else:
file_extractor = llama_local_extractor()
reader = SimpleDirectoryReader( reader = SimpleDirectoryReader(
config.data_dir, config.data_dir,
recursive=True, recursive=True,
+2 -1
View File
@@ -11,7 +11,7 @@ class CrawlUrl(BaseModel):
class WebLoaderConfig(BaseModel): class WebLoaderConfig(BaseModel):
driver_arguments: list[str] = Field(default=None) driver_arguments: list[str] = Field(default=None)
urls: list[CrawlUrl] urls: list[CrawlUrl] = []
def get_web_documents(config: WebLoaderConfig): def get_web_documents(config: WebLoaderConfig):
@@ -25,6 +25,7 @@ def get_web_documents(config: WebLoaderConfig):
options.add_argument(arg) options.add_argument(arg)
docs = [] docs = []
urls = config.urls or []
for url in config.urls: for url in config.urls:
scraper = WholeSiteReader( scraper = WholeSiteReader(
prefix=url.prefix, prefix=url.prefix,
+9 -5
View File
@@ -48,9 +48,13 @@ class ToolFactory:
if os.path.exists("config/tools.yaml"): if os.path.exists("config/tools.yaml"):
with open("config/tools.yaml", "r") as f: with open("config/tools.yaml", "r") as f:
tool_configs = yaml.safe_load(f) tool_configs = yaml.safe_load(f)
for tool_type, config_entries in tool_configs.items(): if tool_configs != None and len(tool_configs.items()) != 0:
for tool_name, config in config_entries.items(): for tool_type, config_entries in tool_configs.items():
tools.extend( if config_entries == None or len(config_entries.items()) == 0:
ToolFactory.load_tools(tool_type, tool_name, config) continue
)
for tool_name, config in config_entries.items():
tools.extend(
ToolFactory.load_tools(tool_type, tool_name, config)
)
return tools return tools
+71
View File
@@ -0,0 +1,71 @@
import os
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client import qdrant_client
qclient = None  # process-wide Qdrant client, created on first use


def get_qdrant_vector_store():
    """Return a QdrantVectorStore backed by a shared Qdrant client.

    Configuration is read from the environment:

    * ``VECTOR_STORE_COLLECTION`` - collection name (default ``"default"``).
    * ``VECTOR_STORE_PATH`` - if set, use a local on-disk Qdrant at this path.
    * ``VECTOR_STORE_HOST`` / ``VECTOR_STORE_PORT`` - remote server, used when
      no path is set (defaults ``127.0.0.1`` and ``6333``).

    The client is created once and reused by later calls.

    Raises:
        ValueError: if neither a path nor a host is configured.
    """
    global qclient

    collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
    vector_store_path = os.getenv("VECTOR_STORE_PATH")
    # No trailing comma here: a trailing comma would turn the value into a tuple.
    host = os.getenv("VECTOR_STORE_HOST", "127.0.0.1")

    # Only fail when *both* ways of reaching Qdrant are missing.
    if not vector_store_path and not host:
        raise ValueError(
            "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
        )

    if qclient is None:
        if vector_store_path:
            # Local, file-backed Qdrant.
            qclient = qdrant_client.QdrantClient(path=vector_store_path)
        else:
            # Remote Qdrant; the port is only needed (and parsed) in this mode.
            port = int(os.getenv("VECTOR_STORE_PORT", "6333"))
            qclient = qdrant_client.QdrantClient(host=host, port=port)

    return QdrantVectorStore(client=qclient, collection_name=collection_name)
def get_chroma_vector_store():
    """Return a ChromaVectorStore configured from environment variables.

    With ``VECTOR_STORE_PATH`` set, a local persistent store at that path is
    used. Otherwise both ``VECTOR_STORE_HOST`` and ``VECTOR_STORE_PORT`` must
    be set and a remote store is used. The collection name comes from
    ``VECTOR_STORE_COLLECTION`` (default ``"default"``) and the collection is
    requested with cosine distance in both modes.

    Raises:
        ValueError: if no path is set and host or port is missing.
    """
    shared = {
        "collection_name": os.getenv("VECTOR_STORE_COLLECTION", "default"),
        "collection_kwargs": {"metadata": {"hnsw:space": "cosine"}},
    }

    local_path = os.getenv("VECTOR_STORE_PATH")
    if local_path:
        return ChromaVectorStore.from_params(persist_dir=local_path, **shared)

    remote_host = os.getenv("VECTOR_STORE_HOST")
    remote_port = os.getenv("VECTOR_STORE_PORT")
    if not (remote_host and remote_port):
        raise ValueError(
            "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
        )
    return ChromaVectorStore.from_params(
        host=remote_host,
        port=int(remote_port),
        **shared,
    )
def get_vector_store():
    """Return the vector store selected by ``VECTOR_STORE_TYPE``.

    Supported values are ``"chroma"`` and ``"qdrant"``.

    Raises:
        ValueError: for any other value, including an unset variable.
    """
    kind = os.getenv("VECTOR_STORE_TYPE")
    if kind == "chroma":
        return get_chroma_vector_store()
    if kind == "qdrant":
        return get_qdrant_vector_store()
    raise ValueError(f"Invalid vector store type: {kind}")
+19 -1
View File
@@ -1,2 +1,20 @@
import os
import llama_index.core
def init_observability(): def init_observability():
pass
PHOENIX_API_KEY = os.getenv("PHOENIX_API_KEY")
if not PHOENIX_API_KEY:
raise ValueError("PHOENIX_API_KEY environment variable is not set")
os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = f"api_key={PHOENIX_API_KEY}"
PHOENIX_URL = os.getenv("PHOENIX_URL")
llama_index.core.set_global_handler(
"arize_phoenix", endpoint=PHOENIX_URL, eval_params={}
)
#debugHandle=[]
# llama_debug = LlamaDebugHandler(print_trace_on_end=True)
# debugHandle.append(llama_debug)
# callback_manager = CallbackManager(debugHandle)
# settings.Settings.callback_manager = callback_manager
+125 -96
View File
@@ -1,6 +1,7 @@
import os import os
from typing import Dict from typing import Dict
from llama_index.core.constants import DEFAULT_TEMPERATURE
from llama_index.core.settings import Settings from llama_index.core.settings import Settings
@@ -9,6 +10,8 @@ def init_settings():
match model_provider: match model_provider:
case "openai": case "openai":
init_openai() init_openai()
case "dashscope":
init_dashscope()
case "groq": case "groq":
init_groq() init_groq()
case "ollama": case "ollama":
@@ -33,20 +36,21 @@ def init_settings():
def init_ollama(): def init_ollama():
from llama_index.embeddings.ollama import OllamaEmbedding # from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama # from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama
#
base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434" # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
request_timeout = float( # request_timeout = float(
os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT) # os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
) # )
Settings.embed_model = OllamaEmbedding( # Settings.embed_model = OllamaEmbedding(
base_url=base_url, # base_url=base_url,
model_name=os.getenv("EMBEDDING_MODEL"), # model_name=os.getenv("EMBEDDING_MODEL"),
) # )
Settings.llm = Ollama( # Settings.llm = Ollama(
base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout # base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
) # )
pass
def init_openai(): def init_openai():
@@ -69,104 +73,129 @@ def init_openai():
} }
Settings.embed_model = OpenAIEmbedding(**config) Settings.embed_model = OpenAIEmbedding(**config)
def init_dashscope():
    """Configure DashScope as the global LLM and embedding provider.

    Environment variables honoured (all optional):

    * ``MODEL`` - DashScope generation model name; defaults to Qwen-Max.
    * ``LLM_TEMPERATURE`` - sampling temperature.
    * ``LLM_MAX_TOKENS`` - maximum number of generated tokens.
    * ``EMBEDDING_MODEL`` - DashScope embedding model name; defaults to
      text-embedding-v2.

    When a variable is unset the corresponding argument is not passed, so the
    DashScope class default applies.
    """
    from llama_index.llms.dashscope import DashScope, DashScopeGenerationModels
    from llama_index.embeddings.dashscope import (
        DashScopeEmbedding,
        DashScopeTextEmbeddingModels,
        DashScopeTextEmbeddingType,
    )

    # Previously these settings were collected into a dict that was never
    # used, so the environment configuration was silently ignored.
    llm_kwargs = {
        "model_name": os.getenv("MODEL") or DashScopeGenerationModels.QWEN_MAX,
    }
    temperature = os.getenv("LLM_TEMPERATURE")
    if temperature is not None:
        llm_kwargs["temperature"] = float(temperature)
    max_tokens = os.getenv("LLM_MAX_TOKENS")
    if max_tokens is not None:
        llm_kwargs["max_tokens"] = int(max_tokens)
    Settings.llm = DashScope(**llm_kwargs)

    # NOTE(review): EMBEDDING_DIM was read here before but never applied;
    # it is left out until a supported way to set the dimension is confirmed.
    Settings.embed_model = DashScopeEmbedding(
        model_name=os.getenv("EMBEDDING_MODEL")
        or DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2,
        text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY,
    )
def init_azure_openai(): def init_azure_openai():
from llama_index.core.constants import DEFAULT_TEMPERATURE # from llama_index.core.constants import DEFAULT_TEMPERATURE
from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
from llama_index.llms.azure_openai import AzureOpenAI # from llama_index.llms.azure_openai import AzureOpenAI
#
llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"] # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"] # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
max_tokens = os.getenv("LLM_MAX_TOKENS") # max_tokens = os.getenv("LLM_MAX_TOKENS")
temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE) # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
dimensions = os.getenv("EMBEDDING_DIM") # dimensions = os.getenv("EMBEDDING_DIM")
#
azure_config = { # azure_config = {
"api_key": os.environ["AZURE_OPENAI_KEY"], # "api_key": os.environ["AZURE_OPENAI_KEY"],
"azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"], # "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
"api_version": os.getenv("AZURE_OPENAI_API_VERSION") # "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
or os.getenv("OPENAI_API_VERSION"), # or os.getenv("OPENAI_API_VERSION"),
} # }
#
Settings.llm = AzureOpenAI( # Settings.llm = AzureOpenAI(
model=os.getenv("MODEL"), # model=os.getenv("MODEL"),
max_tokens=int(max_tokens) if max_tokens is not None else None, # max_tokens=int(max_tokens) if max_tokens is not None else None,
temperature=float(temperature), # temperature=float(temperature),
deployment_name=llm_deployment, # deployment_name=llm_deployment,
**azure_config, # **azure_config,
) # )
#
Settings.embed_model = AzureOpenAIEmbedding( # Settings.embed_model = AzureOpenAIEmbedding(
model=os.getenv("EMBEDDING_MODEL"), # model=os.getenv("EMBEDDING_MODEL"),
dimensions=int(dimensions) if dimensions is not None else None, # dimensions=int(dimensions) if dimensions is not None else None,
deployment_name=embedding_deployment, # deployment_name=embedding_deployment,
**azure_config, # **azure_config,
) # )
pass
def init_fastembed(): def init_fastembed():
""" """
Use Qdrant Fastembed as the local embedding provider. Use Qdrant Fastembed as the local embedding provider.
""" """
from llama_index.embeddings.fastembed import FastEmbedEmbedding # from llama_index.embeddings.fastembed import FastEmbedEmbedding
#
embed_model_map: Dict[str, str] = { # embed_model_map: Dict[str, str] = {
# Small and multilingual # # Small and multilingual
"all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2", # "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
# Large and multilingual # # Large and multilingual
"paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501 # "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2", # noqa: E501
} # }
#
# This will download the model automatically if it is not already downloaded # # This will download the model automatically if it is not already downloaded
Settings.embed_model = FastEmbedEmbedding( # Settings.embed_model = FastEmbedEmbedding(
model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")] # model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
) # )
pass
def init_groq(): def init_groq():
from llama_index.llms.groq import Groq # from llama_index.llms.groq import Groq
#
model_map: Dict[str, str] = { # model_map: Dict[str, str] = {
"llama3-8b": "llama3-8b-8192", # "llama3-8b": "llama3-8b-8192",
"llama3-70b": "llama3-70b-8192", # "llama3-70b": "llama3-70b-8192",
"mixtral-8x7b": "mixtral-8x7b-32768", # "mixtral-8x7b": "mixtral-8x7b-32768",
} # }
#
Settings.llm = Groq(model=model_map[os.getenv("MODEL")]) # Settings.llm = Groq(model=model_map[os.getenv("MODEL")])
# Groq does not provide embeddings, so we use FastEmbed instead # # Groq does not provide embeddings, so we use FastEmbed instead
init_fastembed() # init_fastembed()
pass
def init_anthropic(): def init_anthropic():
from llama_index.llms.anthropic import Anthropic # from llama_index.llms.anthropic import Anthropic
#
model_map: Dict[str, str] = { # model_map: Dict[str, str] = {
"claude-3-opus": "claude-3-opus-20240229", # "claude-3-opus": "claude-3-opus-20240229",
"claude-3-sonnet": "claude-3-sonnet-20240229", # "claude-3-sonnet": "claude-3-sonnet-20240229",
"claude-3-haiku": "claude-3-haiku-20240307", # "claude-3-haiku": "claude-3-haiku-20240307",
"claude-2.1": "claude-2.1", # "claude-2.1": "claude-2.1",
"claude-instant-1.2": "claude-instant-1.2", # "claude-instant-1.2": "claude-instant-1.2",
} # }
#
Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")]) # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
# Anthropic does not provide embeddings, so we use FastEmbed instead # # Anthropic does not provide embeddings, so we use FastEmbed instead
init_fastembed() # init_fastembed()
pass
def init_gemini(): def init_gemini():
from llama_index.embeddings.gemini import GeminiEmbedding # from llama_index.embeddings.gemini import GeminiEmbedding
from llama_index.llms.gemini import Gemini # from llama_index.llms.gemini import Gemini
#
model_name = f"models/{os.getenv('MODEL')}" # model_name = f"models/{os.getenv('MODEL')}"
embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}" # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
#
Settings.llm = Gemini(model=model_name) # Settings.llm = Gemini(model=model_name)
Settings.embed_model = GeminiEmbedding(model_name=embed_model_name) # Settings.embed_model = GeminiEmbedding(model_name=embed_model_name)
pass
def init_mistral(): def init_mistral():
from llama_index.embeddings.mistralai import MistralAIEmbedding # from llama_index.embeddings.mistralai import MistralAIEmbedding
from llama_index.llms.mistralai import MistralAI # from llama_index.llms.mistralai import MistralAI
#
Settings.llm = MistralAI(model=os.getenv("MODEL")) # Settings.llm = MistralAI(model=os.getenv("MODEL"))
Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL")) # Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
pass
+22 -2
View File
@@ -1,10 +1,30 @@
file: file:
# use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable # use_llama_parse: Use LlamaParse if `true`. Needs a `LLAMA_CLOUD_API_KEY` from https://cloud.llamaindex.ai set as environment variable
use_llama_parse: true use_llama_parse: false
db: db:
# The configuration for the database loader, only supports MySQL and PostgreSQL databases for now. # The configuration for the database loader, only supports MySQL and PostgreSQL databases for now.
# uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db # uri: The URI for the database. E.g.: mysql+pymysql://user:password@localhost:3306/db or postgresql+psycopg2://user:password@localhost:5432/db
# query: The query to fetch data from the database. E.g.: SELECT * FROM table # query: The query to fetch data from the database. E.g.: SELECT * FROM table
- uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1 - uri: mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
#- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo
# - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
queries: queries:
- SELECT * FROM mytable - select * from ProjectProperties limit 30;
- select Name, Code, Amount, Amount_Total from TotalCalculateTable
- select SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 1 limit 30;
- select Name, Code, Rate, Amount from OtherFee
#web:
# driver_arguments:
# # The arguments to pass to the webdriver. E.g.: add --headless to run in headless mode
# - --no-sandbox
# - --disable-dev-shm-usage
# urls:
# # base_url: The URL to start crawling with
# # prefix: Only crawl URLs matching the specified prefix
# # depth: The maximum depth for BFS traversal
# # You can add more websites by adding more entries (don't forget the - prefix from YAML)
# - base_url: https://www.llamaindex.ai
# prefix: https://www.llamaindex.ai
# depth: 1
+3 -2
View File
@@ -1,4 +1,5 @@
local: local:
weather: {} #weather: {}
interpreter: {} #interpreter: {}
#duckduckgo: {}
llamahub: {} llamahub: {}
Binary file not shown.
@@ -0,0 +1,61 @@
{
"Table": [
{
"name": "FeeCollectionTable",
"alias": "",
"comment": "取费表是取费设置中各取费表明细。查询示例: SELECT Rate FROM FeeCollectionTable WHERE Name = 'findname'。",
"fileds": [
{
"name": "FeeCollectionTableName",
"alias": "取费名,取费名称,取费表名称",
"comment": "取费表名称"
},
{
"name": "Name",
"alias": "费用名,名称,项目名",
"comment": "费用名称,项目名称"
},
{
"name": "SerialNumber",
"alias": "费用序号,序号,序列号",
"comment": "费用表序号"
},
{
"name": "Code",
"alias": "代码,代号,编号",
"comment": "费用代码"
},
{
"name": "CalculationFormula",
"alias": "表达式,公式,计算式",
"comment": "取费基数"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "取费费率"
},
{
"name": "Remarks",
"alias": "备注,说明",
"comment": "费用项备注说明"
},
{
"name": "Major",
"alias": "专业",
"comment": "取费表专业"
},
{
"name": "Type",
"alias": "类型,取费类型",
"comment": "取费表类型"
},
{
"name": "Path",
"alias": "费用全路径,路径",
"comment": "费用项层级全路径"
}
]
}
]
}
@@ -0,0 +1,76 @@
{
"Table": [
{
"name": "OtherFee",
"alias": "",
"comment": "其他费用表被称为“工程费用中其他费用明细”。其他费用是指为完成工程项目建设所必需的,但不属于建筑工程费、安装工程费、设备购置费、基本预备费的其他相关费用。包括建设场地征用及清理费、项目建设管理费、项目建设技术服务费、生产准备费、大件运输措施费、专业爆破服务费等。查询示例: SELECT Rate FROM OtherFee WHERE Name = 'findname'。",
"fileds": [
{
"name": "Id",
"alias": "id,项目id,费用id",
"comment": "费用项目id"
},
{
"name": "ParentId",
"alias": "父id,父级id",
"comment": "费用项目父级id"
},
{
"name": "Level",
"alias": "层级,层编号,层号",
"comment": "层级编号,从1开始"
},
{
"name": "Name",
"alias": "费用名,名称,项目名",
"comment": "费用名称,项目名称"
},
{
"name": "SerialNumber",
"alias": "序号,序列号",
"comment": "费用表序号"
},
{
"name": "Code",
"alias": "代码,代号,编号",
"comment": "费用代码"
},
{
"name": "CalculationFormula",
"alias": "表达式,公式,计算式",
"comment": "取费基数"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "取费费率"
},
{
"name": "Amount",
"alias": "金额,价格",
"comment": "金额、合计、费用,\n单位为元"
},
{
"name": "Remarks",
"alias": "备注,说明",
"comment": "费用项备注说明"
},
{
"name": "Compilation_Basis",
"alias": "编制依据,编制来源",
"comment": "费用项编制依据"
},
{
"name": "WBS_Code",
"alias": "WBS编号,WBS编码",
"comment": "费用项WBS编码"
},
{
"name": "Path",
"alias": "费用全路径,路径",
"comment": "费用项层级全路径"
}
]
}
]
}
@@ -0,0 +1,126 @@
{
"Table": [
{
"name": "ProjectDivision",
"alias": "",
"comment": "项目划分表是用于存储工程项目划分树状数据。内部包含安装工程项目划分,建筑工程项目划分,线路项目划分,工程分部分项。查询示例: SELECT Sum_Price FROM ProjectDivision WHERE Name = 'findname'。",
"fileds": [
{
"name": "Id",
"alias": "id,项目id,费用id",
"comment": "项目划分id"
},
{
"name": "ParentId",
"alias": "父id,父级id",
"comment": "项目划分父级id"
},
{
"name": "Level",
"alias": "层级,层编号,层号",
"comment": "层级编号,从1开始"
},
{
"name": "Quantity",
"alias": "个数,数量,数目",
"comment": "项目划分数量"
},
{
"name": "SerialNumber",
"alias": "项目序号,序号,序列号",
"comment": "项目划分序号"
},
{
"name": "Name",
"alias": "项目名,名称",
"comment": "项目名称"
},
{
"name": "Encoding",
"alias": "译码,编码",
"comment": "项目划分编码"
},
{
"name": "Sum_Price",
"alias": "合计,合价",
"comment": "项目划分合价,分部分项费用"
},
{
"name": "FeeCollectionTableName",
"alias": "取费表",
"comment": "项目划分的取费表,此项目划分选用的取费表"
},
{
"name": "Remarks",
"alias": "备注,说明",
"comment": "备注"
},
{
"name": "WBS_Code",
"alias": "WBS编号,WBS编码",
"comment": "WBS编码"
},
{
"name": "Manual_Adjustment_Coefficient",
"alias": "人工调差系数",
"comment": "此项目划分下人工调差系数"
},
{
"name": "Material_Adjustment_Coefficient",
"alias": "材料调差系数",
"comment": "此项目划分下材料调差系数"
},
{
"name": "Mechanical_Adjustment_Coefficient",
"alias": "机械调差系数",
"comment": "此项目划分下机械调差系数"
},
{
"name": "Demolition_Manual_Adjustment_Coefficient",
"alias": "拆除人工调差系数",
"comment": "此项目划分下拆除人工调差系数"
},
{
"name": "Demolition_Material_Adjustment_Coefficient",
"alias": "拆除材料调差系数",
"comment": "此项目划分下拆除材料调差系数"
},
{
"name": "Demolition_Mechanical_Adjustment_Coefficient",
"alias": "拆除机械调差系数",
"comment": "此项目划分下拆除机械调差系数"
},
{
"name": "ProfessionalType",
"alias": "专业类型",
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。"
},
{
"name": "Unit",
"alias": "单位",
"comment": "项目划分单位"
},
{
"name": "CalculationFormula",
"alias": "表达式,公式,计算式",
"comment": "项目划分计算式"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "项目划分费率"
},
{
"name": "Code",
"alias": "代码,代号,编号",
"comment": "项目划分代码"
},
{
"name": "Path",
"alias": "路径,项目全路径",
"comment": "项目划分层级全路径"
}
]
}
]
}
@@ -0,0 +1,201 @@
{
"Table": [
{
"name": "ProjectDivisions_CostPreview",
"alias": "",
"comment": "项目划分_费用预览表也被称为“项目划分费用预览”、“项目划分取费费用”。其中包含项目划分合价、直接费、间接费、利润、税金、主材费等。查询示例: SELECT Total FROM ProjectDivisions_CostPreview WHERE Id = '15'。",
"fileds": [
{
"name": "Id",
"alias": "id,项目id",
"comment": "项目划分id"
},
{
"name": "ParentId",
"alias": "父id,父级id",
"comment": "项目划分父级id"
},
{
"name": "Level",
"alias": "层级,层编号,层号",
"comment": "层级编号,从1开始"
},
{
"name": "ProfessionalType",
"alias": "专业类型",
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。"
},
{
"name": "FeeCollectionTableName",
"alias": "取费表",
"comment": "项目划分的取费表,此项目划分选用的取费表"
},
{
"name": "Direct_Cost",
"alias": "直接费",
"comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。"
},
{
"name": "Direct_Project_Cost",
"alias": "直接工程费",
"comment": "直接工程费是指按照正常的施工条件,在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。"
},
{
"name": "Quota_Direct_Cost",
"alias": "定额直接费",
"comment": "定额直接费,包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。"
},
{
"name": "Labor_Cost",
"alias": "人工费",
"comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。"
},
{
"name": "Material_Cost",
"alias": "材料费",
"comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。"
},
{
"name": "Construction_Machinery_Cost",
"alias": "施工机械使用费",
"comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。"
},
{
"name": "Installation_Material_Cost",
"alias": "装置性材料费",
"comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料,也称主要材料费。装置性材料通常在概算或预算定额中未计价,也称未计价材料,也称主材。"
},
{
"name": "A_Supply_Installation_Material_Cost",
"alias": "甲供装置性材料费",
"comment": "供货方为甲供的装置性材料费。"
},
{
"name": "B_Supply_Installation_Material_Cost",
"alias": "乙供装置性材料费",
"comment": "供货方为乙供的装置性材料费。"
},
{
"name": "Measure_Cost",
"alias": "措施费",
"comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。"
},
{
"name": "WinterRainySeasons_Additional_Construction_Cost",
"alias": "冬雨季施工增加费",
"comment": "冬雨季施工增加费是指按照合理的工期要求,建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。"
},
{
"name": "Night_Additional_Construction_Cost",
"alias": "夜间施工增加费",
"comment": "夜间施工增加费是指按照规程要求,工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。"
},
{
"name": "Construction_Tool_Usage_Cost",
"alias": "施工工具用具使用费",
"comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。"
},
{
"name": "Special_Areas_Additional_Construction_Cost",
"alias": "特殊地区施工增加费",
"comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工,因特殊自然条件影响而需额外增加的施工费用。"
},
{
"name": "Temporary_Facility_Cost",
"alias": "临时设施费",
"comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用,以及维修、拆除、折旧及摊销费,或临时设施的租赁费等。"
},
{
"name": "Construction_Organization_Relocation_Cost",
"alias": "施工机构迁移费",
"comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资,以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。"
},
{
"name": "Safe_Civilized_Construction_Cost",
"alias": "安全文明施工费",
"comment": "安全文明施工费,包括安全生产费、文明施工费、环境保护费。"
},
{
"name": "Indirect_Cost",
"alias": "间接费",
"comment": "间接费是指建筑安装工程的施工过程中,为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。"
},
{
"name": "Regulatory_Cost",
"alias": "规费",
"comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。"
},
{
"name": "Social_Insurance_Premiums",
"alias": "社会保险费",
"comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。"
},
{
"name": "Housing_Provident_Fund",
"alias": "住房公积金",
"comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。"
},
{
"name": "Enterprise_Management_Cost",
"alias": "企业管理费",
"comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。"
},
{
"name": "Construction_Enterprise_Cooperation_Debugging_Cost",
"alias": "施工企业配合调试费",
"comment": "施工企业配合调试费是指在工程整套启动试运阶段,施工企业安装专业配合调试所发生的费用。"
},
{
"name": "Profit",
"alias": "利润",
"comment": "利润是指施工企业完成所承包工程获得的盈利。"
},
{
"name": "Taxes",
"alias": "税金",
"comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。"
},
{
"name": "Equipment_Cost",
"alias": "设备费",
"comment": "设备购置费是指为项目建设而购置或自制各种设备,并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。"
},
{
"name": "B_Supply_Equipment_Excluding_Tax_Price",
"alias": "乙供设备不含税价",
"comment": "设备费中,供货方为乙供设备,不含税价"
},
{
"name": "A_Supply_Equipment_Tax_Price",
"alias": "甲供设备含税价",
"comment": "设备费中,供货方为甲供设备,含税价"
},
{
"name": "Installation_Cost",
"alias": "安装费",
"comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。"
},
{
"name": "Main_Material_Cost",
"alias": "主材费",
"comment": "主材费指装置性材料费"
},
{
"name": "Total",
"alias": "总体费用,总计,总价,总的费用",
"comment": "总计包含安装费、主材费、设备费。"
},
{
"name": "Sum",
"alias": "合计,合价",
"comment": "项目划分合价,分部分项费用,项目划分费用。合计包含安装费和主材费。"
},
{
"name": "Path",
"alias": "路径,项目划分全路径",
"comment": "项目划分层级全路径"
}
]
}
]
}
@@ -0,0 +1,31 @@
{
"Table": [
{
"name": "ProjectProperties",
"alias": "",
"comment": "工程属性表是用于存储整个工程的重要属性,访问该表都是为了通过属性名查找属性值。通常属性值有工程信息、工程属性、技经参数,表中包含工程总投资、工程总费用,工程主要费用,工程技经参数等。查询示例: SELECT Value FROM ProjectProperties WHERE Name = 'findname'。",
"fileds": [
{
"name": "Name",
"alias": "属性名,属性名称,属性",
"comment": "属性的唯一标识"
},
{
"name": "Value",
"alias": "属性值",
"comment": "属性对应的实际值"
},
{
"name": "Type",
"alias": "类型,属性类型",
"comment": "属性变量的类型"
},
{
"name": "Unit",
"alias": "单位",
"comment": "单位"
}
]
}
]
}
@@ -0,0 +1,281 @@
{
"Table": [
{
"name": "ProjectQuantities",
"alias": "",
"comment": "工程量表是项目划分下工程量,包含定额、主材、设备、一笔性费用。查询示例: SELECT BudgetPrice FROM ProjectQuantities WHERE Name = 'findname'。",
"fileds": [
{
"name": "Id",
"alias": "id",
"comment": "消耗量id,工程量id"
},
{
"name": "ParentId",
"alias": "父id,父级id",
"comment": "父级id"
},
{
"name": "ProjectDivisionId",
"alias": "项目划分id,项目id",
"comment": "父级项目划分id"
},
{
"name": "Quantity",
"alias": "个数,数量,数目",
"comment": "数量,消耗量数量,工程量数量,主材数量,定额数量,设备数量,项目划分数量"
},
{
"name": "FeatureSegment",
"alias": "特征段",
"comment": "线路特征段"
},
{
"name": "ParentQuantity",
"alias": "父级个数,父级数量",
"comment": "父级id的数量"
},
{
"name": "Name",
"alias": "名称",
"comment": "项目名称,工程量名称,消耗量名称,主材名称,定额名称,设备名称,材料名称"
},
{
"name": "Encoding",
"alias": "译码,编码",
"comment": "编码,定额编码,主材编码,设备编码"
},
{
"name": "SpecificationModel",
"alias": "规格型号",
"comment": "规格型号,主材规格型号,设备规格型号"
},
{
"name": "Unit",
"alias": "单位",
"comment": "单位,主材单位,定额单位,设备单位,项目划分单位"
},
{
"name": "BasePrice",
"alias": "基价",
"comment": "定额基价"
},
{
"name": "LaborCost",
"alias": "人工费",
"comment": "定额人工费"
},
{
"name": "MaterialCost",
"alias": "材料费",
"comment": "定额材料费"
},
{
"name": "MachineryCost",
"alias": "机械费",
"comment": "定额机械费"
},
{
"name": "QuotaCoefficient",
"alias": "定额系数",
"comment": "定额系数"
},
{
"name": "LaborCoefficient",
"alias": "人工系数",
"comment": "定额人工系数"
},
{
"name": "MaterialCoefficient",
"alias": "材料系数",
"comment": "定额材料系数"
},
{
"name": "MechanicalCoefficient",
"alias": "机械系数",
"comment": "定额机械系数"
},
{
"name": "ExpenseType",
"alias": "费用类型",
"comment": "费用类型,取值为取费、不取费"
},
{
"name": "BudgetPrice",
"alias": "预算价",
"comment": "预算价"
},
{
"name": "MarketPrice",
"alias": "市场价",
"comment": "市场价"
},
{
"name": "Supplier",
"alias": "供货方",
"comment": "供货方,设备供货方,主材供货方,取值为甲供、乙供"
},
{
"name": "Type",
"alias": "类型",
"comment": "工程量类型,取值定额、主材、设备、一笔性费用"
},
{
"name": "QuotaRange",
"alias": "定额范围",
"comment": "定额范围,取值概算、预算"
},
{
"name": "A_Supply_Material_Cost_Excluding_Tax",
"alias": "甲供材料费不含税",
"comment": "甲供材料费不含税"
},
{
"name": "A_Supply_Material_Cost_Including_Tax",
"alias": "甲供材料费含税",
"comment": "甲供材料费含税"
},
{
"name": "B_Supply_Material_Cost_Excluding_Tax",
"alias": "乙供材料费不含税",
"comment": "乙供材料费不含税"
},
{
"name": "B_Supply_Material_Cost_Including_Tax",
"alias": "乙供材料费含税",
"comment": "乙供材料费含税"
},
{
"name": "ScaffoldCalculation",
"alias": "脚手架计取",
"comment": "脚手架计取,取值计取、不计取"
},
{
"name": "Remarks",
"alias": "备注,说明",
"comment": "备注,说明"
},
{
"name": "FeeCollectionTableName",
"alias": "取费表",
"comment": "项目划分的取费表,工程量的取费表"
},
{
"name": "Quota_Section_Name",
"alias": "定额章节名称",
"comment": "定额章节名称"
},
{
"name": "ProfessionalType",
"alias": "专业类型",
"comment": "专业类型,字段值有变电安装、变电建筑、线路等。变电安装等于安装工程,变电建筑等于建筑工程,线路等于安装工程。"
},
{
"name": "split",
"alias": "拆分",
"comment": "是否为拆分材料,取值1为拆分,取值0为不拆分"
},
{
"name": "Loss",
"alias": "损耗",
"comment": "损耗率,主材损耗率"
},
{
"name": "SingleWeight",
"alias": "单重",
"comment": "单重,主材单重"
},
{
"name": "LineWeight",
"alias": "线重",
"comment": "线重,主材线重"
},
{
"name": "SupervisedMaterials",
"alias": "监造物料",
"comment": "监造物料,取值1为监造物料,取值0为非监造物料"
},
{
"name": "EquipmentMaterials",
"alias": "设备性材料",
"comment": "设备性材料,取值1为设备性材料,取值0为主材"
},
{
"name": "GrossWeight",
"alias": "毛重",
"comment": "毛重,主材毛重"
},
{
"name": "TransportationType",
"alias": "运输类型",
"comment": "运输类型,主材运输类型"
},
{
"name": "TransportationMiscellaneous",
"alias": "运杂费率",
"comment": "运杂费率,设备运杂费率"
},
{
"name": "EquipmentType",
"alias": "设备类型",
"comment": "设备类型,取值为主要设备、普通设备"
},
{
"name": "UnitPrice",
"alias": "单价",
"comment": "单价"
},
{
"name": "Market_Price_Excluding_Tax",
"alias": "市场价不含税",
"comment": "市场价不含税"
},
{
"name": "Market_Price_Including_Tax",
"alias": "市场价含税",
"comment": "市场价含税,设备含税价"
},
{
"name": "Budget_Price_Excluding_Tax",
"alias": "预算价不含税",
"comment": "预算价不含税"
},
{
"name": "Budget_Price_Including_Tax",
"alias": "预算价含税",
"comment": "预算价含税"
},
{
"name": "Unit_Price_Excluding_Tax",
"alias": "单价不含税",
"comment": "单价不含税,设备不含税价"
},
{
"name": "GroupPrice",
"alias": "分组合价",
"comment": "分组合价"
},
{
"name": "Pump_Truck_Pouring",
"alias": "泵车浇制",
"comment": "泵车浇制,取值1为泵车浇制,取值0为非泵车浇制"
},
{
"name": "On_Site_Preparation",
"alias": "现场制备",
"comment": "现场制备,取值1为现场制备,取值0为非现场制备"
},
{
"name": "Clear_Water_Concrete",
"alias": "清水混凝土",
"comment": "清水混凝土,取值1为清水混凝土,取值0为非清水混凝土"
},
{
"name": "Debugging_Fee_Calculation",
"alias": "调试费计取",
"comment": "调试费计取,取值计取、不计取"
}
]
}
]
}
@@ -0,0 +1,86 @@
{
"Table": [
{
"name": "TotalCalculateTable",
"alias": "总算表",
"comment": "总算表也被称为“工程总费用”、“工程费用”。其中包含本地工程、辅助设施工程、编制基准期价差、设备购置费、其他费用、基本预备费、特殊费用、工程静态投资、动态费用、价差预备费、建设期贷款利息、工程动态投资、可抵扣增值税额。查询示例: SELECT Amount FROM TotalCalculateTable WHERE Name = 'findname'。",
"fileds": [
{
"name": "Id",
"alias": "id,项目id,费用id",
"comment": "费用项目id"
},
{
"name": "ParentId",
"alias": "父id,父级id",
"comment": "费用项目父级id"
},
{
"name": "Level",
"alias": "层级,层编号,层号",
"comment": "层级编号,从1开始"
},
{
"name": "Name",
"alias": "费用名,名称,项目名",
"comment": "费用名称,项目名称"
},
{
"name": "SerialNumber",
"alias": "序号",
"comment": "工程费用序号"
},
{
"name": "Code",
"alias": "代码,代号,编号",
"comment": "费用代码"
},
{
"name": "Rate",
"alias": "费用利率,费率",
"comment": "费率"
},
{
"name": "Amount",
"alias": "金额,价格",
"comment": "合计费"
},
{
"name": "WBS_Code",
"alias": "WBS编号,WBS编码",
"comment": "费用编码"
},
{
"name": "Path",
"alias": "费用全路径,路径",
"comment": "费用名称全路径"
},
{
"name": "Amount_InstallationCost",
"alias": "安装价格,安装金额,金额_安装费",
"comment": "安装费金额"
},
{
"name": "Amount_EquipmentCost",
"alias": "设备价格,金额_设备费,设备金额",
"comment": "设备费金额"
},
{
"name": "Amount_OtherCost",
"alias": "其他费用价格,其他费用金额,金额_其他费",
"comment": "其他费金额"
},
{
"name": "Amount_Total",
"alias": "总的金额,金额_占总计,总体金额",
"comment": "合计费占总计"
},
{
"name": "Amount_UnitInvestment",
"alias": "合计投资金额,金额_单位投资",
"comment": "合计费单位投资"
}
]
}
]
}
Binary file not shown.
Binary file not shown.
+40 -37
View File
@@ -15,50 +15,53 @@ from app.observability import init_observability
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
app = FastAPI()
init_settings()
init_observability()
environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set
logger = logging.getLogger("uvicorn") logger = logging.getLogger("uvicorn")
app = None
if environment == "dev": def init_webserver():
logger.warning("Running in development mode - allowing CORS for all origins") global app
app.add_middleware( app = FastAPI()
CORSMiddleware, environment = os.getenv("ENVIRONMENT", "dev") # Default to 'development' if not set
allow_origins=["*"], if environment == "dev":
allow_credentials=True, logger.warning("Running in development mode - allowing CORS for all origins")
allow_methods=["*"], app.add_middleware(
allow_headers=["*"], CORSMiddleware,
) allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"],
)
def mount_static_files(directory, path):
if os.path.exists(directory):
for dir, _, _ in os.walk(directory):
relative_path = os.path.relpath(dir, directory)
mount_path = path if relative_path == "." else f"{path}/{relative_path}"
logger.info(f"Mounting static files '{dir}' at {mount_path}")
app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static")
# Mount the data files to serve the file viewer
mount_static_files("data", "/api/files/data")
# Mount the output files from tools
mount_static_files("data_output", "/api/files/output")
app.include_router(chat_router, prefix="/api/chat")
app.include_router(file_upload_router, prefix="/api/chat/upload")
# Redirect to documentation page when accessing base URL # Redirect to documentation page when accessing base URL
@app.get("/") @app.get("/")
async def redirect_to_docs(): async def redirect_to_docs():
return RedirectResponse(url="/docs") return RedirectResponse(url="/docs")
def mount_static_files(directory, path):
if os.path.exists(directory):
for dir, _, _ in os.walk(directory):
relative_path = os.path.relpath(dir, directory)
mount_path = path if relative_path == "." else f"{path}/{relative_path}"
logger.info(f"Mounting static files '{dir}' at {mount_path}")
app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static")
# Mount the data files to serve the file viewer
mount_static_files("data", "/api/files/data")
# Mount the output files from tools
mount_static_files("output", "/api/files/output")
app.include_router(chat_router, prefix="/api/chat")
app.include_router(file_upload_router, prefix="/api/chat/upload")
if __name__ == "__main__": if __name__ == "__main__":
app_host = os.getenv("APP_HOST", "0.0.0.0") from phoenix.trace import using_project
app_port = int(os.getenv("APP_PORT", "8000")) with using_project(os.getenv("PHOENIX_PROJECT_NAME")) as obj:
reload = True if environment == "dev" else False
uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload) init_settings()
init_observability()
init_webserver()
app_host = os.getenv("APP_HOST", "0.0.0.0")
app_port = int(os.getenv("APP_PORT", "8000"))
#reload = True if environment == "dev" else False
reload = False
uvicorn.run(app=app, host=app_host, port=app_port, reload=reload)
+21 -9
View File
@@ -11,11 +11,25 @@ generate = "app.engine.generate:generate_datasource"
[tool.poetry.dependencies] [tool.poetry.dependencies]
python = "^3.11,<3.12" python = "^3.11,<3.12"
fastapi = "^0.109.1" fastapi = "^0.112.0"
python-dotenv = "^1.0.0" python-dotenv = "^1.0.0"
aiostream = "^0.5.2" aiostream = "^0.6.2"
llama-index = "0.10.58" llama-index = "0.10.63"
cachetools = "^5.3.3" cachetools = "^5.3.3"
protobuf = "4.25.4"
#arize-phoenix = "^4.12.0"
openinference-instrumentation-llama-index="2.2.3"
llama-index-callbacks-arize-phoenix = "^0.1.4"
llama-index-llms-dashscope = "^0.1.2"
llama-index-embeddings-dashscope = "^0.1.4"
llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
qdrant-client="^1.10.1"
llama-index-vector-stores-qdrant = "^0.2.14"
chroma="^0.5.5"
llama-index-vector-stores-chroma = "^0.1.10"
llama-index-readers-json = "^0.1.5"
duckduckgo_search = "^6.2.6"
[tool.poetry.dependencies.uvicorn] [tool.poetry.dependencies.uvicorn]
extras = [ "standard" ] extras = [ "standard" ]
@@ -28,11 +42,11 @@ version = "^0.1.3"
version = "^1.1.0" version = "^1.1.0"
extras = [ "rsa" ] extras = [ "rsa" ]
[tool.poetry.dependencies.psycopg2] #[tool.poetry.dependencies.psycopg2]
version = "^2.9.9" #version = "^2.9.9"
[tool.poetry.dependencies.llama-index-indices-managed-llama-cloud] #[tool.poetry.dependencies.llama-index-indices-managed-llama-cloud]
version = "^0.2.7" #version = "^0.2.7"
[tool.poetry.dependencies.docx2txt] [tool.poetry.dependencies.docx2txt]
version = "^0.8" version = "^0.8"
@@ -40,8 +54,6 @@ version = "^0.8"
[tool.poetry.dependencies.e2b_code_interpreter] [tool.poetry.dependencies.e2b_code_interpreter]
version = "0.0.7" version = "0.0.7"
[tool.poetry.dependencies.llama-index-agent-openai]
version = "0.2.6"
[build-system] [build-system]
requires = [ "poetry-core" ] requires = [ "poetry-core" ]
+4
View File
@@ -0,0 +1,4 @@
rmdir /S /Q storage_vector
rmdir /S /Q storage
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python app/engine/generate.py
+4
View File
@@ -0,0 +1,4 @@
rmdir /S /Q storage_vector
rmdir /S /Q storage
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python tests/query.py
+1
View File
@@ -0,0 +1 @@
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python main.py
+67
View File
@@ -0,0 +1,67 @@
import os
from ctypes import cast
from llama_index.core import VectorStoreIndex, SQLDatabase
from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
from llama_index.readers.database import DatabaseReader
from sqlalchemy import create_engine
from app.api.routers.chat import generate_filters
from app.engine import get_index, makeDescriptionByEngine
from app.engine.loaders.db import CustomDatabaseReader
from app.engine.vectordb import get_vector_store
from app.observability import init_observability
from app.settings import init_settings
def main():
    """Smoke-test the vector index and the SQL table retriever with one question.

    The same question is sent through three query paths and each answer is
    printed:

    1. the index returned by ``get_index()``;
    2. an ad-hoc ``VectorStoreIndex`` built from the rows of the
       ``ProjectProperties`` table;
    3. a ``SQLTableRetrieverQueryEngine`` that selects the table through an
       ``ObjectIndex`` over the table descriptions.

    Raises:
        RuntimeError: if the ``SQL_DATABASE_URL`` environment variable is unset.
    """
    init_settings()
    init_observability()

    index = get_index()
    top_k = 5
    filters = generate_filters([])
    # Alternative question kept for manual testing:
    # question = "从工程属性表中查找工程名称"
    question = "总算表中名称等于架空输电线路本体工程的金额?"

    # 1) Query the existing index through a vector-retrieval query engine.
    query_engine = index.as_query_engine(
        similarity_top_k=top_k, filters=filters
    )
    query_result = query_engine.query(question)
    print(query_result)

    # Fail with a clear message instead of handing an empty URL to SQLAlchemy.
    db_url = os.getenv("SQL_DATABASE_URL", "")
    if not db_url:
        raise RuntimeError("SQL_DATABASE_URL environment variable is not set")
    engine = create_engine(db_url)
    sql_database = SQLDatabase(engine)

    loader = CustomDatabaseReader(sql_database)
    documents = loader.load_data(query="select * from ProjectProperties")

    table_schema_objs = makeDescriptionByEngine(sql_database)
    table_node_mapping = SQLTableNodeMapping(sql_database)

    # 2) Build the ad-hoc index from the rows loaded above. A bare
    # VectorStoreIndex() has neither nodes nor an index struct and raises
    # ValueError, and the loaded documents were otherwise never used.
    vector_index = VectorStoreIndex.from_documents(documents)

    # 3) Build the object index over table descriptions for the SQL query tool.
    sql_obj_index = ObjectIndex.from_objects(
        table_schema_objs,
        table_node_mapping,
        index_cls=VectorStoreIndex,
    )

    query_result = vector_index.as_query_engine(
        similarity_top_k=top_k, filters=filters
    ).query(question)
    print(query_result)

    # Only the single best-matching table is handed to the SQL query engine.
    sql_query_engine = SQLTableRetrieverQueryEngine(
        sql_database,
        sql_obj_index.as_retriever(similarity_top_k=1),
    )
    sql_query_result = sql_query_engine.query(question)
    print(sql_query_result)
if __name__ == "__main__":
    # Imported here, inside the guard, so that merely importing this module
    # does not pull in phoenix.
    from phoenix.trace import using_project

    # Run the checks inside the "ly_zjapp_test" project context
    # (presumably so traces are grouped under that Phoenix project — confirm).
    with using_project("ly_zjapp_test") as obj:
        main()
+4 -2
View File
@@ -1,6 +1,8 @@
# The backend API for chat endpoint. # The backend API for chat endpoint.
NEXT_PUBLIC_CHAT_API=http://localhost:8000/api/chat NEXT_PUBLIC_CHAT_API=http://localhost:8000/api/chat
# Let's the user change indexes in LlamaCloud projects PHOENIX_SERVER_URL=http://localhost:6006/
NEXT_PUBLIC_USE_LLAMACLOUD=true
# Lets the user change indexes in LlamaCloud projects
NEXT_PUBLIC_USE_LLAMACLOUD=false
+6 -4
View File
@@ -1,18 +1,20 @@
import Image from "next/image"; import Image from "next/image";
const phoenixUrl = process.env.PHOENIX_SERVER_URL;
export default function Header() { export default function Header() {
return ( return (
<div className="z-10 max-w-5xl w-full items-center justify-between font-mono text-sm lg:flex"> <div className="z-10 max-w-5xl w-full items-center justify-between font-mono text-sm lg:flex">
<p className="fixed left-0 top-0 flex w-full justify-center border-b border-gray-300 bg-gradient-to-b from-zinc-200 pb-6 pt-8 backdrop-blur-2xl dark:border-neutral-800 dark:bg-zinc-800/30 dark:from-inherit lg:static lg:w-auto lg:rounded-xl lg:border lg:bg-gray-200 lg:p-4 lg:dark:bg-zinc-800/30"> <p className="fixed left-0 top-0 flex w-full justify-center border-b border-gray-300 bg-gradient-to-b from-zinc-200 pb-6 pt-8 backdrop-blur-2xl dark:border-neutral-800 dark:bg-zinc-800/30 dark:from-inherit lg:static lg:w-auto lg:rounded-xl lg:border lg:bg-gray-200 lg:p-4 lg:dark:bg-zinc-800/30">
Get started by editing&nbsp; <code className="font-mono font-bold"><a href="javascript:location.reload();"></a></code>
<code className="font-mono font-bold">app/page.tsx</code>
</p> </p>
<div className="fixed bottom-0 left-0 mb-4 flex h-auto w-full items-end justify-center bg-gradient-to-t from-white via-white dark:from-black dark:via-black lg:static lg:w-auto lg:bg-none lg:mb-0"> <div className="fixed bottom-0 left-0 mb-4 flex h-auto w-full items-end justify-center bg-gradient-to-t from-white via-white dark:from-black dark:via-black lg:static lg:w-auto lg:bg-none lg:mb-0">
<a <a
href="https://www.llamaindex.ai/" href={phoenixUrl}
target="_blank"
className="flex items-center justify-center font-nunito text-lg font-bold gap-2" className="flex items-center justify-center font-nunito text-lg font-bold gap-2"
> >
<span>Built by LlamaIndex</span> <code className="font-mono font-bold"></code>
<Image <Image
className="rounded-xl" className="rounded-xl"
src="/llama.png" src="/llama.png"
@@ -14,13 +14,13 @@ export default function ChatActions(
{props.showStop && ( {props.showStop && (
<Button variant="outline" size="sm" onClick={props.stop}> <Button variant="outline" size="sm" onClick={props.stop}>
<PauseCircle className="mr-2 h-4 w-4" /> <PauseCircle className="mr-2 h-4 w-4" />
Stop generating
</Button> </Button>
)} )}
{props.showReload && ( {props.showReload && (
<Button variant="outline" size="sm" onClick={props.reload}> <Button variant="outline" size="sm" onClick={props.reload}>
<RefreshCw className="mr-2 h-4 w-4" /> <RefreshCw className="mr-2 h-4 w-4" />
Regenerate
</Button> </Button>
)} )}
</div> </div>
@@ -68,7 +68,7 @@ export default function ChatInput(
const handleUploadFile = async (file: File) => { const handleUploadFile = async (file: File) => {
if (imageUrl || files.length > 0) { if (imageUrl || files.length > 0) {
alert("You can only upload one file at a time."); alert("同一时刻只能上传一个文件。");
return; return;
} }
try { try {
@@ -99,10 +99,11 @@ export default function ChatInput(
</div> </div>
)} )}
<div className="flex w-full items-start justify-between gap-4 "> <div className="flex w-full items-start justify-between gap-4 ">
<Input <textarea
autoFocus autoFocus
rows={2}
name="message" name="message"
placeholder="Type a message" placeholder="请输入消息"
className="flex-1" className="flex-1"
value={props.input} value={props.input}
onChange={props.handleInputChange} onChange={props.handleInputChange}
@@ -119,7 +120,7 @@ export default function ChatInput(
<LlamaCloudSelector setRequestData={setRequestData} /> <LlamaCloudSelector setRequestData={setRequestData} />
)} )}
<Button type="submit" disabled={props.isLoading || !props.input.trim()}> <Button type="submit" disabled={props.isLoading || !props.input.trim()}>
Send message
</Button> </Button>
</div> </div>
</form> </form>
@@ -17,7 +17,7 @@ export function ChatEvents({
}) { }) {
const [isOpen, setIsOpen] = useState(false); const [isOpen, setIsOpen] = useState(false);
const buttonLabel = isOpen ? "Hide events" : "Show events"; const buttonLabel = isOpen ? "隐藏" : "详情";
const EventIcon = isOpen ? ( const EventIcon = isOpen ? (
<ChevronDown className="h-4 w-4" /> <ChevronDown className="h-4 w-4" />
@@ -9,23 +9,33 @@ import {
import { useCopyToClipboard } from "../hooks/use-copy-to-clipboard"; import { useCopyToClipboard } from "../hooks/use-copy-to-clipboard";
import { SourceData } from "../index"; import { SourceData } from "../index";
import PdfDialog from "../widgets/PdfDialog"; import PdfDialog from "../widgets/PdfDialog";
import { useClientConfig } from "../hooks/use-config";
const SCORE_THRESHOLD = 0.3; const SCORE_THRESHOLD = 0.3;
function SourceNumberButton({ index }: { index: number }) { function truncateNumber(num: number | undefined, precision: number): number {
if (num == undefined || num == 0) return 0;
const factor = Math.pow(10, precision);
return Math.trunc(num * factor) / factor;
}
function SourceNumberButton({ index, score }: { index: number, score: number | undefined }) {
return ( return (
<div className="text-xs w-5 h-5 rounded-full bg-gray-100 mb-2 flex items-center justify-center hover:text-white hover:bg-primary hover:cursor-pointer"> <div className="text-xs w-45 h-45 rounded-full bg-gray-100 mb-2 flex items-center justify-center hover:text-white hover:bg-primary hover:cursor-pointer">
{index + 1} {truncateNumber(score, 2)}
</div> </div>
); );
} }
type NodeInfo = { type NodeInfo = {
id: string; id: string;
score?: number;
text: string;
url?: string; url?: string;
}; };
export function ChatSources({ data }: { data: SourceData }) { export function ChatSources({ data }: { data: SourceData }) {
const { backend } = useClientConfig();
const sources: NodeInfo[] = useMemo(() => { const sources: NodeInfo[] = useMemo(() => {
// aggregate nodes by url or file_path (get the highest one by score) // aggregate nodes by url or file_path (get the highest one by score)
const nodesByPath: { [path: string]: NodeInfo } = {}; const nodesByPath: { [path: string]: NodeInfo } = {};
@@ -36,6 +46,8 @@ export function ChatSources({ data }: { data: SourceData }) {
.forEach((node) => { .forEach((node) => {
const nodeInfo = { const nodeInfo = {
id: node.id, id: node.id,
score: node.score,
text: node.text,
url: node.url, url: node.url,
}; };
const key = nodeInfo.url ?? nodeInfo.id; // use id as key for UNKNOWN type const key = nodeInfo.url ?? nodeInfo.id; // use id as key for UNKNOWN type
@@ -51,7 +63,7 @@ export function ChatSources({ data }: { data: SourceData }) {
return ( return (
<div className="space-x-2 text-sm"> <div className="space-x-2 text-sm">
<span className="font-semibold">Sources:</span> <span className="font-semibold">:</span>
<div className="inline-flex gap-1 items-center"> <div className="inline-flex gap-1 items-center">
{sources.map((nodeInfo: NodeInfo, index: number) => { {sources.map((nodeInfo: NodeInfo, index: number) => {
if (nodeInfo.url?.endsWith(".pdf")) { if (nodeInfo.url?.endsWith(".pdf")) {
@@ -59,8 +71,8 @@ export function ChatSources({ data }: { data: SourceData }) {
<PdfDialog <PdfDialog
key={nodeInfo.id} key={nodeInfo.id}
documentId={nodeInfo.id} documentId={nodeInfo.id}
url={nodeInfo.url!} url={backend+nodeInfo.url}
trigger={<SourceNumberButton index={index} />} trigger={<SourceNumberButton index={index} score={nodeInfo.score} />}
/> />
); );
} }
@@ -68,9 +80,9 @@ export function ChatSources({ data }: { data: SourceData }) {
<div key={nodeInfo.id}> <div key={nodeInfo.id}>
<HoverCard> <HoverCard>
<HoverCardTrigger> <HoverCardTrigger>
<SourceNumberButton index={index} /> <SourceNumberButton index={index} score={nodeInfo.score}/>
</HoverCardTrigger> </HoverCardTrigger>
<HoverCardContent className="w-[320px]"> <HoverCardContent className="w-[450px]">
<NodeInfo nodeInfo={nodeInfo} /> <NodeInfo nodeInfo={nodeInfo} />
</HoverCardContent> </HoverCardContent>
</HoverCard> </HoverCard>
@@ -83,6 +95,7 @@ export function ChatSources({ data }: { data: SourceData }) {
} }
function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) { function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
const { backend } = useClientConfig();
const { isCopied, copyToClipboard } = useCopyToClipboard({ timeout: 1000 }); const { isCopied, copyToClipboard } = useCopyToClipboard({ timeout: 1000 });
if (nodeInfo.url) { if (nodeInfo.url) {
@@ -92,10 +105,10 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
<div className="flex items-center my-2"> <div className="flex items-center my-2">
<a <a
className="hover:text-blue-900 truncate" className="hover:text-blue-900 truncate"
href={nodeInfo.url} href={backend+nodeInfo.url}
target="_blank" target="_blank"
> >
<span>{nodeInfo.url}</span> <span>{nodeInfo.text}</span>
</a> </a>
<Button <Button
onClick={() => copyToClipboard(nodeInfo.url!)} onClick={() => copyToClipboard(nodeInfo.url!)}
@@ -116,8 +129,7 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
// node generated by unknown loader, implement renderer by analyzing logged out metadata // node generated by unknown loader, implement renderer by analyzing logged out metadata
return ( return (
<p> <p>
Sorry, unknown node type. Please add a new renderer in the NodeInfo , .
component.
</p> </p>
); );
} }
@@ -9,7 +9,7 @@ export default function ChatTools({ data }: { data: ToolData }) {
if (toolOutput.isError) { if (toolOutput.isError) {
return ( return (
<div className="border-l-2 border-red-400 pl-2"> <div className="border-l-2 border-red-400 pl-2">
There was an error when calling the tool {toolCall.name} with input:{" "} {toolCall.name} :{" "}
<br /> <br />
{JSON.stringify(toolCall.input)} {JSON.stringify(toolCall.input)}
</div> </div>
@@ -68,7 +68,7 @@ const CodeBlock: FC<Props> = memo(({ language, value }) => {
3, 3,
true, true,
)}${fileExtension}`; )}${fileExtension}`;
const fileName = window.prompt("Enter file name" || "", suggestedFileName); const fileName = window.prompt("请输入文件名称" || "", suggestedFileName);
if (!fileName) { if (!fileName) {
// User pressed cancel on prompt. // User pressed cancel on prompt.
@@ -99,7 +99,7 @@ const CodeBlock: FC<Props> = memo(({ language, value }) => {
<div className="flex items-center space-x-1"> <div className="flex items-center space-x-1">
<Button variant="ghost" onClick={downloadAsFile} size="icon"> <Button variant="ghost" onClick={downloadAsFile} size="icon">
<Download /> <Download />
<span className="sr-only">Download</span> <span className="sr-only"></span>
</Button> </Button>
<Button variant="ghost" size="icon" onClick={onCopy}> <Button variant="ghost" size="icon" onClick={onCopy}>
{isCopied ? ( {isCopied ? (
@@ -107,7 +107,7 @@ const CodeBlock: FC<Props> = memo(({ language, value }) => {
) : ( ) : (
<Copy className="h-4 w-4" /> <Copy className="h-4 w-4" />
)} )}
<span className="sr-only">Copy code</span> <span className="sr-only"></span>
</Button> </Button>
</div> </div>
</div> </div>
@@ -10,7 +10,7 @@ export interface ChatHandler {
data?: any; data?: any;
}, },
) => void; ) => void;
handleInputChange: (e: React.ChangeEvent<HTMLInputElement>) => void; handleInputChange: (e: React.ChangeEvent<HTMLTextAreaElement>) => void;
reload?: () => void; reload?: () => void;
stop?: () => void; stop?: () => void;
onFileUpload?: (file: File) => Promise<void>; onFileUpload?: (file: File) => Promise<void>;
@@ -63,7 +63,7 @@ export function useFile() {
...requestParams, ...requestParams,
}), }),
}); });
if (!response.ok) throw new Error("Failed to upload document."); if (!response.ok) throw new Error("上传文件时发生错误。");
return await response.json(); return await response.json();
}; };
@@ -109,7 +109,7 @@ export function useFile() {
} }
const filetype = docMineTypeMap[file.type]; const filetype = docMineTypeMap[file.type];
if (!filetype) throw new Error("Unsupported document type."); if (!filetype) throw new Error("不支持的文件类型。");
const newDoc: Omit<DocumentFile, "content"> = { const newDoc: Omit<DocumentFile, "content"> = {
id: uuidv4(), id: uuidv4(),
filetype, filetype,
+2 -2
View File
@@ -32,7 +32,7 @@ export default function FileUploader({
const allowedExtensions = config?.allowedExtensions; const allowedExtensions = config?.allowedExtensions;
const defaultCheckExtension = (extension: string) => { const defaultCheckExtension = (extension: string) => {
if (allowedExtensions && !allowedExtensions.includes(extension)) { if (allowedExtensions && !allowedExtensions.includes(extension)) {
return `Invalid file type. Please select a file with one of these formats: ${allowedExtensions!.join( return `无效的文件类型。请选择一个以下格式的文件: ${allowedExtensions!.join(
",", ",",
)}`; )}`;
} }
@@ -69,7 +69,7 @@ export default function FileUploader({
if (isFileSizeExceeded(file)) { if (isFileSizeExceeded(file)) {
return onFileUploadError( return onFileUploadError(
`File size exceeded. Limit is ${fileSizeLimit / 1024 / 1024} MB`, `文件尺寸超标。请选择不大于 ${fileSizeLimit / 1024 / 1024} MB 的文件。`,
); );
} }
+4 -4
View File
@@ -3,11 +3,11 @@ import { Inter } from "next/font/google";
import "./globals.css"; import "./globals.css";
import "./markdown.css"; import "./markdown.css";
const inter = Inter({ subsets: ["latin"] }); const inter = Inter({ subsets: ["latin", "latin-ext"] });
export const metadata: Metadata = { export const metadata: Metadata = {
title: "Create Llama App", title: "博微造价工程文件知识问答",
description: "Generated by create-llama", description: "博微技术中心搭建的造价工程文件知识问答",
}; };
export default function RootLayout({ export default function RootLayout({
@@ -16,7 +16,7 @@ export default function RootLayout({
children: React.ReactNode; children: React.ReactNode;
}) { }) {
return ( return (
<html lang="en"> <html lang="zh-CN">
<body className={inter.className}>{children}</body> <body className={inter.className}>{children}</body>
</html> </html>
); );
+1 -1
View File
@@ -4,7 +4,7 @@ import ChatSection from "./components/chat-section";
export default function Home() { export default function Home() {
return ( return (
<main className="h-screen w-screen flex justify-center items-center background-gradient"> <main className="h-screen w-screen flex justify-center items-center background-gradient">
<div className="space-y-2 lg:space-y-10 w-[90%] lg:w-[60rem]"> <div className="space-y-2 lg:space-y-10 w-[90%] lg:w-[90rem]">
<Header /> <Header />
<div className="h-[65vh] flex"> <div className="h-[65vh] flex">
<ChatSection /> <ChatSection />
+1
View File
@@ -0,0 +1 @@
npm run dev
+3
View File
@@ -0,0 +1,3 @@
# Settings for the Phoenix server.
# Phoenix reads PHOENIX_HOST / PHOENIX_PORT; the ENV_-prefixed spellings are
# names of constants inside phoenix.config, not environment variables.
PHOENIX_HOST=0.0.0.0
PHOENIX_PORT=6006
# NOTE(review): PHOENIX_HOST_ROOT_PATH is documented as the URL path prefix used
# behind a reverse proxy. If a local data directory was intended here,
# PHOENIX_WORKING_DIR is presumably the right variable - confirm.
PHOENIX_HOST_ROOT_PATH=./.phoenix/
+18
View File
@@ -0,0 +1,18 @@
import os
import phoenix as px
# Listen address for the Phoenix app; set before launch_app() is called
# (presumably read from the environment at launch time - confirm).
os.environ['PHOENIX_HOST'] = "0.0.0.0"
session = px.launch_app(use_temp_dir=False)


def wait_for_keypress():
    """Block until the user acknowledges on the console.

    Uses ``msvcrt.getch()`` when the module is available (Windows), so any
    single key continues. Where ``msvcrt`` does not exist, falls back to
    reading one line from stdin, i.e. the user presses Enter.
    """
    print("Press any key to continue...")
    try:
        # Imported lazily: msvcrt is Windows-only, and a module-level import
        # placed after launch_app() would abort the script on other platforms
        # while the app is already running.
        import msvcrt
    except ImportError:
        import sys

        sys.stdin.readline()
    else:
        msvcrt.getch()  # wait for a key press
    print("\nKey pressed!")


try:
    wait_for_keypress()
finally:
    # Always shut the app down, even if the wait is interrupted
    # (e.g. KeyboardInterrupt), so it is not left running.
    px.close_app()
+5
View File
@@ -0,0 +1,5 @@
REM Start the Phoenix server script with the settings below.
REM Phoenix reads PHOENIX_HOST / PHOENIX_PORT; the ENV_-prefixed spellings are
REM names of constants inside phoenix.config, not environment variables.
SET PHOENIX_HOST=0.0.0.0
SET PHOENIX_PORT=6006
REM NOTE(review): PHOENIX_HOST_ROOT_PATH is documented as the URL path prefix
REM used behind a reverse proxy. If a local data directory was intended here,
REM PHOENIX_WORKING_DIR is presumably the right variable - confirm.
SET PHOENIX_HOST_ROOT_PATH=./.phoenix/
REM NOTE(review): machine-specific virtualenv interpreter path; it only works
REM on the original author's workstation.
C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-pCyqx0Uo-py3.11\Scripts\python phoenixserver.py