优化了提示词

将项目划分表按照业务拆分
2024-08-23 18:35:19 +08:00 · 2024-08-23 15:07:26 +08:00 · 2024-08-23 15:05:48 +08:00 · 2024-08-23 08:55:54 +08:00 · 2024-08-23 08:53:13 +08:00 · 2024-08-22 21:21:37 +08:00
99 changed files with 27322 additions and 460 deletions
@@ -0,0 +1,81 @@
 # The Llama Cloud API key.
 # LLAMA_CLOUD_API_KEY=
 SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
 #SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
 DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
 # The provider for the AI models to use.
 MODEL_PROVIDER=dashscope
 # The name of LLM model to use.
 MODEL=qwen-max
 # 是否启用检索重排功能
 ENABLE_RERANK=true
 # Name of the embedding model to use.
 EMBEDDING_MODEL=text-embedding-v2
 # Dimension of the embedding model to use.
 EMBEDDING_DIM=1024
 # The questions to help users get started (multi-line).
 CONVERSATION_STARTERS=本工程指什么？\n总算表有哪些费用？\n项目划分哪些内容构成？\n其他费用表有哪些内容？
 # The OpenAI API key to use.
 # OPENAI_API_KEY=
 # Temperature for sampling from the model.
 # LLM_TEMPERATURE=
 # Maximum number of tokens to generate.
 # LLM_MAX_TOKENS=
 # The number of similar embeddings to return when retrieving documents.
 TOP_K=5
 # The time in milliseconds to wait for the stream to return a response.
 STREAM_TIMEOUT=60000
 # 向量存储数据库类型，目前可选：chroma、qdrant
 VECTOR_STORE_TYPE=chroma
 # The name of the collection in your vector database
 VECTOR_STORE_COLLECTION=default
 # The API endpoint for your vector database
 # VECTOR_STORE_HOST=
 # The port for your vector database
 # VECTOR_STORE_PORT=
 # The local path to the vector database.
 # Specify this if you are using a local vector database.
 # Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
 VECTOR_STORE_PATH=./storage_vector
 BM_RETRIEVER_PATH =./storage_bm
 PHOENIX_API_KEY=123456
 PHOENIX_URL=http://localhost:6006/v1/traces
 PHOENIX_PROJECT_NAME=ly_zjapp
 #OTEL_SERVICE_NAME=ly_zjapp
 #OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
 # The address to start the backend app.
 APP_HOST=0.0.0.0
 # The port to start the backend app.
 APP_PORT=8000
 FILESERVER_URL_PREFIX=/api/files
 # E2B_API_KEY is required to run the code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
 # E2B_API_KEY=
 # The system prompt for the AI model.
 SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
 -You are a Python interpreter that can run any python code in a secure environment.
 - The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell. 
 - You are given tasks to complete and you run python code to solve them.
 - It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
 - Plot visualizations using matplotlib or any other visualization library directly in the notebook.
 - You can install any pip package (if it exists) by running a cell with pip install.
 "
@@ -0,0 +1,112 @@
 # The Llama Cloud API key.
 # LLAMA_CLOUD_API_KEY=
 SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
 #SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
 # The number of similar embeddings to return when retrieving documents.
 TOP_K=10
 #--------------------------
 # 是否启用混合检索
 HYBRID_ENABLED = true
 # 混合检索阈值
 HYBRID_ALPHA = 0.6
 #--------------------------
 # 是否启用检索重排功能
 RERANK_ENABLED=true
 # Rerank model
 RERANK_MODEL=bge-reranker-v2-m3
 RERANK_BASE_URL=http://10.1.16.39:9995
 RERANK_TOP_N=5
 RERANK_THRESHOLD=0.3
 #----------   Xinference    ----------------
 # The provider for the AI models to use.
 MODEL_PROVIDER=xinference
 # The OpenAI API key to use.
 OPENAI_API_KEY=xinference
 BASE_URL=http://10.1.0.142:9995
 MODEL=Qwen2-72B-Instruct-GPTQ-Int8
 # Temperature for sampling from the model.
 LLM_TEMPERATURE=0.1
 # Maximum number of tokens to generate.
 #LLM_MAX_TOKENS=
 # Name of the embedding model to use.
 EMBEDDING_MODEL=bge-m3
 EMBEDDING_BASE_URL=http://10.1.16.39:9995
 # Dimension of the embedding model to use.
 EMBEDDING_DIM=1024
 ##----------   OpenAI    ----------------
 ## The provider for the AI models to use.
 #MODEL_PROVIDER=openai
 ## The OpenAI API key to use.
 #OPENAI_API_KEY=xinference
 #BASE_URL=http://10.1.0.142:9995/v1
 #MODEL=Qwen2-72B-Instruct-GPTQ-Int4
 ## Temperature for sampling from the model.
 #LLM_TEMPERATURE=0.1
 ## Maximum number of tokens to generate.
 ##LLM_MAX_TOKENS=
 ## Name of the embedding model to use.
 #EMBEDDING_MODEL=text-embedding-v2
 ## Dimension of the embedding model to use.
 #EMBEDDING_DIM=1024
 #----------   DashScope    ----------------
 #DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
 ## The provider for the AI models to use.
 #MODEL_PROVIDER=dashscope
 ## The name of LLM model to use.
 #MODEL=qwen-max
 ## Name of the embedding model to use.
 #EMBEDDING_MODEL=text-embedding-v2
 # The questions to help users get started (multi-line).
 CONVERSATION_STARTERS=本工程指什么？\n总算表有哪些费用？\n项目划分哪些内容构成？\n其他费用表有哪些内容？
 # The time in milliseconds to wait for the stream to return a response.
 STREAM_TIMEOUT=60000
 # 向量存储数据库类型，目前可选：chroma、qdrant
 VECTOR_STORE_TYPE=chroma
 # The name of the collection in your vector database
 VECTOR_STORE_COLLECTION=default
 # The API endpoint for your vector database
 # VECTOR_STORE_HOST=
 # The port for your vector database
 # VECTOR_STORE_PORT=
 # The local path to the vector database.
 # Specify this if you are using a local vector database.
 # Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
 VECTOR_STORE_PATH=./storage_vector
 BM_RETRIEVER_PATH =./storage_bm
 PHOENIX_API_KEY=123456
 PHOENIX_URL=http://localhost:6006/v1/traces
 PHOENIX_PROJECT_NAME=ly_zjapp
 #OTEL_SERVICE_NAME=ly_zjapp
 #OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
 # The address to start the backend app.
 APP_HOST=0.0.0.0
 # The port to start the backend app.
 APP_PORT=8000
 FILESERVER_URL_PREFIX=/api/files
 # E2B_API_KEY is required to run the code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
 # E2B_API_KEY=
 # The system prompt for the AI model.
 SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
 -You are a Python interpreter that can run any python code in a secure environment.
 - The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell. 
 - You are given tasks to complete and you run python code to solve them.
 - It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
 - Plot visualizations using matplotlib or any other visualization library directly in the notebook.
 - You can install any pip package (if it exists) by running a cell with pip install.
 "
@@ -2,3 +2,6 @@ __pycache__
 storage
 .env
 output
 /storage_vector/
 /.idea/
 /.python-version
@@ -0,0 +1,61 @@
 from llama_index.embeddings.openai import OpenAIEmbedding
 from llama_index.core.settings import Settings
 from typing import Dict
 import os
 # Chat model name used when the MODEL environment variable is not set.
 DEFAULT_MODEL = "gpt-3.5-turbo"
 # Embedding model name used when the EMBEDDING_MODEL environment variable is not set.
 DEFAULT_EMBEDDING_MODEL = "text-embedding-3-large"
 class TSIEmbedding(OpenAIEmbedding):
    """``OpenAIEmbedding`` subclass that uses the configured model name for both engines."""

    def __init__(self, **kwargs):
        """Forward every keyword argument to ``OpenAIEmbedding`` and then set the
        query engine and the text engine to ``self.model_name``."""
        super().__init__(**kwargs)
        engine_name = self.model_name
        self._query_engine = engine_name
        self._text_engine = engine_name
 def llm_config_from_env() -> Dict:
    """Build the keyword arguments for the LLM client from environment variables.

    Reads MODEL, LLM_TEMPERATURE, LLM_MAX_TOKENS, T_SYSTEMS_LLMHUB_API_KEY and
    T_SYSTEMS_LLMHUB_BASE_URL.  LLM_TEMPERATURE and LLM_MAX_TOKENS that are set
    but blank (e.g. ``LLM_MAX_TOKENS=`` in a .env file) are treated as unset
    instead of crashing in ``float()`` / ``int()``.

    Returns:
        Dict with the keys ``model``, ``api_key``, ``api_base``,
        ``temperature`` (float) and ``max_tokens`` (int or None).

    Raises:
        ValueError: if LLM_TEMPERATURE or LLM_MAX_TOKENS holds a non-numeric value.
    """
    from llama_index.core.constants import DEFAULT_TEMPERATURE
    model = os.getenv("MODEL", DEFAULT_MODEL)
    # Normalise "unset" and "blank" to the empty string so both fall back to the default.
    raw_temperature = (os.getenv("LLM_TEMPERATURE") or "").strip()
    raw_max_tokens = (os.getenv("LLM_MAX_TOKENS") or "").strip()
    api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY")
    api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL")
    config = {
        "model": model,
        "api_key": api_key,
        "api_base": api_base,
        "temperature": float(raw_temperature) if raw_temperature else float(DEFAULT_TEMPERATURE),
        "max_tokens": int(raw_max_tokens) if raw_max_tokens else None,
    }
    return config
 def embedding_config_from_env() -> Dict:
    """Build the keyword arguments for the embedding client from environment variables.

    Reads EMBEDDING_MODEL, EMBEDDING_DIM, T_SYSTEMS_LLMHUB_API_KEY and
    T_SYSTEMS_LLMHUB_BASE_URL.  A blank EMBEDDING_DIM (``EMBEDDING_DIM=``) is
    treated as unset and falls back to ``DEFAULT_EMBEDDING_DIM`` instead of
    crashing in ``int()``.

    Returns:
        Dict with the keys ``model_name``, ``dimension``, ``api_key`` and ``api_base``.

    Raises:
        ValueError: if EMBEDDING_DIM holds a non-integer value.
    """
    from llama_index.core.constants import DEFAULT_EMBEDDING_DIM
    model = os.getenv("EMBEDDING_MODEL", DEFAULT_EMBEDDING_MODEL)
    raw_dimension = (os.getenv("EMBEDDING_DIM") or "").strip()
    dimension = raw_dimension if raw_dimension else DEFAULT_EMBEDDING_DIM
    api_key = os.getenv("T_SYSTEMS_LLMHUB_API_KEY")
    api_base = os.getenv("T_SYSTEMS_LLMHUB_BASE_URL")
    config = {
        "model_name": model,
        # NOTE(review): the key is spelled "dimension" (singular) while init_openai in
        # this project passes "dimensions" to OpenAIEmbedding — confirm which one
        # TSIEmbedding actually honours.
        "dimension": int(dimension) if dimension is not None else None,
        "api_key": api_key,
        "api_base": api_base,
    }
    return config
 def init_llmhub():
    """Install the LLMHub embedding model and LLM on the global ``Settings``.

    Both configurations are read from the environment first; the embedding
    model is assigned before the LLM.  The LLM is an ``OpenAILike`` chat model
    with function calling disabled and a 4096-token context window.
    """
    from llama_index.llms.openai_like import OpenAILike
    llm_kwargs = llm_config_from_env()
    embed_kwargs = embedding_config_from_env()
    Settings.embed_model = TSIEmbedding(**embed_kwargs)
    Settings.llm = OpenAILike(
        is_chat_model=True,
        is_function_calling_model=False,
        context_window=4096,
        **llm_kwargs,
    )
@@ -0,0 +1,20 @@
 import os
 import llama_index.core
 def init_observability():
    """Register the "arize_phoenix" global handler for LlamaIndex.

    Requires PHOENIX_API_KEY; its value is exported as the
    OTEL_EXPORTER_OTLP_HEADERS environment variable (``api_key=<key>``).
    PHOENIX_URL is passed to the handler as its endpoint.

    Raises:
        ValueError: if PHOENIX_API_KEY is unset or empty.
    """
    api_key = os.getenv("PHOENIX_API_KEY")
    if not api_key:
        raise ValueError("PHOENIX_API_KEY environment variable is not set")
    os.environ["OTEL_EXPORTER_OTLP_HEADERS"] = "api_key=" + api_key
    llama_index.core.set_global_handler(
        "arize_phoenix",
        endpoint=os.getenv("PHOENIX_URL"),
        eval_params={},
    )
    # Disabled alternative: attach a LlamaDebugHandler through a CallbackManager.
    # debugHandle=[]
    # llama_debug = LlamaDebugHandler(print_trace_on_end=True)
    # debugHandle.append(llama_debug)
    # callback_manager = CallbackManager(debugHandle)
    # settings.Settings.callback_manager = callback_manager
@@ -0,0 +1,235 @@
 import os
 from typing import Dict
 from llama_index.core.constants import DEFAULT_TEMPERATURE
 from llama_index.core.settings import Settings
 from llama_index.llms.xinference import Xinference
 from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
 from app.xinference.base import XinferenceEmbedding, XinferenceRerank
 def get_node_postprocessors():
    """Return the node post-processors configured through the environment.

    Reranking is disabled when RERANK_ENABLED is unset or equals "false"
    (case-insensitive), and also when RERANK_MODEL is unset.  In every disabled
    case an empty list is returned, so callers always receive a list.

    Returns:
        ``[]`` when reranking is disabled, otherwise a one-element list holding
        an ``XinferenceRerank`` built from RERANK_MODEL, RERANK_BASE_URL,
        RERANK_TOP_N and RERANK_THRESHOLD.
    """
    rerank_enabled = os.getenv("RERANK_ENABLED")
    # Check for None *before* calling a str method: the variable may be unset.
    if rerank_enabled is None or rerank_enabled.title() == 'False':
        return []
    rerank_model = os.getenv("RERANK_MODEL")
    if rerank_model is None:
        return []
    rerank_url = os.getenv("RERANK_BASE_URL")
    # NOTE(review): top_n and threshold are forwarded as raw environment strings
    # (or None); confirm XinferenceRerank converts them to numbers.
    rerank_top_n = os.getenv("RERANK_TOP_N")
    rerank_threshold = os.getenv("RERANK_THRESHOLD")
    return [XinferenceRerank(rerank_model, rerank_url, top_n=rerank_top_n, threshold=rerank_threshold)]
 def init_settings():
    """Initialise the global ``Settings`` for the provider named in MODEL_PROVIDER.

    After the provider-specific initialiser has run, ``Settings.chunk_size`` and
    ``Settings.chunk_overlap`` are set from CHUNK_SIZE (default 1024) and
    CHUNK_OVERLAP (default 20).

    Raises:
        ValueError: if MODEL_PROVIDER is unset or is not a supported provider.
    """
    provider = os.getenv("MODEL_PROVIDER")
    if provider == "openai":
        init_openai()
    elif provider == "dashscope":
        init_dashscope()
    elif provider == "groq":
        init_groq()
    elif provider == "ollama":
        init_ollama()
    elif provider == "anthropic":
        init_anthropic()
    elif provider == "gemini":
        init_gemini()
    elif provider == "mistral":
        init_mistral()
    elif provider == "azure-openai":
        init_azure_openai()
    elif provider == "t-systems":
        # Imported only when this provider is actually selected.
        from .llmhub import init_llmhub
        init_llmhub()
    elif provider == "xinference":
        init_xinference()
    else:
        raise ValueError(f"Invalid model provider: {provider}")
    chunk_size = os.getenv("CHUNK_SIZE", "1024")
    Settings.chunk_size = int(chunk_size)
    chunk_overlap = os.getenv("CHUNK_OVERLAP", "20")
    Settings.chunk_overlap = int(chunk_overlap)
 def init_ollama():
    """Ollama provider stub: currently configures nothing and returns None.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.embeddings.ollama import OllamaEmbedding
    # from llama_index.llms.ollama.base import DEFAULT_REQUEST_TIMEOUT, Ollama
    #
    # base_url = os.getenv("OLLAMA_BASE_URL") or "http://127.0.0.1:11434"
    # request_timeout = float(
    #     os.getenv("OLLAMA_REQUEST_TIMEOUT", DEFAULT_REQUEST_TIMEOUT)
    # )
    # Settings.embed_model = OllamaEmbedding(
    #     base_url=base_url,
    #     model_name=os.getenv("EMBEDDING_MODEL"),
    # )
    # Settings.llm = Ollama(
    #     base_url=base_url, model=os.getenv("MODEL"), request_timeout=request_timeout
    # )
    return None
 def init_xinference():
    """Configure the global ``Settings`` to use Xinference for the LLM and embeddings.

    Environment variables read: BASE_URL, MODEL, LLM_MAX_TOKENS,
    LLM_TEMPERATURE, EMBEDDING_BASE_URL, EMBEDDING_MODEL and EMBEDDING_DIM.
    Numeric variables that are set but blank (e.g. ``LLM_MAX_TOKENS=``) are
    treated as unset instead of crashing in ``int()`` / ``float()``.
    EMBEDDING_BASE_URL falls back to BASE_URL when unset or empty.

    Raises:
        ValueError: if a numeric variable holds a non-numeric value.
    """
    base_url = os.getenv("BASE_URL")
    model = os.getenv("MODEL")
    # Normalise "unset" and "blank" to the empty string so both use the default.
    raw_max_tokens = (os.getenv("LLM_MAX_TOKENS") or "").strip()
    max_tokens = int(raw_max_tokens) if raw_max_tokens else None
    raw_temperature = (os.getenv("LLM_TEMPERATURE") or "").strip()
    temperature = float(raw_temperature) if raw_temperature else float(DEFAULT_XINFERENCE_TEMP)
    Settings.llm = Xinference(model, base_url, temperature, max_tokens)
    # The embedding service may live on a different host than the LLM.
    embedding_base_url = os.getenv("EMBEDDING_BASE_URL") or base_url
    embed_model_name = os.getenv("EMBEDDING_MODEL")
    raw_dimensions = (os.getenv("EMBEDDING_DIM") or "").strip()
    dimensions = int(raw_dimensions) if raw_dimensions else None
    Settings.embed_model = XinferenceEmbedding(embed_model_name, embedding_base_url, dimensions=dimensions)
 def init_openai():
    """Configure the global ``Settings`` to use OpenAI for the LLM and embeddings.

    Environment variables read: MODEL, LLM_TEMPERATURE, LLM_MAX_TOKENS,
    EMBEDDING_MODEL and EMBEDDING_DIM.  Numeric variables that are set but
    blank (e.g. ``LLM_TEMPERATURE=``) are treated as unset instead of crashing
    in ``float()`` / ``int()``.

    Raises:
        ValueError: if a numeric variable holds a non-numeric value.
    """
    from llama_index.core.constants import DEFAULT_TEMPERATURE
    from llama_index.embeddings.openai import OpenAIEmbedding
    from llama_index.llms.openai import OpenAI
    # Normalise "unset" and "blank" to the empty string so both use the default.
    raw_max_tokens = (os.getenv("LLM_MAX_TOKENS") or "").strip()
    raw_temperature = (os.getenv("LLM_TEMPERATURE") or "").strip()
    config = {
        "model": os.getenv("MODEL"),
        "temperature": float(raw_temperature) if raw_temperature else float(DEFAULT_TEMPERATURE),
        "max_tokens": int(raw_max_tokens) if raw_max_tokens else None,
    }
    Settings.llm = OpenAI(**config)
    raw_dimensions = (os.getenv("EMBEDDING_DIM") or "").strip()
    config = {
        "model": os.getenv("EMBEDDING_MODEL"),
        "dimensions": int(raw_dimensions) if raw_dimensions else None,
    }
    Settings.embed_model = OpenAIEmbedding(**config)
 def init_dashscope():
    """Configure the global ``Settings`` to use DashScope for the LLM and embeddings.

    The environment configuration is now honoured rather than computed and
    discarded:

    * MODEL selects the LLM (falls back to ``QWEN_MAX`` when unset or blank).
    * LLM_TEMPERATURE and LLM_MAX_TOKENS are forwarded only when they are set
      to a non-blank value, so the client defaults still apply otherwise.
    * EMBEDDING_MODEL selects the embedding model (falls back to
      ``TEXT_EMBEDDING_V2`` when unset or blank).

    EMBEDDING_DIM is not used by this provider.

    Raises:
        ValueError: if LLM_TEMPERATURE or LLM_MAX_TOKENS holds a non-numeric value.
    """
    from llama_index.llms.dashscope import DashScope,DashScopeGenerationModels
    from llama_index.embeddings.dashscope import DashScopeEmbedding,DashScopeTextEmbeddingType,DashScopeTextEmbeddingModels
    llm_kwargs = {
        "model_name": os.getenv("MODEL") or DashScopeGenerationModels.QWEN_MAX,
    }
    raw_temperature = (os.getenv("LLM_TEMPERATURE") or "").strip()
    if raw_temperature:
        llm_kwargs["temperature"] = float(raw_temperature)
    raw_max_tokens = (os.getenv("LLM_MAX_TOKENS") or "").strip()
    if raw_max_tokens:
        llm_kwargs["max_tokens"] = int(raw_max_tokens)
    Settings.llm = DashScope(**llm_kwargs)
    embed_model_name = os.getenv("EMBEDDING_MODEL") or DashScopeTextEmbeddingModels.TEXT_EMBEDDING_V2
    # The query text type is kept from the original configuration.
    Settings.embed_model = DashScopeEmbedding(model_name=embed_model_name,
                                              text_type=DashScopeTextEmbeddingType.TEXT_TYPE_QUERY)
 def init_azure_openai():
    """Azure OpenAI provider stub: currently configures nothing and returns None.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.core.constants import DEFAULT_TEMPERATURE
    # from llama_index.embeddings.azure_openai import AzureOpenAIEmbedding
    # from llama_index.llms.azure_openai import AzureOpenAI
    #
    # llm_deployment = os.environ["AZURE_OPENAI_LLM_DEPLOYMENT"]
    # embedding_deployment = os.environ["AZURE_OPENAI_EMBEDDING_DEPLOYMENT"]
    # max_tokens = os.getenv("LLM_MAX_TOKENS")
    # temperature = os.getenv("LLM_TEMPERATURE", DEFAULT_TEMPERATURE)
    # dimensions = os.getenv("EMBEDDING_DIM")
    #
    # azure_config = {
    #     "api_key": os.environ["AZURE_OPENAI_KEY"],
    #     "azure_endpoint": os.environ["AZURE_OPENAI_ENDPOINT"],
    #     "api_version": os.getenv("AZURE_OPENAI_API_VERSION")
    #     or os.getenv("OPENAI_API_VERSION"),
    # }
    #
    # Settings.llm = AzureOpenAI(
    #     model=os.getenv("MODEL"),
    #     max_tokens=int(max_tokens) if max_tokens is not None else None,
    #     temperature=float(temperature),
    #     deployment_name=llm_deployment,
    #     **azure_config,
    # )
    #
    # Settings.embed_model = AzureOpenAIEmbedding(
    #     model=os.getenv("EMBEDDING_MODEL"),
    #     dimensions=int(dimensions) if dimensions is not None else None,
    #     deployment_name=embedding_deployment,
    #     **azure_config,
    # )
    return None
 def init_fastembed():
    """FastEmbed (local embedding provider) stub: currently configures nothing.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.embeddings.fastembed import FastEmbedEmbedding
    #
    # embed_model_map: Dict[str, str] = {
    #     # Small and multilingual
    #     "all-MiniLM-L6-v2": "sentence-transformers/all-MiniLM-L6-v2",
    #     # Large and multilingual
    #     "paraphrase-multilingual-mpnet-base-v2": "sentence-transformers/paraphrase-multilingual-mpnet-base-v2",  # noqa: E501
    # }
    #
    # # This will download the model automatically if it is not already downloaded
    # Settings.embed_model = FastEmbedEmbedding(
    #     model_name=embed_model_map[os.getenv("EMBEDDING_MODEL")]
    # )
    return None
 def init_groq():
    """Groq provider stub: currently configures nothing and returns None.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.llms.groq import Groq
    #
    # model_map: Dict[str, str] = {
    #     "llama3-8b": "llama3-8b-8192",
    #     "llama3-70b": "llama3-70b-8192",
    #     "mixtral-8x7b": "mixtral-8x7b-32768",
    # }
    #
    # Settings.llm = Groq(model=model_map[os.getenv("MODEL")])
    # # Groq does not provide embeddings, so we use FastEmbed instead
    # init_fastembed()
    return None
 def init_anthropic():
    """Anthropic provider stub: currently configures nothing and returns None.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.llms.anthropic import Anthropic
    #
    # model_map: Dict[str, str] = {
    #     "claude-3-opus": "claude-3-opus-20240229",
    #     "claude-3-sonnet": "claude-3-sonnet-20240229",
    #     "claude-3-haiku": "claude-3-haiku-20240307",
    #     "claude-2.1": "claude-2.1",
    #     "claude-instant-1.2": "claude-instant-1.2",
    # }
    #
    # Settings.llm = Anthropic(model=model_map[os.getenv("MODEL")])
    # # Anthropic does not provide embeddings, so we use FastEmbed instead
    # init_fastembed()
    return None
 def init_gemini():
    """Gemini provider stub: currently configures nothing and returns None.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.embeddings.gemini import GeminiEmbedding
    # from llama_index.llms.gemini import Gemini
    #
    # model_name = f"models/{os.getenv('MODEL')}"
    # embed_model_name = f"models/{os.getenv('EMBEDDING_MODEL')}"
    #
    # Settings.llm = Gemini(model=model_name)
    # Settings.embed_model = GeminiEmbedding(model_name=embed_model_name)
    return None
 def init_mistral():
    """Mistral provider stub: currently configures nothing and returns None.

    The disabled reference implementation is kept below as comments.
    """
    # from llama_index.embeddings.mistralai import MistralAIEmbedding
    # from llama_index.llms.mistralai import MistralAI
    #
    # Settings.llm = MistralAI(model=os.getenv("MODEL"))
    # Settings.embed_model = MistralAIEmbedding(model_name=os.getenv("EMBEDDING_MODEL"))
    return None
@@ -0,0 +1,150 @@
 import logging
 import os
 from typing import List
 from fastapi import APIRouter, BackgroundTasks, Depends, HTTPException, Request, status
 from llama_index.core.chat_engine.types import BaseChatEngine, NodeWithScore
 from llama_index.core.llms import MessageRole
 from llama_index.core.vector_stores.types import MetadataFilter, MetadataFilters
 from app.api.routers.events import EventCallbackHandler
 from app.api.routers.models import (
    ChatConfig,
    ChatData,
    Message,
    Result,
    SourceNodes,
 )
 from app.api.routers.vercel_response import VercelStreamResponse
 from app.api.services.llama_cloud import LLamaCloudFileService
 from app.engine import get_chat_engine
 # Router that carries the chat endpoints; `r` is a short alias for the same object.
 chat_router = r = APIRouter()
 # Messages are emitted through the "uvicorn" logger.
 logger = logging.getLogger("uvicorn")
 def process_response_nodes(
    nodes: List[NodeWithScore],
    background_tasks: BackgroundTasks,
 ):
    """Schedule a background download for each file the source nodes refer to.

    Args:
        nodes: source nodes returned with a chat response.
        background_tasks: FastAPI task collection that receives one task per file.
    """
    for pending_file in SourceNodes.get_download_files(nodes):
        background_tasks.add_task(
            LLamaCloudFileService.download_llamacloud_pipeline_file,
            pending_file,
        )
 # streaming endpoint - delete if not needed
@r.post("")
 async def chat(
    request: Request,
    data: ChatData,
    background_tasks: BackgroundTasks,
    chat_engine: BaseChatEngine = Depends(get_chat_engine),
 ):
    try:
        last_message_content = data.get_last_message_content()
        # 由于基于历史消息的提示词没有调整好，所以暂时屏蔽历史消息
        data.messages.clear()
        messages = data.get_history_messages()
        doc_ids = data.get_chat_document_ids()
        filters = generate_filters(doc_ids)
        params = data.data or {}
        logger.info("Creating chat engine with filters", filters.dict())
        chat_engine = get_chat_engine(filters=filters, params=params)
        event_handler = EventCallbackHandler()
        chat_engine.callback_manager.handlers.append(event_handler)  # type: ignore
        response = await chat_engine.astream_chat(last_message_content, messages)
        process_response_nodes(response.source_nodes, background_tasks)
        return VercelStreamResponse(request, event_handler, response, data)
    except Exception as e:
        logger.exception("Error in chat engine", exc_info=True)
        raise HTTPException(
            status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
            detail=f"Error in chat engine: {e}",
        ) from e
 def generate_filters(doc_ids):
    """Build the metadata filters used for retrieval.

    Documents whose "private" key is not "true" always match.  When ``doc_ids``
    is non-empty, documents whose "doc_id" is in ``doc_ids`` match as well (the
    two filters are combined with "or").

    Args:
        doc_ids: sized collection of document ids selected for this chat.

    Returns:
        A ``MetadataFilters`` instance.
    """
    has_selected_docs = len(doc_ids) > 0
    # "nin" (not in) keeps every public document, i.e. one without private == "true".
    public_only = MetadataFilter(
        key="private",
        value=["true"],
        operator="nin",  # type: ignore
    )
    if not has_selected_docs:
        return MetadataFilters(filters=[public_only])
    selected_docs = MetadataFilter(
        key="doc_id",
        value=doc_ids,
        operator="in",  # type: ignore
    )
    return MetadataFilters(
        filters=[public_only, selected_docs],
        condition="or",  # type: ignore
    )
 # non-streaming endpoint - delete if not needed
@r.post("/request")
 async def chat_request(
    data: ChatData,
    chat_engine: BaseChatEngine = Depends(get_chat_engine),
 ) -> Result:
    last_message_content = data.get_last_message_content()
    messages = data.get_history_messages()
    response = await chat_engine.achat(last_message_content, messages)
    return Result(
        result=Message(role=MessageRole.ASSISTANT, content=response.response),
        nodes=SourceNodes.from_source_nodes(response.source_nodes),
    )
@r.get("/config")
 async def chat_config() -> ChatConfig:
    starter_questions = None
    conversation_starters = os.getenv("CONVERSATION_STARTERS")
    if conversation_starters and conversation_starters.strip():
        starter_questions = conversation_starters.strip().split("\\n")
    return ChatConfig(starter_questions=starter_questions)
@r.get("/config/llamacloud")
 async def chat_llama_cloud_config():
    projects = LLamaCloudFileService.get_all_projects_with_pipelines()
    pipeline = os.getenv("LLAMA_CLOUD_INDEX_NAME")
    project = os.getenv("LLAMA_CLOUD_PROJECT_NAME")
    pipeline_config = (
        pipeline
        and project
        and {
            "pipeline": pipeline,
            "project": project,
        }
        or None
    )
    return {
        "projects": projects,
        "pipeline": pipeline_config,
    }
@@ -0,0 +1,149 @@
 import json
 import asyncio
 import logging
 from typing import AsyncGenerator, Dict, Any, List, Optional
 from llama_index.core.callbacks.base import BaseCallbackHandler
 from llama_index.core.callbacks.schema import CBEventType
 from llama_index.core.tools.types import ToolOutput
 from pydantic import BaseModel
 logger = logging.getLogger(__name__)
 class CallbackEvent(BaseModel):
    event_type: CBEventType
    payload: Optional[Dict[str, Any]] = None
    event_id: str = ""
    def get_retrieval_message(self) -> dict | None:
        if self.payload:
            nodes = self.payload.get("nodes")
            if nodes:
                msg = f"根据查询检索到 {len(nodes)} 源文件"
            else:
                msg = f"查询检索中: '{self.payload.get('query_str')}'"
            return {
                "type": "events",
                "data": {"title": msg},
            }
        else:
            return None
    def get_tool_message(self) -> dict | None:
        func_call_args = self.payload.get("function_call")
        if func_call_args is not None and "tool" in self.payload:
            tool = self.payload.get("tool")
            return {
                "type": "events",
                "data": {
                    "title": f"调用工具 {tool.name} ，参数: {func_call_args}",
                },
            }
    def _is_output_serializable(self, output: Any) -> bool:
        try:
            json.dumps(output)
            return True
        except TypeError:
            return False
    def get_agent_tool_response(self) -> dict | None:
        response = self.payload.get("response")
        if response is not None:
            sources = response.sources
            for source in sources:
                # Return the tool response here to include the toolCall information
                if isinstance(source, ToolOutput):
                    if self._is_output_serializable(source.raw_output):
                        output = source.raw_output
                    else:
                        output = source.content
                    return {
                        "type": "tools",
                        "data": {
                            "toolOutput": {
                                "output": output,
                                "isError": source.is_error,
                            },
                            "toolCall": {
                                "id": None,  # There is no tool id in the ToolOutput
                                "name": source.tool_name,
                                "input": source.raw_input,
                            },
                        },
                    }
    def to_response(self):
        try:
            match self.event_type:
                case "retrieve":
                    return self.get_retrieval_message()
                case "function_call":
                    return self.get_tool_message()
                case "agent_step":
                    return self.get_agent_tool_response()
                case _:
                    return None
        except Exception as e:
            logger.error(f"转换回应时间时发生错误，原因: {e}")
            return None
 class EventCallbackHandler(BaseCallbackHandler):
    """Callback handler that queues events which can be rendered as a response.

    Start and end events whose ``CallbackEvent.to_response()`` is not None are
    pushed onto an asyncio queue; ``async_event_gen`` drains that queue.
    """
    _aqueue: asyncio.Queue
    # NOTE(review): never set inside this class; presumably flipped to True by the
    # code that consumes async_event_gen once the stream is finished — confirm.
    is_done: bool = False
    def __init__(
        self,
    ):
        """Initialize the base callback handler."""
        # The same event types are ignored for both start and end notifications.
        ignored_events = [
            CBEventType.CHUNKING,
            CBEventType.NODE_PARSING,
            CBEventType.EMBEDDING,
            CBEventType.LLM,
            CBEventType.TEMPLATING,
        ]
        super().__init__(ignored_events, ignored_events)
        self._aqueue = asyncio.Queue()
    def on_event_start(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any,
    ) -> str:
        """Queue the start event when it has a response form.

        Returns:
            The ``event_id`` that was passed in, as the ``-> str`` annotation
            promises (the method previously fell through and returned None).
        """
        event = CallbackEvent(event_id=event_id, event_type=event_type, payload=payload)
        if event.to_response() is not None:
            self._aqueue.put_nowait(event)
        return event_id
    def on_event_end(
        self,
        event_type: CBEventType,
        payload: Optional[Dict[str, Any]] = None,
        event_id: str = "",
        **kwargs: Any,
    ) -> None:
        """Queue the end event when it has a response form."""
        event = CallbackEvent(event_id=event_id, event_type=event_type, payload=payload)
        if event.to_response() is not None:
            self._aqueue.put_nowait(event)
    def start_trace(self, trace_id: Optional[str] = None) -> None:
        """No-op."""
    def end_trace(
        self,
        trace_id: Optional[str] = None,
        trace_map: Optional[Dict[str, List[str]]] = None,
    ) -> None:
        """No-op."""
    async def async_event_gen(self) -> AsyncGenerator[CallbackEvent, None]:
        """Yield queued events until the queue is empty and ``is_done`` is True.

        Each wait on the queue is limited to 0.1 seconds so that the loop
        condition is re-checked regularly; a timeout just starts the next round.
        """
        while not self._aqueue.empty() or not self.is_done:
            try:
                yield await asyncio.wait_for(self._aqueue.get(), timeout=0.1)
            except asyncio.TimeoutError:
                pass
@@ -0,0 +1,253 @@
 import logging
 import os
 from typing import Any, Dict, List, Literal, Optional, Set
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.core.schema import NodeWithScore
 from pydantic import BaseModel, Field, validator, field_validator
 from pydantic.alias_generators import to_camel
 logger = logging.getLogger("uvicorn")
 class FileContent(BaseModel):
    # Content of an uploaded file, either inline text or document references.
    # Discriminator for how `value` must be interpreted.
    type: Literal["text", "ref"]
    # If the file is pure text then the value is a string;
    # otherwise, it's a list of document IDs
    value: str | List[str]
 class File(BaseModel):
    # Description of one file attached to a chat annotation.
    id: str
    # Inline text or document references for this file.
    content: FileContent
    filename: str
    # NOTE(review): unit is not visible in this module; presumably bytes - confirm with the client.
    filesize: int
    # Compared against the literal "csv" when building annotation context content.
    filetype: str
 class AnnotationFileData(BaseModel):
    # Payload of a file annotation: the list of attached files.
    files: List[File] = Field(
        default=[],
        description="List of files",
    )
    class Config:
        # NOTE(review): the example below uses the key "csvFiles" and a flat
        # "content"/"type" layout, while the model declares `files` whose items
        # use `content: FileContent` and `filetype` - confirm and align the example.
        json_schema_extra = {
            "example": {
                "csvFiles": [
                    {
                        "content": "Name, Age\nAlice, 25\nBob, 30",
                        "filename": "example.csv",
                        "filesize": 123,
                        "id": "123",
                        "type": "text/csv",
                    }
                ]
            }
        }
        # Field aliases are generated in camelCase.
        alias_generator = to_camel
 class Annotation(BaseModel):
    # A typed attachment on a chat message. `data` is either structured file
    # data or a plain list of strings.
    type: str
    data: AnnotationFileData | List[str]

    def to_content(self) -> str | None:
        """Render the annotation as extra prompt context.

        Only ``document_file`` annotations that carry CSV files produce content.

        Returns:
            A text block embedding the raw CSV content of every CSV file, or
            ``None`` when the annotation type is unsupported, carries no file
            data, or contains no CSV file.
        """
        if self.type != "document_file":
            logger.warning(
                f"The annotation {self.type} is not supported for generating context content"
            )
            return None
        if not isinstance(self.data, AnnotationFileData):
            # `data` may validate as a plain list of strings, which has no
            # `.files` attribute; treat it as "no content" instead of crashing.
            return None
        # We only support generating context content for CSV files for now
        csv_files = [file for file in self.data.files if file.filetype == "csv"]
        if not csv_files:
            return None
        return "Use data from following CSV raw content\n" + "\n".join(
            [f"```csv\n{csv_file.content.value}\n```" for csv_file in csv_files]
        )
 class Message(BaseModel):
    # One chat message exchanged between the client and the assistant.
    role: MessageRole
    content: str
    # Optional attachments; None when the message carries no annotations.
    annotations: List[Annotation] | None = None
 class ChatData(BaseModel):
    # Chat request payload: the ordered conversation plus optional extra data.
    messages: List[Message]
    data: Any = None

    class Config:
        json_schema_extra = {
            "example": {
                "messages": [
                    {
                        "role": "user",
                        "content": "What standards for letters exist?",
                    }
                ]
            }
        }

    @field_validator("messages")
    @classmethod
    def messages_must_not_be_empty(cls, v):
        """Reject requests that contain no message at all."""
        if len(v) == 0:
            raise ValueError("Messages must not be empty")
        return v

    def get_last_message_content(self) -> str:
        """
        Get the content of the last message along with the data content if available.
        Fallback to use data content from previous messages

        Raises:
            ValueError: if the chat contains no message.
        """
        if len(self.messages) == 0:
            raise ValueError("There is not any message in the chat")
        message_content = self.messages[-1].content
        for message in reversed(self.messages):
            if message.role != MessageRole.USER or message.annotations is None:
                continue
            # Materialize the contents: a lazy `filter` object is always truthy,
            # which previously disabled the fallback to earlier messages.
            annotation_contents = [
                content
                for content in (
                    annotation.to_content() for annotation in message.annotations
                )
                if content
            ]
            if not annotation_contents:
                continue
            annotation_text = "\n".join(annotation_contents)
            return f"{message_content}\n{annotation_text}"
        return message_content

    def get_history_messages(self) -> List[ChatMessage]:
        """
        Get the history messages (every message except the last one)
        """
        return [
            ChatMessage(role=message.role, content=message.content)
            for message in self.messages[:-1]
        ]

    def is_last_message_from_user(self) -> bool:
        """Tell whether the most recent message was sent by the user.

        Returns ``False`` for an empty conversation instead of raising
        ``IndexError`` (the list can be emptied after validation).
        """
        return bool(self.messages) and self.messages[-1].role == MessageRole.USER

    def get_chat_document_ids(self) -> List[str]:
        """
        Get the de-duplicated document IDs referenced by user file annotations
        """
        document_ids: List[str] = []
        for message in self.messages:
            if message.role != MessageRole.USER or message.annotations is None:
                continue
            for annotation in message.annotations:
                if annotation.type != "document_file":
                    continue
                # `data` may be a plain list of strings, which has no `.files`.
                if not isinstance(annotation.data, AnnotationFileData):
                    continue
                for fi in annotation.data.files or []:
                    if fi.content.type != "ref":
                        continue
                    value = fi.content.value
                    if isinstance(value, str):
                        # A lone string is one ID; `+=` would split it into characters.
                        document_ids.append(value)
                    else:
                        document_ids.extend(value)
        return list(set(document_ids))
 class LlamaCloudFile(BaseModel):
    # Identifies a file by its name and the pipeline it belongs to.
    # Equality and hashing use exactly these two fields, so instances can be
    # de-duplicated in a set.
    file_name: str
    pipeline_id: str

    def __eq__(self, other):
        if isinstance(other, LlamaCloudFile):
            mine = (self.file_name, self.pipeline_id)
            theirs = (other.file_name, other.pipeline_id)
            return mine == theirs
        return NotImplemented

    def __hash__(self):
        identity = (self.file_name, self.pipeline_id)
        return hash(identity)
 class SourceNodes(BaseModel):
    # Serializable view of a retrieved node: id, metadata, score, text and an
    # optional URL pointing at the underlying file.
    id: str
    metadata: Dict[str, Any]
    score: Optional[float]
    text: str
    url: Optional[str]

    @classmethod
    def from_source_node(cls, source_node: NodeWithScore):
        """Build an entry from a scored node, using the node text as `text`."""
        metadata = source_node.node.metadata
        url = cls.get_url_from_metadata(metadata)
        text = source_node.node.text
        return cls(
            id=source_node.node.node_id,
            metadata=metadata,
            score=source_node.score,
            text=text,  # type: ignore
            url=url,
        )

    @classmethod
    def get_url_from_metadata(cls, metadata: Dict[str, Any]) -> Optional[str]:
        """Resolve the URL of the file a node came from.

        Reads ``FILESERVER_URL_PREFIX`` from the environment. When both the
        prefix and ``metadata["file_name"]`` are available the URL points at
        the file server; otherwise it falls back to ``metadata["URL"]``.

        Returns:
            The URL, or ``None`` when neither source provides one.
        """
        url_prefix = os.getenv("FILESERVER_URL_PREFIX")
        if not url_prefix:
            logger.warning(
                "Warning: FILESERVER_URL_PREFIX not set in environment variables. Can't use file server"
            )
        file_name = metadata.get("file_name")
        if file_name and url_prefix:
            # file_name exists and file server is configured
            pipeline_id = metadata.get("pipeline_id")
            if pipeline_id and metadata.get("private") is None:
                # file is from LlamaCloud and was not ingested locally
                file_name = f"{pipeline_id}${file_name}"
                return f"{url_prefix}/output/llamacloud/{file_name}"
            is_private = metadata.get("private", "false") == "true"
            if is_private:
                return f"{url_prefix}/output/uploaded/{file_name}"
            return f"{url_prefix}/data/{file_name}"
        else:
            # fallback to URL in metadata (e.g. for websites)
            return metadata.get("URL")

    @classmethod
    def from_source_nodes(cls, source_nodes: List[NodeWithScore]):
        """Convert every scored node with `from_source_node`."""
        return [cls.from_source_node(node) for node in source_nodes]

    @staticmethod
    def get_download_files(nodes: List[NodeWithScore]) -> Set[LlamaCloudFile]:
        """Collect the distinct LlamaCloud files referenced by the nodes.

        A node qualifies when its metadata has no ``private`` key and has both
        ``pipeline_id`` and ``file_name``. The metadata is read directly from
        the nodes, so no intermediate models are built and no URL is computed.
        """
        llama_cloud_files: Set[LlamaCloudFile] = set()
        for node in nodes:
            metadata = node.node.metadata
            file_name = metadata.get("file_name")
            pipeline_id = metadata.get("pipeline_id")
            # Only download files that are from LlamaCloud and were not ingested locally
            if (
                metadata.get("private") is None
                and pipeline_id is not None
                and file_name is not None
            ):
                llama_cloud_files.add(
                    LlamaCloudFile(file_name=file_name, pipeline_id=pipeline_id)
                )
        return llama_cloud_files
 class Result(BaseModel):
    # Pairs a reply message with the source nodes that accompany it.
    result: Message
    nodes: List[SourceNodes]
 class ChatConfig(BaseModel):
    # Chat configuration; currently only carries the optional starter questions.
    # Serialized under the camelCase key "starterQuestions".
    starter_questions: Optional[List[str]] = Field(
        default=None,
        description="List of starter questions",
        serialization_alias="starterQuestions",
    )
    class Config:
        json_schema_extra = {
            "example": {
                "starterQuestions": [
                    "What standards for letters exist?",
                    "What are the requirements for a letter to be considered a letter?",
                ]
            }
        }
@@ -0,0 +1,25 @@
 import logging
 from typing import List
 from fastapi import APIRouter, HTTPException
 from pydantic import BaseModel
 from app.api.services.file import PrivateFileService
 file_upload_router = r = APIRouter()
 logger = logging.getLogger("uvicorn")
 class FileUploadRequest(BaseModel):
    # Body of the upload endpoint. The value is handed unchanged to
    # PrivateFileService.process_file.
    # NOTE(review): presumably a data URL ("data:<mime>;base64,<payload>") - confirm with the client.
    base64: str
@r.post("")
def upload_file(request: FileUploadRequest) -> List[str]:
    """Process an uploaded file and return what the file service returns.

    Any failure is logged with its traceback and reported to the client as a
    generic HTTP 500.
    """
    try:
        logger.info("Processing file")
        document_ids = PrivateFileService.process_file(request.base64)
    except Exception as e:
        logger.error(f"Error processing file: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Error processing file")
    else:
        return document_ids
@@ -0,0 +1,109 @@
 import json
 from aiostream import stream
 from fastapi import Request
 from fastapi.responses import StreamingResponse
 from llama_index.core.chat_engine.types import StreamingAgentChatResponse
 from app.api.routers.events import EventCallbackHandler
 from app.api.routers.models import ChatData, Message, SourceNodes
 from app.api.services.suggestion import NextQuestionSuggestion
 class VercelStreamResponse(StreamingResponse):
    """
    Class to convert the response from the chat engine to the streaming format expected by Vercel
    """

    TEXT_PREFIX = "0:"
    DATA_PREFIX = "8:"

    @classmethod
    def convert_text(cls, token: str):
        """Encode a text token as one ``0:<json string>`` line."""
        # Escape newlines and double quotes to avoid breaking the stream
        token = json.dumps(token)
        return f"{cls.TEXT_PREFIX}{token}\n"

    @classmethod
    def convert_data(cls, data: dict):
        """Encode a data payload as one ``8:[<json object>]`` line."""
        data_str = json.dumps(data)
        return f"{cls.DATA_PREFIX}[{data_str}]\n"

    def __init__(
        self,
        request: Request,
        event_handler: EventCallbackHandler,
        response: StreamingAgentChatResponse,
        chat_data: ChatData,
    ):
        """Wrap the merged text/event stream in a streaming HTTP response."""
        content = VercelStreamResponse.content_generator(
            request, event_handler, response, chat_data
        )
        super().__init__(content=content)

    @classmethod
    async def content_generator(
        cls,
        request: Request,
        event_handler: EventCallbackHandler,
        response: StreamingAgentChatResponse,
        chat_data: ChatData,
    ):
        """Merge answer tokens, suggested questions, sources and callback events.

        The text stream is the leading stream: once it has finished (or
        failed), the event handler is marked as done so the event stream ends
        too. Streaming stops early when the client disconnects.
        """
        # Local import: this module does not import `logging` at file level.
        import logging

        log = logging.getLogger("uvicorn")

        # Yield the text response
        async def _chat_response_generator():
            final_response = ""
            try:
                async for token in response.async_response_gen():
                    final_response += token
                    yield cls.convert_text(token)

                # Generate questions that user might interested to
                conversation = chat_data.messages + [
                    Message(role="assistant", content=final_response)
                ]
                try:
                    questions = await NextQuestionSuggestion.suggest_next_questions(
                        conversation
                    )
                except Exception as error:
                    # Suggestions are optional: a failure here must not stop
                    # the sources from being delivered to the client.
                    log.warning(f"Error generating next questions: {error}")
                    questions = []
                if len(questions) > 0:
                    yield cls.convert_data(
                        {
                            "type": "suggested_questions",
                            "data": questions,
                        }
                    )
            finally:
                # the text_generator is the leading stream, once it's finished, also finish the event stream
                event_handler.is_done = True

            # Yield the source nodes
            yield cls.convert_data(
                {
                    "type": "sources",
                    "data": {
                        "nodes": [
                            # `model_dump()` is the pydantic v2 replacement of `.dict()`
                            SourceNodes.from_source_node(node).model_dump()
                            for node in response.source_nodes
                        ]
                    },
                }
            )

        # Yield the events from the event handler
        async def _event_generator():
            async for event in event_handler.async_event_gen():
                event_response = event.to_response()
                if event_response is not None:
                    yield cls.convert_data(event_response)

        combine = stream.merge(_chat_response_generator(), _event_generator())
        is_stream_started = False
        async with combine.stream() as streamer:
            async for output in streamer:
                if not is_stream_started:
                    is_stream_started = True
                    # Stream a blank message to start the stream
                    yield cls.convert_text("")

                yield output

                if await request.is_disconnected():
                    break
@@ -48,6 +48,8 @@ async def chat(
 ):
    try:
        last_message_content = data.get_last_message_content()
        # 由于基于历史消息的提示词没有调整好，所以暂时屏蔽历史消息
        data.messages.clear()
        messages = data.get_history_messages()
        doc_ids = data.get_chat_document_ids()
@@ -124,7 +126,7 @@ async def chat_config() -> ChatConfig:
    starter_questions = None
    conversation_starters = os.getenv("CONVERSATION_STARTERS")
    if conversation_starters and conversation_starters.strip():
-        starter_questions = conversation_starters.strip().split("\n")
+        starter_questions = conversation_starters.strip().split("\\n")
    return ChatConfig(starter_questions=starter_questions)
@@ -4,7 +4,7 @@ from typing import Any, Dict, List, Literal, Optional, Set
 from llama_index.core.llms import ChatMessage, MessageRole
 from llama_index.core.schema import NodeWithScore
-from pydantic import BaseModel, Field, validator
+from pydantic import BaseModel, Field, validator, field_validator
 from pydantic.alias_generators import to_camel
 logger = logging.getLogger("uvicorn")
@@ -89,7 +89,7 @@ class ChatData(BaseModel):
            }
        }
-    @validator("messages")
+    @field_validator("messages")
    def messages_must_not_be_empty(cls, v):
        if len(v) == 0:
            raise ValueError("Messages must not be empty")
@@ -173,7 +173,8 @@ class SourceNodes(BaseModel):
    def from_source_node(cls, source_node: NodeWithScore):
        metadata = source_node.node.metadata
        url = cls.get_url_from_metadata(metadata)
-        text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
+        #text = 'filename' in metadata and metadata['filename'] or source_node.node.node_id
        text = source_node.node.text
        return cls(
            id=source_node.node.node_id,
            metadata=metadata,
@@ -0,0 +1,113 @@
 import base64
 import mimetypes
 import os
 from pathlib import Path
 from typing import Dict, List
 from uuid import uuid4
 from app.engine.index import get_index
 from llama_index.core import VectorStoreIndex
 from llama_index.core.ingestion import IngestionPipeline
 from llama_index.core.readers.file.base import (
    _try_loading_included_file_formats as get_file_loaders_map,
 )
 from llama_index.core.readers.file.base import (
    default_file_metadata_func,
 )
 from llama_index.core.schema import Document
 from llama_index.indices.managed.llama_cloud.base import LlamaCloudIndex
 from llama_index.readers.file import FlatReader
 def get_llamaparse_parser():
    """Return the LlamaParse parser when the file loader config enables it, else None."""
    # Imported lazily, inside the function, exactly as before.
    from app.engine.loaders import load_configs
    from app.engine.loaders.file import FileLoaderConfig, llama_parse_parser

    loader_settings = FileLoaderConfig(**load_configs()["file"])
    return llama_parse_parser() if loader_settings.use_llama_parse else None
 def default_file_loaders_map():
    """Return the built-in extension-to-reader map with ".txt" set to FlatReader."""
    loaders_by_extension = get_file_loaders_map()
    loaders_by_extension.update({".txt": FlatReader})
    return loaders_by_extension
 class PrivateFileService:
    """Stores user-uploaded files privately and ingests them into the index."""

    PRIVATE_STORE_PATH = "output/uploaded"

    @staticmethod
    def preprocess_base64_file(base64_content: str) -> tuple:
        """Split a data URL into raw bytes and a file extension.

        Args:
            base64_content: string of the form ``data:<mime>;base64,<payload>``.

        Returns:
            ``(file_bytes, extension)`` where ``extension`` includes the dot.

        Raises:
            ValueError: if the header is malformed or the MIME type has no
                known file extension.
        """
        header, separator, data = base64_content.partition(",")
        if not separator or ":" not in header:
            raise ValueError(
                "Invalid file content: expected 'data:<mime>;base64,<payload>'"
            )
        mime_type = header.split(";")[0].split(":", 1)[1]
        extension = mimetypes.guess_extension(mime_type)
        if extension is None:
            # Without this check the file would be stored as "<uuid>None".
            raise ValueError(f"MIME type {mime_type} is not supported")
        # File data as bytes
        return base64.b64decode(data), extension

    @staticmethod
    def store_and_parse_file(file_data, extension) -> List[Document]:
        """Write the file to the private store and load it as documents.

        LlamaParse is used when enabled; otherwise the default loader for the
        extension is used.

        Raises:
            ValueError: if no loader supports the extension.
        """
        # Resolve the reader first so an unsupported extension does not leave
        # an orphan file on disk.
        reader = get_llamaparse_parser()
        if reader is None:
            reader_cls = default_file_loaders_map().get(extension)
            if reader_cls is None:
                raise ValueError(f"File extension {extension} is not supported")
            reader = reader_cls()

        # Store file to the private directory under a random file name
        os.makedirs(PrivateFileService.PRIVATE_STORE_PATH, exist_ok=True)
        file_name = f"{uuid4().hex}{extension}"
        file_path = Path(os.path.join(PrivateFileService.PRIVATE_STORE_PATH, file_name))
        with open(file_path, "wb") as f:
            f.write(file_data)

        documents = reader.load_data(file_path)
        # Add custom metadata
        for doc in documents:
            doc.metadata["file_name"] = file_name
            doc.metadata["private"] = "true"
        return documents

    @staticmethod
    def process_file(base64_content: str) -> List[str]:
        """Store, parse and index an uploaded file.

        Returns:
            The ids of the documents created from the file.
        """
        file_data, extension = PrivateFileService.preprocess_base64_file(base64_content)
        documents = PrivateFileService.store_and_parse_file(file_data, extension)

        current_index = get_index()

        # Insert the documents into the index
        if isinstance(current_index, LlamaCloudIndex):
            # LlamaCloudIndex is a managed index so we don't need to process the nodes
            # just insert the documents
            for doc in documents:
                current_index.insert(doc)
        else:
            # Run the ingestion pipeline exactly once (it used to run twice,
            # with the first result discarded).
            pipeline = IngestionPipeline()
            nodes = pipeline.run(documents=documents)

            # Add the nodes to the index and persist it
            if current_index is None:
                current_index = VectorStoreIndex(nodes=nodes)
            else:
                current_index.insert_nodes(nodes=nodes)
            current_index.storage_context.persist(
                persist_dir=os.environ.get("STORAGE_DIR", "storage")
            )

        # Return the document ids
        return [doc.doc_id for doc in documents]
@@ -0,0 +1,114 @@
 import logging
 import os
 from typing import Any, Dict, List, Optional
 import requests
 from app.api.routers.models import LlamaCloudFile
 logger = logging.getLogger("uvicorn")
 class LLamaCloudFileService:
    """Client for listing LlamaCloud projects/pipelines and caching their files locally."""

    LLAMA_CLOUD_URL = "https://cloud.llamaindex.ai/api/v1"
    LOCAL_STORE_PATH = "output/llamacloud"
    DOWNLOAD_FILE_NAME_TPL = "{pipeline_id}${filename}"
    # Seconds to wait for a connection / for the next bytes of a response.
    # Without a timeout a stalled server would block the caller forever.
    REQUEST_TIMEOUT = 60

    @classmethod
    def get_all_projects(cls) -> List[Dict[str, Any]]:
        """Fetch the JSON list returned by the ``/projects`` endpoint."""
        url = f"{cls.LLAMA_CLOUD_URL}/projects"
        return cls._make_request(url)

    @classmethod
    def get_all_pipelines(cls) -> List[Dict[str, Any]]:
        """Fetch the JSON list returned by the ``/pipelines`` endpoint."""
        url = f"{cls.LLAMA_CLOUD_URL}/pipelines"
        return cls._make_request(url)

    @classmethod
    def get_all_projects_with_pipelines(cls) -> List[Dict[str, Any]]:
        """Return every project with its pipelines under the ``"pipelines"`` key.

        Returns an empty list (and logs the error) when a request fails.
        """
        try:
            projects = cls.get_all_projects()
            pipelines = cls.get_all_pipelines()
            return [
                {
                    **project,
                    "pipelines": [p for p in pipelines if p["project_id"] == project["id"]],
                }
                for project in projects
            ]
        except Exception as error:
            logger.error(f"Error listing projects and pipelines: {error}")
            return []

    @classmethod
    def _get_files(cls, pipeline_id: str) -> List[Dict[str, Any]]:
        """Fetch the file entries of a pipeline."""
        url = f"{cls.LLAMA_CLOUD_URL}/pipelines/{pipeline_id}/files"
        return cls._make_request(url)

    @classmethod
    def _get_file_detail(cls, project_id: str, file_id: str) -> Dict[str, Any]:
        """Fetch the content descriptor of a file (its ``"url"`` is used for download)."""
        url = f"{cls.LLAMA_CLOUD_URL}/files/{file_id}/content?project_id={project_id}"
        return cls._make_request(url)

    @classmethod
    def _download_file(cls, url: str, local_file_path: str):
        """Stream ``url`` to ``local_file_path``.

        The data is written to a temporary ``.part`` file and moved into place
        only after the download completed, so an interrupted download never
        leaves a truncated file that would later be mistaken for a cached one.
        """
        logger.info(f"Downloading file to {local_file_path}")
        # Create directory if it doesn't exist
        os.makedirs(cls.LOCAL_STORE_PATH, exist_ok=True)
        partial_path = f"{local_file_path}.part"
        try:
            with requests.get(url, stream=True, timeout=cls.REQUEST_TIMEOUT) as r:
                r.raise_for_status()
                with open(partial_path, "wb") as f:
                    for chunk in r.iter_content(chunk_size=8192):
                        f.write(chunk)
            os.replace(partial_path, local_file_path)
        except Exception:
            if os.path.exists(partial_path):
                os.remove(partial_path)
            raise
        logger.info("File downloaded successfully")

    @classmethod
    def download_llamacloud_pipeline_file(
        cls,
        file: LlamaCloudFile,
        force_download: bool = False,
    ):
        """Download a pipeline file into the local store unless it is already there.

        Errors are logged and swallowed: the download is best-effort.
        """
        file_name = file.file_name
        pipeline_id = file.pipeline_id

        # Check is the file already exists
        downloaded_file_path = cls.get_file_path(file_name, pipeline_id)
        if os.path.exists(downloaded_file_path) and not force_download:
            logger.debug(f"File {file_name} already exists in local storage")
            return
        try:
            logger.info(f"Downloading file {file_name} for pipeline {pipeline_id}")
            files = cls._get_files(pipeline_id)
            if not files or not isinstance(files, list):
                raise Exception("No files found in LlamaCloud")
            for file_entry in files:
                if file_entry["name"] == file_name:
                    file_id = file_entry["file_id"]
                    project_id = file_entry["project_id"]
                    file_detail = cls._get_file_detail(project_id, file_id)
                    cls._download_file(file_detail["url"], downloaded_file_path)
                    break
            else:
                # No entry matched: make the miss visible instead of silently returning.
                logger.warning(
                    f"File {file_name} not found in pipeline {pipeline_id}"
                )
        except Exception as error:
            logger.error(f"Error fetching file from LlamaCloud: {error}")

    @classmethod
    def get_file_name(cls, name: str, pipeline_id: str) -> str:
        """Return the local file name, ``<pipeline_id>$<name>``."""
        return cls.DOWNLOAD_FILE_NAME_TPL.format(pipeline_id=pipeline_id, filename=name)

    @classmethod
    def get_file_path(cls, name: str, pipeline_id: str) -> str:
        """Return the path of the file inside the local store."""
        return os.path.join(cls.LOCAL_STORE_PATH, cls.get_file_name(name, pipeline_id))

    @staticmethod
    def _make_request(
        url: str, data=None, headers: Optional[Dict] = None, method: str = "get"
    ):
        """Send a request and return the decoded JSON body.

        When ``headers`` is omitted, a bearer token built from the
        ``LLAMA_CLOUD_API_KEY`` environment variable is sent.

        Raises:
            requests.HTTPError: for a non-success status code.
        """
        if headers is None:
            headers = {
                "Accept": "application/json",
                "Authorization": f'Bearer {os.getenv("LLAMA_CLOUD_API_KEY")}',
            }
        response = requests.request(
            method,
            url,
            headers=headers,
            data=data,
            timeout=LLamaCloudFileService.REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response.json()
@@ -0,0 +1,48 @@
 from typing import List
 from app.api.routers.models import Message
 from llama_index.core.prompts import PromptTemplate
 from llama_index.core.settings import Settings
 from pydantic import BaseModel
 # Prompt for suggesting follow-up questions. Both placeholders use the
 # str.format style: {conversation} and {number_of_questions}. The count was
 # previously written as "$number_of_questions", which is never substituted.
 NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
    "你是一个乐于助人的助手！你的任务是对用户可能会问的下一个问题给出建议。 "
    "\n这是对话历史记录"
    "\n---------------------\n{conversation}\n---------------------"
    "考虑到对话历史记录，仅限于现在知识库已有内容, 请给我 {number_of_questions} 个你接下来可能会问题的问题！"
 )
 N_QUESTION_TO_GENERATE = 3
 class NextQuestions(BaseModel):
    """A list of questions that user might ask next"""
    questions: List[str]
 class NextQuestionSuggestion:
    """Asks the configured LLM for follow-up questions to a conversation."""

    @staticmethod
    async def suggest_next_questions(
        messages: List[Message],
        number_of_questions: int = N_QUESTION_TO_GENERATE,
    ) -> List[str]:
        """Suggest questions the user might ask next.

        Args:
            messages: the conversation, oldest message first.
            number_of_questions: how many questions to request from the LLM.

        Returns:
            The questions produced by the LLM's structured prediction.
        """
        # Reduce the cost by only using the last two messages
        last_user_message = None
        last_assistant_message = None
        for message in reversed(messages):
            # Keep only the most recent message of each role: an older message
            # must not overwrite one that was already found.
            if message.role == "user" and last_user_message is None:
                last_user_message = f"User: {message.content}"
            elif message.role == "assistant" and last_assistant_message is None:
                last_assistant_message = f"Assistant: {message.content}"
            if last_user_message and last_assistant_message:
                break
        # Skip a missing side so the literal "None" never reaches the prompt.
        conversation: str = "\n".join(
            part for part in (last_user_message, last_assistant_message) if part
        )
        output: NextQuestions = await Settings.llm.astructured_predict(
            NextQuestions,
            prompt=NEXT_QUESTIONS_SUGGESTION_PROMPT,
            conversation=conversation,
            # Must match the template variable name (was misspelled "nun_questions").
            number_of_questions=number_of_questions,
        )
        return output.questions
@@ -87,9 +87,7 @@ class PrivateFileService:
        nodes = pipeline.run(documents=documents)
        # Add the nodes to the index and persist it
-        indexs = get_index()
+        current_index = get_index()
        if len(indexs) > 0:
            current_index = list(indexs.values())[0]
        # Insert the documents into the index
        if isinstance(current_index, LlamaCloudIndex):
@@ -6,10 +6,10 @@ from llama_index.core.settings import Settings
 from pydantic import BaseModel
 NEXT_QUESTIONS_SUGGESTION_PROMPT = PromptTemplate(
-    "You're a helpful assistant! Your task is to suggest the next question that user might ask. "
+    "你是一个乐于助人的助手！你的任务是对用户可能会问的下一个问题给出建议。 "
-    "\nHere is the conversation history"
+    "\n这是对话历史记录"
    "\n---------------------\n{conversation}\n---------------------"
-    "Given the conversation history, please give me $number_of_questions questions that you might ask next!"
+    "考虑到对话历史记录，仅限于现在知识库已有内容, 请给我 $number_of_questions 个你接下来可能会问题的问题！"
 )
 N_QUESTION_TO_GENERATE = 3
@@ -0,0 +1,22 @@
 import logging
 from llama_index.core.indices import VectorStoreIndex
 from app.engine.vectordb import get_vector_store
 logger = logging.getLogger("uvicorn")
 index = None
 def get_index(params=None):
    """Return the module-level index, building it from the vector store on first use.

    ``params`` is accepted but not used.
    """
    global index
    if index is not None:
        return index
    logger.info("Connecting vector store...")
    # Load the index from the vector store
    # If you are using a vector store that doesn't store text,
    # you must load the index from both the vector store and the document store
    index = VectorStoreIndex.from_vector_store(get_vector_store())
    logger.info("Finished load index from vector store.")
    return index
@@ -0,0 +1,61 @@
 import os
 from llama_index.core.agent import AgentRunner, ReActChatFormatter
 from llama_index.core.settings import Settings
 from llama_index.core.tools.query_engine import QueryEngineTool
 from app.engine.engine import create_query_engine, create_summary_query_engine
 from app.engine.index import get_index
 #from app.engine.loaders.db import makeDescriptionByEngine
 from app.engine.tools import ToolFactory
 def get_chat_engine(filters=None, params=None):
    """Build the ReAct agent used to answer chat requests.

    Reads SYSTEM_PROMPT, TOP_K (default "3") and RERANK_ENABLED from the
    environment, registers a summary query tool and a knowledge-base query
    tool on the shared index, appends the tools from ToolFactory.from_env(),
    and returns an AgentRunner that uses a custom ReAct prompt header.

    Args:
        filters: passed through to both query-engine factories.
        params: accepted but not used in this function.

    NOTE(review): `use_reranker` is the raw environment string (or None), not
    a parsed boolean - confirm that the engine factories interpret it. The
    sample .env shown with this commit defines ENABLE_RERANK, not
    RERANK_ENABLED - confirm which name is intended.
    """
    system_prompt = os.getenv("SYSTEM_PROMPT")
    top_k = int(os.getenv("TOP_K", "3"))
    use_reranker = os.getenv("RERANK_ENABLED")
    tools = []
    # Create the SQL query tool (currently disabled; kept below for reference)
 #    sql_query_engine = create_summary_query_engine(index)
    # sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
    #                                                name="zjdata_query_tool",
    #                                                description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具"
    #                                                )
    #tools.append(sql_query_tool)
    # Add query tool if index exists
    index = get_index()
    if index is not None:
        summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
        summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
                                                            description="适用于任何需要进行全面总结、概括的要求。",
                                                            )
        query_engine = create_query_engine(index,top_k,use_reranker,filters)
        query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
                                                          description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。",
                                                          )
        # The summary tool is registered before the knowledge-base tool.
        tools.append(summary_query_tool)
        tools.append(query_engine_tool)
    # Add additional tools
    tools += ToolFactory.from_env()
    # Despite its name, this is a single string: the ReAct header template with
    # {tool_desc} and {tool_names} placeholders, passed as the first argument
    # of ReActChatFormatter.from_defaults.
    prefix_messages = ("""您的设计旨在帮助完成各种任务，从回答问题到提供其他类型分析的摘要。\n\n##工具\n\n你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n这可能需要将任务分解为子任务，并使用不同的工具来完成每个子任务。\n\n你可以访问以下工具：\n{tool_desc}\n\n\n##输出格式\n\n请用与问题相同的语言回答，并使用以下格式：\n\n   \nThought: 用户当前的语言是：(user's language)。我需要使用工具来帮助我回答问题。\nAction: 如果使用工具,则为工具名称(one of {tool_names})。\nAction Input: 输入给工具的内容，使用JSON格式表示kwargs（例如{{\"input\": \"hello world\", \"num_beams\": 5}}）\n   \n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n切勿用Markdown代码标记包围你的响应。如果需要，可以在响应中使用代码标记。\n\n请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n如果使用此格式，用户将以下面的格式进行回应：\n\n   \nObservation: 工具响应\n   \n\n你应该继续重复上述格式，直到你有足够的信息来回答问题而无需使用更多工具。此时，你必须使用以下两种格式之一进行回答：\n\n   \nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\nAnswer: [你的答案（与用户问题相同的语言）]\n   \n\n   \nThought: 我无法使用提供的工具回答问题。\nAnswer: [你的答案（与用户问题相同的语言）]\n   \n\n##如果从工具中得到的回应是Empty Response，那么只需要回答“我不知道”，不需要额外回答别的内容。## 当前对话\n\n以下是当前对话，由人类和助手的消息交替组成。\n""")
    react_chat_formatter = ReActChatFormatter.from_defaults(prefix_messages)
    agentrunner = AgentRunner.from_llm(
        llm=Settings.llm,
        tools=tools,
        react_chat_formatter=react_chat_formatter,
        system_prompt=system_prompt,
        verbose=True,
    )
    return agentrunner
    # Disabled alternative: a function-calling worker wrapped in a structured planner.
    # create the function calling worker for reasoning
    # worker = FunctionCallingAgentWorker.from_tools(
    #     tools, verbose=True
    # )
    #
    # # wrap the worker in the top-level planner
    # return StructuredPlannerAgent(worker, tools)
@@ -0,0 +1 @@
 STORAGE_DIR = "storage"  # directory to cache the generated index
@@ -0,0 +1,108 @@
 import os
 from llama_index.core import SummaryIndex, SQLDatabase, VectorStoreIndex
 from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
 from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema
 from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.response_synthesizers import ResponseMode
 from llama_index.readers.database import DatabaseReader
 from sqlalchemy import create_engine
 from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
 from app.engine.retriever.HybridRetriever import HybridRetriever
 from app.settings import get_node_postprocessors
 def makeDescriptionByEngine(sql_database:SQLDatabase):
    """Build one ``SQLTableSchema`` per usable table of ``sql_database``.

    Tables with more than 150 columns are skipped.  For the
    ``gongchengshuxing`` table the schema context lists attribute names read
    from the table: at most 30 rows are inspected and the first returned row
    is skipped.  Every other table gets an empty context string.

    Args:
        sql_database: Database whose usable tables are described.

    Returns:
        List of ``SQLTableSchema`` objects in the order the tables are reported.
    """
    reader = DatabaseReader(sql_database)
    table_schema_objs = []
    for table_name in sql_database.get_usable_table_names():
        columns = sql_database.get_table_columns(table_name)
        if len(columns) > 150:
            continue
        stats_txt = ""
        if table_name == 'gongchengshuxing':
            documents = reader.load_data(query='select name from gongchengshuxing')
            attribute_names = []
            # Rows 1..29 only: row 0 is skipped and at most 30 rows are inspected.
            for document in documents[1:30]:
                # Split on the FIRST ':' only, so values that themselves contain
                # ':' are kept whole; fall back to the full text when the row
                # has no separator instead of raising IndexError.
                label, separator, value = document.text.partition(':')
                attribute_names.append(value if separator else label)
            stats_txt = '该表中有以下属性:' + ','.join(attribute_names)
        table_schema_objs.append(
            SQLTableSchema(table_name=table_name, context_str=stats_txt)
        )
    return table_schema_objs
 def get_Retriever(index,**kwargs):
    """Return a retriever for ``index``, hybrid or plain depending on the environment.

    When the ``HYBRID_ENABLED`` environment variable title-cases to ``'True'``
    a ``HybridRetriever`` is built with ``alpha`` taken from ``HYBRID_ALPHA``
    (default ``0.5``); otherwise ``index.as_retriever`` is used.

    Args:
        index: Object exposing ``as_retriever(**kwargs)``.
        **kwargs: Forwarded unchanged to whichever retriever is created.

    Returns:
        The retriever instance.
    """
    hybrid_flag = os.getenv("HYBRID_ENABLED", 'False')
    if hybrid_flag is None or hybrid_flag.title() != 'True':
        return index.as_retriever(**kwargs)
    blend = float(os.getenv("HYBRID_ALPHA", "0.5"))
    return HybridRetriever(index, alpha=blend, **kwargs)
 sql_database = None
 sql_obj_index = None
 # Create the SQL (table-retrieval) query engine
 def create_sql_query_engine(top_k=3, use_reranker=False, filters=None):
    """Build a ``SQLTableRetrieverQueryEngine`` over the configured SQL database.

    This function used to be declared as ``create_summary_query_engine`` and
    was immediately shadowed by the later definition of the same name, which
    made it unreachable.  It now has its own name; the public
    ``create_summary_query_engine`` is unaffected.

    The database (from ``SQL_DATABASE_URL``) and the table object index are
    created on first use and cached in the module globals ``sql_database`` and
    ``sql_obj_index``.

    Args:
        top_k: ``similarity_top_k`` used when retrieving table schemas.
        use_reranker: Accepted for signature parity; not used here.
        filters: Accepted for signature parity; not used here.

    Returns:
        The SQL table retriever query engine.
    """
    global sql_obj_index
    global sql_database
    if sql_obj_index is None or sql_database is None:
        sqlengine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
        sql_database = SQLDatabase(sqlengine)
        table_schema_objs = makeDescriptionByEngine(sql_database)
        table_node_mapping = SQLTableNodeMapping(sql_database)
        sql_obj_index = ObjectIndex.from_objects(
            table_schema_objs,
            table_node_mapping,
            index_cls=VectorStoreIndex,
        )
    # Build the SQL query engine on top of the cached database and index
    sql_query_engine = SQLTableRetrieverQueryEngine(sql_database,
                                                    sql_obj_index.as_retriever(similarity_top_k=top_k),
                                                    verbose=True,
                                                    )
    return sql_query_engine
 # Create a summary query engine
 def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None):
    """Build a tree-summarize query engine over every node in the index's vector store.

    Args:
        index: Index whose ``vector_store.get_nodes`` supplies the nodes.
        top_k: Not used by this function.
        use_reranker: Not used by this function.
        filters: Not used by this function.

    Returns:
        A streaming, async-enabled query engine created from a ``SummaryIndex``.
    """
    all_nodes = index.vector_store.get_nodes(node_ids=None)
    engine_options = dict(
        response_mode=ResponseMode.TREE_SUMMARIZE,
        use_async=True,
        streaming=True,
    )
    return SummaryIndex(all_nodes).as_query_engine(**engine_options)
 # Create a query engine
 def create_query_engine(index, top_k=3, use_reranker=False, filters=None):
    """Build the vector-retrieval query engine with the project prompt templates.

    Args:
        index: Index handed to ``get_Retriever``.
        top_k: ``similarity_top_k`` for the retriever.
        use_reranker: When truthy, node postprocessors from
            ``get_node_postprocessors()`` are attached; otherwise none.
        filters: Metadata filters forwarded to the retriever.

    Returns:
        A streaming, async-enabled ``RetrieverQueryEngine``.
    """
    # Resolve the optional postprocessors first, then the retriever
    rerankers = get_node_postprocessors() if use_reranker else None
    retriever = get_Retriever(index, similarity_top_k=top_k, filters=filters)
    return RetrieverQueryEngine.from_args(
        retriever,
        text_qa_template=text_qa_template,
        refine_template=refine_template,
        summary_template=summary_template,
        simple_template=simple_template,
        node_postprocessors=rerankers,
        use_async=True,
        streaming=True,
    )
@@ -0,0 +1,94 @@
 from dotenv import load_dotenv
 load_dotenv()
 import logging
 import os
 from app.engine.loaders import get_documents
 from app.engine.vectordb import get_vector_store
 from app.settings import init_settings
 from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
 from llama_index.core.ingestion import IngestionPipeline
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.core.settings import Settings
 from llama_index.core.storage import StorageContext
 from llama_index.core.storage.docstore import SimpleDocumentStore
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger()
 STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
 def get_doc_store():
    """Return the document store persisted in ``STORAGE_DIR``, or a fresh one.

    A new in-memory ``SimpleDocumentStore`` is returned when the storage
    directory does not exist, because there is nothing to load from.
    """
    if not os.path.exists(STORAGE_DIR):
        return SimpleDocumentStore()
    return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
 def run_pipeline(docstore, vector_store, documents):
    """Split, embed and store ``documents`` through an ``IngestionPipeline``.

    Args:
        docstore: Document store given to the pipeline (strategy
            ``"upserts_and_delete"``).
        vector_store: Vector store that receives the embedded nodes.
        documents: Documents to ingest.

    Returns:
        The nodes returned by ``pipeline.run``.
    """
    splitter = SentenceSplitter(
        chunk_size=Settings.chunk_size,
        chunk_overlap=Settings.chunk_overlap,
    )
    ingestion = IngestionPipeline(
        transformations=[splitter, Settings.embed_model],
        docstore=docstore,
        docstore_strategy="upserts_and_delete",
        vector_store=vector_store,
    )
    # Run the ingestion pipeline and hand back the produced nodes
    return ingestion.run(show_progress=True, documents=documents)
 def persist_storage(docstore, vector_store):
    """Persist ``docstore`` and ``vector_store`` to ``STORAGE_DIR`` via a ``StorageContext``."""
    context = StorageContext.from_defaults(docstore=docstore, vector_store=vector_store)
    context.persist(STORAGE_DIR)
 def persist_BMRetriever(vector_store):
    """Build a ``CHBM25Retriever`` from the vector store's nodes and persist it.

    The target directory comes from ``BM_RETRIEVER_PATH`` (default
    ``"storage_bm"``) and ``similarity_top_k`` from ``TOP_K`` (default ``3``).
    """
    target_dir = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
    limit = int(os.getenv("TOP_K", "3"))
    stored_nodes = vector_store.get_nodes([])
    bm25 = CHBM25Retriever.from_defaults(similarity_top_k=limit, nodes=stored_nodes)
    bm25.persist(target_dir)
 def generate_datasource():
    """Load all configured documents, ingest them and persist every store.

    Steps: initialise settings, load documents, tag each with
    ``private="false"``, run the ingestion pipeline, then persist the
    document/vector storage and the BM25 retriever.
    """
    init_settings()
    logger.info("Generate index for the provided data")

    loaded_docs = get_documents()
    # Mark every document as public (required for filtering)
    for item in loaded_docs:
        item.metadata["private"] = "false"

    doc_store = get_doc_store()
    store = get_vector_store()

    # Ingest, then persist storage and the BM25 retriever
    run_pipeline(doc_store, store, loaded_docs)
    persist_storage(doc_store, store)
    persist_BMRetriever(store)
    logger.info("Finished generating the index")
 if __name__ == "__main__":
    from phoenix.trace import using_project
    # PHOENIX_PROJECT_NAME may be unset; os.getenv would then return None and
    # the string concatenation would raise TypeError, so fall back to a default.
    project_name = os.getenv("PHOENIX_PROJECT_NAME", "default") + "_generate"
    with using_project(project_name):
        generate_datasource()
@@ -0,0 +1,93 @@
 from llama_index.core import PromptTemplate
 # Question-answering prompt: a role/skills/constraints preamble followed by the
 # retrieved context and the user question.
 # Placeholders: {context_str} (retrieved context) and {query_str} (question).
 text_qa_template_str = (
    "# 角色\n"
    "你是一名博微造价工程数据查询助手，专精于电力工程文件中的信息。"
    "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答，"
    "如同直接从文件中提取的内容。\n"
    "知识库中已经导入一个工程的全部数据，请你站在当前工程的角度回答用户关于工程文件的问题。\n"
    "例如：询问“此工程”指当前导入的工程。询问“此工程名称”指当前导入的工程的工程名称。\n"
    "## 技能\n"
    "### 技能 1: 数据查询与提供\n"
    "- 准确回答所有关于电力工程造价的相关问题。\n"
    "- 提供具体数据，如成本估算、材料清单、劳动力需求等。\n"
    "- 确保提供的信息严格基于工程文档中的记录。\n"
    "### 技能 2: 技术性解释\n"
    "- 解释造价工程中的技术术语和概念。\n"
    "- 为复杂的工程细节提供清晰易懂的说明。\n"
    "## 约束\n"
    "- 仅回答与电力工程造价文件相关的具体问题。\n"
    "- 不进行任何超出文件内容的猜测或假设。\n"
    "- 所有回答均基于文件内容，采用客观和技术性的语言。\n"
    "- 请基于这些信息回答问题。如果无法找到相关信息，请不要额外发散回答，不要回答多余的信息，只需要回答“我不知道这个问题的答案”。\n"
    "以下为上下文信息\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "请根据上下文信息而非先前知识回答我的问题或回复我的指令。前面的上下文信息可能有用，也可能没用，你需要从我给出的上下文信息中选出与我的问题最相关的那些，来为你的回答提供依据。回答一定要忠于原文，简洁但不丢信息，不要胡乱编造。如果无法找到相关信息，请不要额外发散回答，不要回答多余的信息，只需要回答“我不知道这个问题的答案”。我的问题或指令是什么语种，你就用什么语种回复。\n"
    "如果是表结构或者是数据库的相关内容，只用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "问题：{query_str}\n"
    "你的回复： "
 )
 # Template object built from the string above.
 text_qa_template = PromptTemplate(text_qa_template_str)
 # Refinement prompt: shows the original question, the existing answer and
 # additional context, and asks for an improved answer.
 # Placeholders: {query_str}, {existing_answer}, {context_msg}.
 refine_template_str = (
    "这是原本的问题： {query_str}\n"
    "我们已经提供了回答: {existing_answer}\n"
    "现在我们有机会改进这个回答 "
    "使用以下更多上下文（仅当有助于改进回答时使用）\n"
    # NOTE(review): the next instruction line appears twice verbatim —
    # presumably for emphasis; confirm before de-duplicating.
    "如果新的上下文对回答没有影响，或者原来的回答已经正确，不要在上次回答的后边再加上多余的补充信息，直接返回原本的回答。\n"
    "如果新的上下文对回答没有影响，或者原来的回答已经正确，不要在上次回答的后边再加上多余的补充信息，直接返回原本的回答。\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "如果回答中已经包含有正确答案，不要返回多余的解释等信息，只返回正确答案\n"
    "如果是表结构或者是数据库的相关内容，仅用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "改进的回答: "
 )
 # Template object built from the string above.
 refine_template = PromptTemplate(refine_template_str)
 # Summary prompt: the same role/skills/constraints preamble as the QA prompt
 # (without the "current project" paragraph), followed by context gathered from
 # multiple sources and the query.
 # Placeholders: {context_str} and {query_str}.
 summary_template_str = (
    "# 角色\n"
    "你是一名博微造价工程数据查询助手，专精于电力工程文件中的信息。"
    "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答，"
    "如同直接从文件中提取的内容。\n"
    "## 技能\n"
    "### 技能 1: 数据查询与提供\n"
    "- 准确回答所有关于电力工程造价的相关问题。\n"
    "- 提供具体数据，如成本估算、材料清单、劳动力需求等。\n"
    "- 确保提供的信息严格基于工程文档中的记录。\n"
    "### 技能 2: 技术性解释\n"
    "- 解释造价工程中的技术术语和概念。\n"
    "- 为复杂的工程细节提供清晰易懂的说明。\n"
    "## 约束\n"
    "- 仅回答与电力工程造价文件相关的具体问题。\n"
    "- 不进行任何超出文件内容的猜测或假设。\n"
    "- 所有回答均基于文件内容，采用客观和技术性的语言。\n"
    "- 请基于这些信息回答问题。如果无法找到相关信息，请不要额外发散回答，不要回答多余的信息，只需要回答“我不知道这个问题的答案”。\n"
    "来自多个来源的上下文信息如下。\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "鉴于来自多个来源的信息而非先验知识， "
    "回答查询。\n"
    "如果是表结构或者是数据库的相关内容，只用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "Query: {query_str}\n"
    "Answer: "
 )
 # Template object built from the string above.
 summary_template = PromptTemplate(summary_template_str)
 # Pass-through prompt: consists solely of the {query_str} placeholder, with no
 # surrounding instructions.
 simple_template_str = (
    "{query_str}"
 )
 # Template object built from the string above.
 simple_template = PromptTemplate(simple_template_str)
@@ -0,0 +1,71 @@
 import os
 from llama_index.vector_stores.chroma import ChromaVectorStore
 from llama_index.vector_stores.qdrant import QdrantVectorStore
 from qdrant_client import qdrant_client
 qclient = None
 def get_qdrant_vector_store():
    """Return a ``QdrantVectorStore`` backed by a cached module-level client.

    Configuration is read from the environment:
      * ``VECTOR_STORE_COLLECTION`` - collection name (default ``"default"``)
      * ``VECTOR_STORE_PATH``       - local storage path; takes precedence
      * ``VECTOR_STORE_HOST``       - remote host (default ``"127.0.0.1"``)
      * ``VECTOR_STORE_PORT``       - remote port (default ``6333``)

    The Qdrant client is created once and stored in the global ``qclient``.

    Raises:
        ValueError: If neither a local path nor a host is configured, or if
            ``VECTOR_STORE_PORT`` is not an integer.
    """
    collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
    vector_store_path = os.getenv("VECTOR_STORE_PATH")
    # No trailing commas: they previously turned host/port into 1-tuples,
    # which were then passed to QdrantClient and made `not host` always False.
    host = os.getenv("VECTOR_STORE_HOST", "127.0.0.1")
    port = int(os.getenv("VECTOR_STORE_PORT", "6333"))
    # Only fail when BOTH ways of reaching a store are missing; the previous
    # `or` rejected every remote-only configuration.
    if not vector_store_path and not host:
        raise ValueError(
            "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
        )

    # if VECTOR_STORE_PATH is set, use a local QdrantVectorStore from the path
    # otherwise, use a remote QdrantVectorStore
    global qclient
    if qclient is None:
        if vector_store_path:
            qclient = qdrant_client.QdrantClient(
                path=vector_store_path,
            )
        else:
            qclient = qdrant_client.QdrantClient(
                host=host,
                port=port,
            )
    vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
    return vector_store
 def get_chroma_vector_store():
    """Return a ``ChromaVectorStore`` configured from the environment.

    A local store is used when ``VECTOR_STORE_PATH`` is set; otherwise a remote
    one is created from ``VECTOR_STORE_HOST`` and ``VECTOR_STORE_PORT``.  Both
    variants use the collection named by ``VECTOR_STORE_COLLECTION`` (default
    ``"default"``) with the ``hnsw:space`` metadata set to ``cosine``.

    Raises:
        ValueError: If no path is set and host or port is missing.
    """
    name = os.getenv("VECTOR_STORE_COLLECTION", "default")
    local_dir = os.getenv("VECTOR_STORE_PATH")
    cosine_space = {"metadata":{"hnsw:space":"cosine"}}

    # Local path wins; remote Chroma is only used without one
    # (ChromaDB Cloud is not supported yet)
    if local_dir:
        return ChromaVectorStore.from_params(
            persist_dir=local_dir,
            collection_name=name,
            collection_kwargs=cosine_space,
        )

    remote_host = os.getenv("VECTOR_STORE_HOST")
    remote_port = os.getenv("VECTOR_STORE_PORT")
    if not (remote_host and remote_port):
        raise ValueError(
            "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
        )
    return ChromaVectorStore.from_params(
        host=remote_host,
        port=int(remote_port),
        collection_name=name,
        collection_kwargs=cosine_space,
    )
 def get_vector_store():
    """Return the vector store selected by the ``VECTOR_STORE_TYPE`` environment variable.

    Supported values are ``"chroma"`` and ``"qdrant"``.

    Raises:
        ValueError: For any other value, including an unset variable.
    """
    kind = os.getenv("VECTOR_STORE_TYPE")
    if kind == "chroma":
        return get_chroma_vector_store()
    if kind == "qdrant":
        return get_qdrant_vector_store()
    raise ValueError(f"Invalid vector store type: {kind}")
@@ -1,79 +1,57 @@
 import os
-from llama_index.core import SQLDatabase, SummaryIndex, VectorStoreIndex
+from llama_index.core.agent import AgentRunner, ReActChatFormatter
 from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
 from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
 from llama_index.core.settings import Settings
 from llama_index.core.agent import AgentRunner, StructuredPlannerAgent, FunctionCallingAgentWorker
 from llama_index.core.tools.query_engine import QueryEngineTool
 from sqlalchemy import create_engine, Engine
-from app.engine.loaders.db import makeDescriptionByEngine
+from app.engine.engine import create_query_engine, create_summary_query_engine
 from app.engine.tools import ToolFactory
 from app.engine.index import get_index
 #from app.engine.loaders.db import makeDescriptionByEngine
 from app.engine.tools import ToolFactory
 sql_database = None
 sql_obj_index = None
 def get_chat_engine(filters=None, params=None):
    system_prompt = os.getenv("SYSTEM_PROMPT")
    top_k = int(os.getenv("TOP_K", "3"))
    use_reranker = os.getenv("RERANK_ENABLED")
    tools = []
    global sql_obj_index
    global sql_database
    if sql_obj_index is None:
        sqlengine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
        sql_database = SQLDatabase(sqlengine)
        table_schema_objs = makeDescriptionByEngine(sql_database)
        table_node_mapping = SQLTableNodeMapping(sql_database)
        sql_obj_index = ObjectIndex.from_objects(
            table_schema_objs,
            table_node_mapping,
            index_cls=VectorStoreIndex,
        )
    # 创建SQL查询工具
-    sql_query_engine = SQLTableRetrieverQueryEngine(sql_database,
+#    sql_query_engine = create_summary_query_engine(index)
-                                                    sql_obj_index.as_retriever(similarity_top_k=top_k),
+    # sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
-                                                    verbose=True,)
+    #                                                name="zjdata_query_tool",
-    sql_query_tool = QueryEngineTool.from_defaults(query_engine=sql_query_engine,
+    #                                                description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具"
-                                                   name="zjdata_query_tool",
+    #                                                )
-                                                   description="来源于一个由博微公司电力造价软件编制的造价工程文件。该文件以多张表格的形式存储存储了整个工程的全部数据内容。适用于以详细的自然语言查询表格数据方式查询造价工程各项具体属性、费用的数值。请先使用“zj_query_tool”无法解决才使用本工具")
+    #tools.append(sql_query_tool)
    # Add query tool if index exists
-    indexs = get_index()
+    index = get_index()
    if len(indexs) > 0:
        index = list(indexs.values())[0]
    if index is not None:
-        summary_index = SummaryIndex(index.vector_store.get_nodes(node_ids=None))
+        summary_query_engine = create_summary_query_engine(index,top_k,use_reranker,filters)
        summary_query_engine = summary_index.as_query_engine()
        summary_query_tool = QueryEngineTool.from_defaults( query_engine=summary_query_engine, name="summary_query_tool",
                                                            description="适用于任何需要进行全面总结、概括的要求。",
                                                            #description="适用于任何需要对所有内容进行全面总结的请求。有关电力造价领域更具体部分的问题，请使用zj_query_engine_tool",
                                                            )
-
+        query_engine = create_query_engine(index,top_k,use_reranker,filters)
        # 创建向量检索查询工具
        query_engine = index.as_query_engine(
            similarity_top_k=top_k, filters=filters
        )
        query_engine_tool = QueryEngineTool.from_defaults(query_engine=query_engine, name="zj_query_tool",
                                                          description="由博微公司编制的关于电力造价知识、电力造价编制软件知识和造价工程文件结构的知识库。适用于查询电力领域、电力造价领域、博微、博微电力、博微造价等业务等内容。如果本知识库没有直接答案但有解决思路的可以返回解决办法后建议使用“zjdata_query_tool”工具。",
                                                          )
        tools.append(summary_query_tool)
        tools.append(query_engine_tool)
        #tools.append(sql_query_tool)
    # Add additional tools
    tools += ToolFactory.from_env()
-    return AgentRunner.from_llm(
+    prefix_messages = ("""您的设计旨在帮助完成各种任务，从回答问题到提供其他类型分析的摘要。\n\n##工具\n\n你可以访问各种工具。你有责任按照你认为合适的顺序使用这些工具来完成当前的任务。\n这可能需要将任务分解为子任务，并使用不同的工具来完成每个子任务。\n\n你可以访问以下工具：\n{tool_desc}\n\n\n##输出格式\n\n请用与问题相同的语言回答，并使用以下格式：\n\n   \nThought: 用户当前的语言是：(user's language)。我需要使用工具来帮助我回答问题。\nAction: 如果使用工具,则为工具名称(one of {tool_names})。\nAction Input: 输入给工具的内容，使用JSON格式表示kwargs（例如{{\"input\": \"hello world\", \"num_beams\": 5}}）\n   \n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n请始终以Thought开始。\n\n切勿用Markdown代码标记包围你的响应。如果需要，可以在响应中使用代码标记。\n\n请为Action Input使用有效的JSON格式。不要这样做{{\'input\': \'hello world\', \'num_beams\': 5}}。\n\n如果使用此格式，用户将以下面的格式进行回应：\n\n   \nObservation: 工具响应\n   \n\n你应该继续重复上述格式，直到你有足够的信息来回答问题而无需使用更多工具。此时，你必须使用以下两种格式之一进行回答：\n\n   \nThought: 我可以不用任何工具来回答。我将使用用户的语言来回答。\nAnswer: [你的答案（与用户问题相同的语言）]\n   \n\n   \nThought: 我无法使用提供的工具回答问题。\nAnswer: [你的答案（与用户问题相同的语言）]\n   \n\n##如果从工具中得到的回应是Empty Response，那么只需要回答“我不知道”，不需要额外回答别的内容。## 当前对话\n\n以下是当前对话，由人类和助手的消息交替组成。\n""")
    react_chat_formatter = ReActChatFormatter.from_defaults(prefix_messages)
    agentrunner = AgentRunner.from_llm(
        llm=Settings.llm,
        tools=tools,
        react_chat_formatter=react_chat_formatter,
        system_prompt=system_prompt,
        verbose=True,
    )
    return agentrunner
    # create the function calling worker for reasoning
    # worker = FunctionCallingAgentWorker.from_tools(
    #     tools, verbose=True
@@ -0,0 +1,108 @@
 import os
 from llama_index.core import SummaryIndex, SQLDatabase, VectorStoreIndex
 from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
 from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex, SQLTableSchema
 from llama_index.core.query_engine import RetrieverQueryEngine
 from llama_index.core.response_synthesizers import ResponseMode
 from llama_index.readers.database import DatabaseReader
 from sqlalchemy import create_engine
 from app.engine.prompt import text_qa_template, refine_template, summary_template, simple_template
 from app.engine.retriever.HybridRetriever import HybridRetriever
 from app.settings import get_node_postprocessors
 def makeDescriptionByEngine(sql_database:SQLDatabase):
    """Build one ``SQLTableSchema`` per usable table of ``sql_database``.

    Tables with more than 150 columns are skipped.  For the
    ``gongchengshuxing`` table the schema context lists attribute names read
    from the table: at most 30 rows are inspected and the first returned row
    is skipped.  Every other table gets an empty context string.

    Args:
        sql_database: Database whose usable tables are described.

    Returns:
        List of ``SQLTableSchema`` objects in the order the tables are reported.
    """
    reader = DatabaseReader(sql_database)
    table_schema_objs = []
    for table_name in sql_database.get_usable_table_names():
        columns = sql_database.get_table_columns(table_name)
        if len(columns) > 150:
            continue
        stats_txt = ""
        if table_name == 'gongchengshuxing':
            documents = reader.load_data(query='select name from gongchengshuxing')
            attribute_names = []
            # Rows 1..29 only: row 0 is skipped and at most 30 rows are inspected.
            for document in documents[1:30]:
                # Split on the FIRST ':' only, so values that themselves contain
                # ':' are kept whole; fall back to the full text when the row
                # has no separator instead of raising IndexError.
                label, separator, value = document.text.partition(':')
                attribute_names.append(value if separator else label)
            stats_txt = '该表中有以下属性:' + ','.join(attribute_names)
        table_schema_objs.append(
            SQLTableSchema(table_name=table_name, context_str=stats_txt)
        )
    return table_schema_objs
 def get_Retriever(index,**kwargs):
    """Return a retriever for ``index``, hybrid or plain depending on the environment.

    When the ``HYBRID_ENABLED`` environment variable title-cases to ``'True'``
    a ``HybridRetriever`` is built with ``alpha`` taken from ``HYBRID_ALPHA``
    (default ``0.5``); otherwise ``index.as_retriever`` is used.

    Args:
        index: Object exposing ``as_retriever(**kwargs)``.
        **kwargs: Forwarded unchanged to whichever retriever is created.

    Returns:
        The retriever instance.
    """
    hybrid_flag = os.getenv("HYBRID_ENABLED", 'False')
    if hybrid_flag is None or hybrid_flag.title() != 'True':
        return index.as_retriever(**kwargs)
    blend = float(os.getenv("HYBRID_ALPHA", "0.5"))
    return HybridRetriever(index, alpha=blend, **kwargs)
 sql_database = None
 sql_obj_index = None
 # Create the SQL (table-retrieval) query engine
 def create_sql_query_engine(top_k=3, use_reranker=False, filters=None):
    """Build a ``SQLTableRetrieverQueryEngine`` over the configured SQL database.

    This function used to be declared as ``create_summary_query_engine`` and
    was immediately shadowed by the later definition of the same name, which
    made it unreachable.  It now has its own name; the public
    ``create_summary_query_engine`` is unaffected.

    The database (from ``SQL_DATABASE_URL``) and the table object index are
    created on first use and cached in the module globals ``sql_database`` and
    ``sql_obj_index``.

    Args:
        top_k: ``similarity_top_k`` used when retrieving table schemas.
        use_reranker: Accepted for signature parity; not used here.
        filters: Accepted for signature parity; not used here.

    Returns:
        The SQL table retriever query engine.
    """
    global sql_obj_index
    global sql_database
    if sql_obj_index is None or sql_database is None:
        sqlengine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
        sql_database = SQLDatabase(sqlengine)
        table_schema_objs = makeDescriptionByEngine(sql_database)
        table_node_mapping = SQLTableNodeMapping(sql_database)
        sql_obj_index = ObjectIndex.from_objects(
            table_schema_objs,
            table_node_mapping,
            index_cls=VectorStoreIndex,
        )
    # Build the SQL query engine on top of the cached database and index
    sql_query_engine = SQLTableRetrieverQueryEngine(sql_database,
                                                    sql_obj_index.as_retriever(similarity_top_k=top_k),
                                                    verbose=True,
                                                    )
    return sql_query_engine
 # Create a summary query engine
 def create_summary_query_engine(index, top_k=3, use_reranker=False, filters=None):
    """Build a tree-summarize query engine over every node in the index's vector store.

    Args:
        index: Index whose ``vector_store.get_nodes`` supplies the nodes.
        top_k: Not used by this function.
        use_reranker: Not used by this function.
        filters: Not used by this function.

    Returns:
        A streaming, async-enabled query engine created from a ``SummaryIndex``.
    """
    all_nodes = index.vector_store.get_nodes(node_ids=None)
    engine_options = dict(
        response_mode=ResponseMode.TREE_SUMMARIZE,
        use_async=True,
        streaming=True,
    )
    return SummaryIndex(all_nodes).as_query_engine(**engine_options)
 # Create a query engine
 def create_query_engine(index, top_k=3, use_reranker=False, filters=None):
    """Build the vector-retrieval query engine with the project prompt templates.

    Args:
        index: Index handed to ``get_Retriever``.
        top_k: ``similarity_top_k`` for the retriever.
        use_reranker: When truthy, node postprocessors from
            ``get_node_postprocessors()`` are attached; otherwise none.
        filters: Metadata filters forwarded to the retriever.

    Returns:
        A streaming, async-enabled ``RetrieverQueryEngine``.
    """
    # Resolve the optional postprocessors first, then the retriever
    rerankers = get_node_postprocessors() if use_reranker else None
    retriever = get_Retriever(index, similarity_top_k=top_k, filters=filters)
    return RetrieverQueryEngine.from_args(
        retriever,
        text_qa_template=text_qa_template,
        refine_template=refine_template,
        summary_template=summary_template,
        simple_template=simple_template,
        node_postprocessors=rerankers,
        use_async=True,
        streaming=True,
    )
@@ -5,9 +5,10 @@ load_dotenv()
 import logging
 import os
-from app.engine.loaders import get_document_Types, get_documents
+from app.engine.loaders import get_documents
 from app.engine.vectordb import get_vector_store
 from app.settings import init_settings
 from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
 from llama_index.core.ingestion import IngestionPipeline
 from llama_index.core.node_parser import SentenceSplitter
 from llama_index.core.settings import Settings
@@ -19,16 +20,17 @@ logger = logging.getLogger()
 STORAGE_DIR = os.getenv("STORAGE_DIR", "storage")
-def get_doc_store(docType:str):
+
 def get_doc_store():
    # If the storage directory is there, load the document store from it.
    # If not, set up an in-memory document store since we can't load from a directory that doesn't exist.
-    storeDir = os.path.join(STORAGE_DIR,docType)
+    if os.path.exists(STORAGE_DIR):
-    if os.path.exists(storeDir):
+        return SimpleDocumentStore.from_persist_dir(STORAGE_DIR)
        return SimpleDocumentStore.from_persist_dir(storeDir)
    else:
        return SimpleDocumentStore()
 def run_pipeline(docstore, vector_store, documents):
    pipeline = IngestionPipeline(
        transformations=[
@@ -48,6 +50,7 @@ def run_pipeline(docstore, vector_store, documents):
    return nodes
 def persist_storage(docstore, vector_store):
    storage_context = StorageContext.from_defaults(
        docstore=docstore,
@@ -55,28 +58,36 @@ def persist_storage(docstore, vector_store):
    )
    storage_context.persist(STORAGE_DIR)
 def persist_BMRetriever(vector_store):
    STORAGE_DIR = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
    top_k = int(os.getenv("TOP_K", "3"))
    bmRetriver = CHBM25Retriever.from_defaults(similarity_top_k=top_k,nodes=vector_store.get_nodes([]))
    bmRetriver.persist(STORAGE_DIR)
 def generate_datasource():
    init_settings()
    logger.info("Generate index for the provided data")
    # Get the stores and documents or create new ones
-    docTypes = get_document_Types()
+    documents = get_documents()
-    for docType in docTypes:
+    # Set private=false to mark the document as public (required for filtering)
-        documents = get_documents(docType)
+    for doc in documents:
-        # Set private=false to mark the document as public (required for filtering)
+        doc.metadata["private"] = "false"
-        for doc in documents:
+    docstore = get_doc_store()
-            doc.metadata["private"] = "false"
+    vector_store = get_vector_store()
        docstore = get_doc_store(docType)
        vector_store = get_vector_store(docType)
-        # Run the ingestion pipeline
+    # Run the ingestion pipeline
-        _ = run_pipeline(docstore, vector_store, documents)
+    _ = run_pipeline(docstore, vector_store, documents)
-        # Build the index and persist storage
+    # Build the index and persist storage
-        persist_storage(docstore, vector_store)
+    persist_storage(docstore, vector_store)
    persist_BMRetriever(vector_store)
    logger.info("Finished generating the index")
 if __name__ == "__main__":
    from phoenix.trace import using_project
    with using_project(os.getenv("PHOENIX_PROJECT_NAME") + "_generate") as obj:
@@ -1,23 +1,22 @@
 import logging
 from llama_index.core.indices import VectorStoreIndex
 from app.engine.vectordb import get_vector_store
-from app.engine.generate import get_document_Types
+
 logger = logging.getLogger("uvicorn")
-indexs = {}
+index = None
 def get_index(params=None):
-    global indexs
+    global index
-    if len(index) <= 0:
+    if index is None:
        logger.info("Connecting vector store...")
-        docTypes = get_document_Types()
+
-        for docType in docTypes:
+        store = get_vector_store()
-            store = get_vector_store(docType)
+        # Load the index from the vector store
-            # Load the index from the vector store
+        # If you are using a vector store that doesn't store text,
-            # If you are using a vector store that doesn't store text,
+        # you must load the index from both the vector store and the document store
-            # you must load the index from both the vector store and the document store
+        index = VectorStoreIndex.from_vector_store(store)
-            index = VectorStoreIndex.from_vector_store(store)
+        logger.info("Finished load index from vector store.")
-            logger.info("Finished load index from vector store.")
+
-            indexs[docType] = index
+    return index
    return indexs
@@ -0,0 +1,40 @@
 import logging
 import yaml
 from app.engine.loaders.db import DBLoaderConfig, get_db_documents
 from app.engine.loaders.file import FileLoaderConfig, get_file_documents
 from app.engine.loaders.web import WebLoaderConfig, get_web_documents
 logger = logging.getLogger(__name__)
 def load_configs():
    """Load and return the loader configuration from ``config/loaders.yaml``.

    The file is opened explicitly as UTF-8 so that non-ASCII content is read
    the same way regardless of the platform's default encoding.

    Returns:
        Whatever ``yaml.safe_load`` produces for the file.
    """
    with open("config/loaders.yaml", encoding="utf-8") as f:
        configs = yaml.safe_load(f)
    return configs
 def get_documents():
    documents = []
    config = load_configs()
    if config is None or len(config.items()) == 0:
        return  documents
    for loader_type, loader_config in config.items():
        logger.info(
            f"Loading documents from loader: {loader_type}, config: {loader_config}"
        )
        loader_config = loader_config or []
        match loader_type:
            case "file":
                document = get_file_documents(FileLoaderConfig(**loader_config))
            case "web":
                document = get_web_documents(WebLoaderConfig(**loader_config))
            case "db":
                document = get_db_documents(configs=[DBLoaderConfig(**cfg) for cfg in loader_config])
            case _:
                raise ValueError(f"Invalid loader type: {loader_type}")
        documents.extend(document)
    return documents
@@ -0,0 +1,140 @@
 import logging
 from typing import Any, List, Optional
 from llama_index.core import SQLDatabase, Document
 from llama_index.readers.database import DatabaseReader
 from pydantic import BaseModel
 from sqlalchemy import create_engine, text
 from sqlalchemy.engine import Engine
 logger = logging.getLogger(__name__)
 class CustomDatabaseReader(DatabaseReader):
    """Database reader that renders a whole query result as a single Document.

    The Document text is the explanation, a header line with the column
    names, then one comma-separated line per row.

    Args:
        sql_database (Optional[SQLDatabase]): SQL database to use,
            including table names to specify.
            See :ref:`Ref-Struct-Store` for more details.
        OR
        engine (Optional[Engine]): SQLAlchemy Engine object of the database connection.
        OR
        uri (Optional[str]): uri of the database connection.
        OR
        scheme (Optional[str]): scheme of the database connection.
        host (Optional[str]): host of the database connection.
        port (Optional[str]): port of the database connection.
        user (Optional[str]): user of the database connection.
        password (Optional[str]): password of the database connection.
        dbname (Optional[str]): dbname of the database connection.

    Raises:
        ValueError: If none of the alternatives above is fully provided.
    """
    def __init__(
            self,
            sql_database: Optional[SQLDatabase] = None,
            engine: Optional[Engine] = None,
            uri: Optional[str] = None,
            scheme: Optional[str] = None,
            host: Optional[str] = None,
            port: Optional[str] = None,
            user: Optional[str] = None,
            password: Optional[str] = None,
            dbname: Optional[str] = None,
            *args: Any,
            **kwargs: Any,
    ) -> None:
        """Initialize with parameters."""
        # The alternatives are tried in order of precedence; the first one
        # that is provided wins.
        if sql_database:
            self.sql_database = sql_database
        elif engine:
            self.sql_database = SQLDatabase(engine, *args, **kwargs)
        elif uri:
            self.uri = uri
            self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
        elif scheme and host and port and user and password and dbname:
            uri = f"{scheme}://{user}:{password}@{host}:{port}/{dbname}"
            self.uri = uri
            self.sql_database = SQLDatabase.from_uri(uri, *args, **kwargs)
        else:
            raise ValueError(
                "You must provide either a SQLDatabase, "
                "a SQL Alchemy Engine, a valid connection URI, or a valid "
                "set of credentials."
            )
    def load_data(self, query: str, explanation: str = "") -> List[Document]:
        """Run ``query`` and return its full result as one Document.

        Args:
            query (str): SQL statement to execute.
            explanation (str): Text placed on the first line of the document.
                Optional, so the method can also be called with a query only;
                ``None`` is treated as an empty string.

        Returns:
            List[Document]: A single-element list.  The document's ``name`` and
            ``context`` metadata hold the query and ``file_type`` is
            ``"application/vnd.ms-excel"``.

        Raises:
            ValueError: If ``query`` is ``None``.
        """
        # Validate before opening a connection.
        if query is None:
            raise ValueError("A query parameter is necessary to filter the data")
        lines = [explanation or ""]
        with self.sql_database.engine.connect() as connection:
            result = connection.execute(text(query))
            # Header line with the column names, then one line per row.
            lines.append(", ".join(f"{column}" for column in result.keys()))
            for row in result.fetchall():
                lines.append(", ".join(f"{value}" for value in row))
        # Every line, including the last, is newline-terminated.
        dco_str = "".join(line + "\n" for line in lines)
        doc = Document(text=dco_str)
        doc.metadata["name"] = query
        doc.metadata["context"] = query
        doc.metadata["file_type"] = "application/vnd.ms-excel"
        return [doc]
 class DBLoaderConfig(BaseModel):
    """Configuration of one database source for the db loader."""
    # Connection URI handed to SQLAlchemy's create_engine.
    uri: str
    # One dict per query; get_db_documents reads the keys "sql" and "explanation".
    queries: List[dict]  
 def get_db_documents(configs: list[DBLoaderConfig]):
    """Run every configured query and collect the resulting documents.

    Args:
        configs: Database loader configurations.  If the list is empty or the
            first entry has an empty ``uri``, a warning is logged and an empty
            list is returned.

    Returns:
        List of documents, one per executed query, in configuration order.
    """
    docs = []
    if len(configs) == 0 or configs[0].uri == "":
        logger.warning(
            "Failed to load database, error message: uri is empty. Return as empty document list."
        )
        return docs
    for entry in configs:
        engine = create_engine(entry.uri)
        try:
            loader = CustomDatabaseReader(SQLDatabase(engine))
            for query_dict in entry.queries:
                query = query_dict.get("sql", "")
                explanation = query_dict.get("explanation", "")
                logger.info(f"Loading data from database with query: {query}")
                docs.extend(loader.load_data(query=query, explanation=explanation))
        finally:
            # Release the engine's pooled connections once this source is
            # done; the engine is not used after this point.
            engine.dispose()
    return docs
@@ -0,0 +1,88 @@
 import os
 import logging
 from typing import Dict
 from llama_index.core.readers.base import BaseReader
 from llama_index.core.readers.json import JSONReader
 from llama_parse import LlamaParse
 from pydantic import BaseModel, validator
 logger = logging.getLogger(__name__)
 # Settings for the file loader: which directory to read and which parser to use.
 class FileLoaderConfig(BaseModel):
    # Directory handed to SimpleDirectoryReader by get_file_documents.
    data_dir: str = "data"
    # Selects the LlamaParse extractors instead of the local JSON reader.
    use_llama_parse: bool = False

    @validator("data_dir")
    def data_dir_must_exist(cls, v):
        """Accept ``data_dir`` only when it names an existing directory."""
        if os.path.isdir(v):
            return v
        raise ValueError(f"Directory '{v}' does not exist")
 def llama_parse_parser():
    """Create the LlamaParse parser used for cloud-side file parsing.

    Returns:
        A ``LlamaParse`` instance configured for markdown output, verbose
        logging, English language and no error suppression.

    Raises:
        ValueError: If ``LLAMA_CLOUD_API_KEY`` is missing from the environment.
    """
    if "LLAMA_CLOUD_API_KEY" not in os.environ:
        raise ValueError(
            "LLAMA_CLOUD_API_KEY environment variable is not set. "
            "Please set it in .env file or in your shell environment then run again!"
        )

    parser_options = {
        "result_type": "markdown",
        "verbose": True,
        "language": "en",
        "ignore_errors": False,
    }
    return LlamaParse(**parser_options)
 def llama_parse_extractor() -> Dict[str, LlamaParse]:
    """Map every file type LlamaParse supports to one shared parser instance."""
    from llama_parse.utils import SUPPORTED_FILE_TYPES

    shared_parser = llama_parse_parser()
    return dict.fromkeys(SUPPORTED_FILE_TYPES, shared_parser)
 def llama_local_extractor() -> Dict[str, BaseReader]:
    """Return the extractor table used without LlamaParse: a JSON reader for ``.json``."""
    json_reader = JSONReader(clean_json=False, levels_back=0)
    return {".json": json_reader}
 def get_file_documents(config: FileLoaderConfig):
    """Read all files below ``config.data_dir`` into documents.

    Args:
        config: File loader settings. ``use_llama_parse`` selects the
            LlamaParse extractors; otherwise the local JSON extractor is used.

    Returns:
        The documents produced by ``SimpleDirectoryReader.load_data()``, or an
        empty list when the failure originated in a function named
        ``_add_files`` (the empty-data-directory case).

    Raises:
        Exception: Any other error raised while building the reader or
            loading the data is re-raised unchanged.
    """
    from llama_index.core.readers import SimpleDirectoryReader

    try:
        if not config.use_llama_parse:
            extractors = llama_local_extractor()
        else:
            # LlamaParse is async first,
            # so nest_asyncio is applied to let it run in sync mode.
            import nest_asyncio

            nest_asyncio.apply()
            extractors = llama_parse_extractor()

        return SimpleDirectoryReader(
            config.data_dir,
            recursive=True,
            filename_as_id=True,
            raise_on_error=True,
            file_extractor=extractors,
        ).load_data()
    except Exception as e:
        import traceback

        # The innermost frame tells us where the error was raised.
        failing_function = traceback.extract_tb(e.__traceback__)[-1].name
        if failing_function != "_add_files":
            # Not the empty-data-dir case: propagate.
            raise e
        logger.warning(
            f"Failed to load file documents, error message: {e} . Return as empty document list."
        )
        return []
@@ -0,0 +1,37 @@
 import os
 import json
 from pydantic import BaseModel, Field
 # One crawl job consumed by get_web_documents.
 class CrawlUrl(BaseModel):
    # Start URL; passed to the reader's load_data().
    base_url: str
    # Forwarded to WholeSiteReader as its ``prefix`` argument.
    prefix: str
    # Forwarded to WholeSiteReader as ``max_depth``; must be >= 0, defaults to 1.
    max_depth: int = Field(default=1, ge=0)
 # Settings for the Selenium-driven web loader.
 class WebLoaderConfig(BaseModel):
    # Arguments added one by one to the Chrome Options; get_web_documents
    # treats None as "no arguments".
    driver_arguments: list[str] = Field(default=None)
    # Sites to crawl; empty by default.
    urls: list[CrawlUrl] = []
 def get_web_documents(config: WebLoaderConfig):
    """Crawl every configured site and return the scraped documents.

    A Chrome WebDriver is created per configured URL (with the configured
    driver arguments) and handed to a ``WholeSiteReader``.

    Args:
        config: Web loader settings; ``driver_arguments`` and ``urls`` may be
            empty or ``None``.

    Returns:
        A flat list with the documents returned by every crawl, in the order
        the URLs are configured.
    """
    from llama_index.readers.web import WholeSiteReader
    from selenium import webdriver
    from selenium.webdriver.chrome.options import Options

    options = Options()
    for arg in config.driver_arguments or []:
        options.add_argument(arg)

    docs = []
    # Iterate the None-safe list so that a missing ``urls`` value yields an
    # empty result instead of a TypeError.
    urls = config.urls or []
    for url in urls:
        scraper = WholeSiteReader(
            prefix=url.prefix,
            max_depth=url.max_depth,
            driver=webdriver.Chrome(options=options),
        )
        docs.extend(scraper.load_data(url.base_url))
    return docs
@@ -13,48 +13,8 @@ def load_configs():
        configs = yaml.safe_load(f)
    return configs
 def path_difference(path1:str, path2:str):
    import os
    path1 = os.path.abspath(path1)
    path2 = os.path.abspath(path2)
-    path1_parts = path1.split(os.path.sep)
+def get_documents():
    path2_parts = path2.split(os.path.sep)
    for i, part in enumerate(path1_parts):
        if part != path2_parts[i]:
            break
    else:
        i += 1
    pathKey = ''
    for j in range(i,len(path2_parts)):
        pathKey+=path2_parts[j] + '_'
    return pathKey[0:-1]
 def get_document_Types():
    """List the leaf directories below the file loader's data directory.

    The root is ``data`` unless the loaded configuration has a ``"file"``
    entry, in which case that entry's ``data_dir`` is used. Every directory
    without sub-directories is reported via ``path_difference(root, leaf)``.

    Returns:
        A list with one ``path_difference`` result per leaf directory.
    """
    import os

    root = 'data'
    configs = load_configs()
    if configs is not None and len(configs.items()) > 0:
        for loader_type, loader_config in configs.items():
            if loader_type != "file":
                continue
            root = FileLoaderConfig(**loader_config).data_dir
            break

    leaf_keys = []
    pending = [root]
    while pending:
        current = pending.pop()
        children = []
        for entry in os.listdir(current):
            candidate = os.path.join(current, entry)
            if os.path.isdir(candidate):
                children.append(candidate)
        if children:
            # Not a leaf yet: descend into every sub-directory.
            pending.extend(children)
        else:
            leaf_keys.append(path_difference(root, current))
    return leaf_keys
 def get_documents(docType:str):
    documents = []
    config = load_configs()
    if config is None or len(config.items()) == 0:
@@ -68,7 +28,7 @@ def get_documents(docType:str):
        loader_config = loader_config or []
        match loader_type:
            case "file":
-                document = get_file_documents(FileLoaderConfig(**loader_config),docType)
+                document = get_file_documents(FileLoaderConfig(**loader_config))
            case "web":
                document = get_web_documents(WebLoaderConfig(**loader_config))
            case "db":
@@ -1,20 +1,14 @@
 import os
 import logging
 from typing import List
 from typing import Any, List, Optional
 from llama_index.core.readers.base import BaseReader
 from llama_index.core.schema import Document
 from llama_index.core.utilities.sql_wrapper import SQLDatabase
 from sqlalchemy import text
 from sqlalchemy.engine import Engine
 from llama_index.core import SQLDatabase, Document
-from llama_index.core.objects import SQLTableSchema, SQLTableNodeMapping
+from llama_index.core.objects import SQLTableSchema
 from llama_index.core.readers.base import BaseReader
 from llama_index.readers.database import DatabaseReader
-from pydantic import BaseModel, validator
+from pydantic import BaseModel
 from llama_index.core.indices.vector_store import VectorStoreIndex
 from sqlalchemy import create_engine
 from sqlalchemy import text
 from sqlalchemy.engine import Engine
 logger = logging.getLogger(__name__)
@@ -119,32 +113,6 @@ class DBLoaderConfig(BaseModel):
    uri: str
    queries: List[str]
 def makeDescriptionByEngine(sql_database:SQLDatabase):
    """Build one ``SQLTableSchema`` per usable table with at most 150 columns.

    Only the ``gongchengshuxing`` table gets a non-empty context string: a
    fixed heading followed by a comma-separated list taken from the text after
    the first ``:`` of the loaded ``name`` rows (row 0 is skipped, at most the
    rows up to index 29 are used).

    Args:
        sql_database: Database wrapper used to list tables and columns and to
            run the attribute query.

    Returns:
        The list of ``SQLTableSchema`` objects, in table order.
    """
    reader = DatabaseReader(sql_database)
    schemas = []
    for table_name in sql_database.get_usable_table_names():
        # Very wide tables are left out.
        if len(sql_database.get_table_columns(table_name)) > 150:
            continue

        if table_name != 'gongchengshuxing':
            context = ""
        else:
            rows = reader.load_data(query='select name from gongchengshuxing')
            attribute_names = [row.text.split(':')[1] for row in rows[1:30]]
            context = '该表中有以下属性:' + ','.join(attribute_names)

        schemas.append(SQLTableSchema(table_name=table_name, context_str=context))
    return schemas
 def get_db_documents(configs: list[DBLoaderConfig]):
    docs = []
@@ -168,14 +136,14 @@ def get_db_documents(configs: list[DBLoaderConfig]):
        engine = create_engine(entry.uri)
        sql_database = SQLDatabase(engine)
-        table_schema_objs = makeDescriptionByEngine(sql_database)
+        # table_schema_objs = makeDescriptionByEngine(sql_database)
-        table_node_mapping = SQLTableNodeMapping(sql_database)
+        # table_node_mapping = SQLTableNodeMapping(sql_database)
-
+        #
-        nodes = table_node_mapping.to_nodes(table_schema_objs)
+        # nodes = table_node_mapping.to_nodes(table_schema_objs)
-        for node in nodes:
+        # for node in nodes:
-            node.metadata.update(metadata)
+        #     node.metadata.update(metadata)
-
+        #
-        docs.extend(nodes)
+        # docs.extend(nodes)
        queries = entry.queries or []
        loader = CustomDatabaseReader(sql_database)
@@ -20,6 +20,7 @@ class FileLoaderConfig(BaseModel):
            raise ValueError(f"Directory '{v}' does not exist")
        return v
 def llama_parse_parser():
    if os.getenv("LLAMA_CLOUD_API_KEY") is None:
        raise ValueError(
@@ -34,6 +35,7 @@ def llama_parse_parser():
    )
    return parser
 def llama_parse_extractor() -> Dict[str, LlamaParse]:
    from llama_parse.utils import SUPPORTED_FILE_TYPES
@@ -41,9 +43,10 @@ def llama_parse_extractor() -> Dict[str, LlamaParse]:
    return {file_type: parser for file_type in SUPPORTED_FILE_TYPES}
 def llama_local_extractor() -> Dict[str, BaseReader]:
-    return {"json" : JSONReader}
+    return {".json" : JSONReader(clean_json=False,levels_back=0)}
-def get_file_documents(config: FileLoaderConfig, childPath: str):
+
 def get_file_documents(config: FileLoaderConfig):
    from llama_index.core.readers import SimpleDirectoryReader
    try:
@@ -60,7 +63,7 @@ def get_file_documents(config: FileLoaderConfig, childPath: str):
            file_extractor = llama_local_extractor()
        reader = SimpleDirectoryReader(
-            os.path.join(config.data_dir,childPath.replace('_','\\')),
+            config.data_dir,
            recursive=True,
            filename_as_id=True,
            raise_on_error=True,
@@ -0,0 +1,89 @@
 from llama_index.core import PromptTemplate
 # Question-answering prompt. It consists of a role / skills / constraints
 # preamble, the retrieved context ({context_str}), answering rules, and the
 # user question ({query_str}). The text is sent to the model verbatim.
 text_qa_template_str = (
    "# 角色\n"
    "你是一名博微造价工程数据查询助手，专精于电力工程文件中的信息。"
    "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答，"
    "如同直接从文件中提取的内容。\n"
    "## 技能\n"
    "### 技能 1: 数据查询与提供\n"
    "- 准确回答所有关于电力工程造价的相关问题。\n"
    "- 提供具体数据，如成本估算、材料清单、劳动力需求等。\n"
    "- 确保提供的信息严格基于工程文档中的记录。\n"
    "### 技能 2: 技术性解释\n"
    "- 解释造价工程中的技术术语和概念。\n"
    "- 为复杂的工程细节提供清晰易懂的说明。\n"
    "## 约束\n"
    "- 仅回答与电力工程造价文件相关的具体问题。\n"
    "- 不进行任何超出文件内容的猜测或假设。\n"
    "- 所有回答均基于文件内容，采用客观和技术性的语言。\n"
    "- 请基于这些信息回答问题。如果无法找到相关信息，请不要额外发散回答，不要回答多余的信息，只需要回答“我不知道这个问题的答案”。\n"
    "以下为上下文信息\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "请根据上下文信息而非先前知识回答我的问题或回复我的指令。前面的上下文信息可能有用，也可能没用，你需要从我给出的上下文信息中选出与我的问题最相关的那些，来为你的回答提供依据。回答一定要忠于原文，简洁但不丢信息，不要胡乱编造。如果无法找到相关信息，请不要额外发散回答，不要回答多余的信息，只需要回答“我不知道这个问题的答案”。我的问题或指令是什么语种，你就用什么语种回复。\n"
    "如果是表结构或者是数据库的相关内容，只用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "问题：{query_str}\n"
    "你的回复： "
 )
 text_qa_template = PromptTemplate(text_qa_template_str)
 # Refinement prompt. It presents the original question ({query_str}), the
 # answer produced so far ({existing_answer}) and additional context
 # ({context_msg}), and asks for an improved answer or the unchanged one.
 refine_template_str = (
    "这是原本的问题： {query_str}\n"
    "我们已经提供了回答: {existing_answer}\n"
    "现在我们有机会改进这个回答 "
    "使用以下更多上下文（仅当需要用时）\n"
    "------------\n"
    "{context_msg}\n"
    "------------\n"
    "根据新的上下文, 请改进原来的回答。"
    "如果新的上下文没有用, 直接返回原本的回答。\n"
    "如果是表结构或者是数据库的相关内容，只用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "改进的回答: "
 )
 refine_template = PromptTemplate(refine_template_str)
 # Summary prompt. It reuses the role / skills / constraints preamble of the
 # QA prompt, then supplies context from multiple sources ({context_str})
 # and the query ({query_str}).
 summary_template_str = (
    "# 角色\n"
    "你是一名博微造价工程数据查询助手，专精于电力工程文件中的信息。"
    "你的职责是提供有关电力造价、造价编制软件、文件结构及相关数据的精准、客观的回答，"
    "如同直接从文件中提取的内容。\n"
    "## 技能\n"
    "### 技能 1: 数据查询与提供\n"
    "- 准确回答所有关于电力工程造价的相关问题。\n"
    "- 提供具体数据，如成本估算、材料清单、劳动力需求等。\n"
    "- 确保提供的信息严格基于工程文档中的记录。\n"
    "### 技能 2: 技术性解释\n"
    "- 解释造价工程中的技术术语和概念。\n"
    "- 为复杂的工程细节提供清晰易懂的说明。\n"
    "## 约束\n"
    "- 仅回答与电力工程造价文件相关的具体问题。\n"
    "- 不进行任何超出文件内容的猜测或假设。\n"
    "- 所有回答均基于文件内容，采用客观和技术性的语言。\n"
    "- 请基于这些信息回答问题。如果无法找到相关信息，请不要额外发散回答，不要回答多余的信息，只需要回答“我不知道这个问题的答案”。\n"
    "来自多个来源的上下文信息如下。\n"
    "---------------------\n"
    "{context_str}\n"
    "---------------------\n"
    "鉴于来自多个来源的信息而非先验知识， "
    "回答查询。\n"
    "如果是表结构或者是数据库的相关内容，只用于推导问题，不需要告诉用户数据库或表结构等物理信息。\n"
    "Query: {query_str}\n"
    "Answer: "
 )
 summary_template = PromptTemplate(summary_template_str)
 # Pass-through prompt: the template consists of the query ({query_str}) only,
 # with no instructions or context around it.
 simple_template_str = (
    "{query_str}"
 )
 simple_template = PromptTemplate(simple_template_str)
@@ -0,0 +1,133 @@
 import json
 import logging
 import os
 from typing import Any, Callable, Dict, List, Optional, cast
 from llama_index.core.base.base_retriever import BaseRetriever
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.constants import DEFAULT_SIMILARITY_TOP_K
 from llama_index.core.indices.vector_store.base import VectorStoreIndex
 from llama_index.core.schema import BaseNode, IndexNode, NodeWithScore, QueryBundle
 from llama_index.core.storage.docstore.types import BaseDocumentStore
 from llama_index.core.vector_stores.utils import (
    node_to_metadata_dict,
    metadata_dict_to_node,
 )
 import bm25s
 from app.engine.retriever.CHTokener import chTokenize
 CHDEFAULT_PERSIST_ARGS = {"similarity_top_k": "similarity_top_k", "_verbose": "verbose"}
 CHDEFAULT_PERSIST_FILENAME = "retriever.json"
 class CHBM25Retriever(BaseRetriever):
    """BM25 keyword retriever that tokenizes corpus and queries with ``chTokenize``.

    The retriever either indexes a list of nodes itself or wraps an already
    built ``bm25s.BM25`` object (for example one loaded from disk). The corpus
    is kept as a list of node metadata dicts so that hits can be turned back
    into nodes.
    """

    def __init__(
        self,
        nodes: Optional[List[BaseNode]] = None,
        existing_bm25: Optional[bm25s.BM25] = None,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        callback_manager: Optional[CallbackManager] = None,
        objects: Optional[List[IndexNode]] = None,
        object_map: Optional[dict] = None,
        verbose: bool = False,
    ) -> None:
        """Index ``nodes`` or adopt ``existing_bm25``.

        Args:
            nodes: Nodes to index; required when ``existing_bm25`` is None.
            existing_bm25: Pre-built BM25 object whose ``corpus`` is reused.
            similarity_top_k: Maximum number of hits returned per query.
            callback_manager: Forwarded to ``BaseRetriever``.
            objects: Forwarded to ``BaseRetriever``.
            object_map: Forwarded to ``BaseRetriever``.
            verbose: Enables progress output and is forwarded to ``BaseRetriever``.

        Raises:
            ValueError: If neither ``nodes`` nor ``existing_bm25`` is given.
        """
        self.similarity_top_k = similarity_top_k

        if existing_bm25 is not None:
            self.bm25 = existing_bm25
            self.corpus = existing_bm25.corpus
        else:
            if nodes is None:
                raise ValueError("Please pass nodes or an existing BM25 object.")

            self.corpus = [node_to_metadata_dict(node) for node in nodes]

            corpus_tokens = chTokenize(
                [node.get_content() for node in nodes],
                show_progress=verbose,
            )
            self.bm25 = bm25s.BM25()
            self.bm25.index(corpus_tokens, show_progress=verbose)

        super().__init__(
            callback_manager=callback_manager,
            object_map=object_map,
            objects=objects,
            verbose=verbose,
        )

    @classmethod
    def from_defaults(
        cls,
        index: Optional[VectorStoreIndex] = None,
        nodes: Optional[List[BaseNode]] = None,
        docstore: Optional[BaseDocumentStore] = None,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        verbose: bool = False,
    ) -> "CHBM25Retriever":
        """Build a retriever from exactly one of ``index``, ``nodes`` or ``docstore``.

        Raises:
            ValueError: If zero or more than one of the three sources is truthy.
        """
        if sum(bool(val) for val in [index, nodes, docstore]) != 1:
            raise ValueError("Please pass exactly one of index, nodes, or docstore.")

        # An index contributes its docstore; a docstore contributes its nodes.
        if index is not None:
            docstore = index.docstore

        if docstore is not None:
            nodes = cast(List[BaseNode], list(docstore.docs.values()))

        assert (
            nodes is not None
        ), "Please pass exactly one of index, nodes, or docstore."

        return cls(
            nodes=nodes,
            similarity_top_k=similarity_top_k,
            verbose=verbose,
        )

    def get_persist_args(self) -> Dict[str, Any]:
        """Get Persist Args Dict to Save.

        Maps the attributes named in ``CHDEFAULT_PERSIST_ARGS`` to the
        constructor argument names they are restored through.
        """
        return {
            CHDEFAULT_PERSIST_ARGS[key]: getattr(self, key)
            for key in CHDEFAULT_PERSIST_ARGS
            if hasattr(self, key)
        }

    def persist(self, path: str, **kwargs: Any) -> None:
        """Persist the retriever to a directory.

        The BM25 index and corpus are saved through ``bm25s``; the retriever
        arguments are written next to them as JSON.
        """
        self.bm25.save(path, corpus=self.corpus, **kwargs)
        with open(os.path.join(path, CHDEFAULT_PERSIST_FILENAME), "w") as f:
            json.dump(self.get_persist_args(), f, indent=2)

    @classmethod
    def from_persist_dir(cls, path: str, **kwargs: Any) -> "CHBM25Retriever":
        """Load the retriever from a directory written by ``persist``."""
        bm25 = bm25s.BM25.load(path, load_corpus=True, **kwargs)
        with open(os.path.join(path, CHDEFAULT_PERSIST_FILENAME)) as f:
            retriever_data = json.load(f)
        return cls(existing_bm25=bm25, **retriever_data)

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return up to ``similarity_top_k`` BM25 hits for the query.

        Args:
            query_bundle: Query whose ``query_str`` is tokenized and searched.

        Returns:
            The matching nodes with their BM25 scores; an empty list when
            nothing can be requested (empty corpus or non-positive top-k).
        """
        # bm25s rejects a k larger than the number of indexed documents, so a
        # small corpus must not be asked for more hits than it holds.
        top_k = min(self.similarity_top_k, len(self.corpus))
        if top_k <= 0:
            return []

        tokenized_query = chTokenize(
            query_bundle.query_str, show_progress=self._verbose
        )
        indexes, scores = self.bm25.retrieve(
            tokenized_query, k=top_k, show_progress=self._verbose
        )

        # batched, but only one query
        indexes = indexes[0]
        scores = scores[0]

        nodes: List[NodeWithScore] = []
        for idx, score in zip(indexes, scores):
            # idx can be an int or a dict of the node
            node_dict = idx if isinstance(idx, dict) else self.corpus[int(idx)]
            node = metadata_dict_to_node(node_dict)
            nodes.append(NodeWithScore(node=node, score=float(score)))

        return nodes
@@ -0,0 +1,46 @@
 from typing import Any, Dict, List, Union, Callable, NamedTuple
 from bm25s.tokenization import *
 try:
    from tqdm.auto import tqdm
 except ImportError:
    def tqdm(iterable, *args, **kwargs):
        return iterable
 def chinese_tokenizer(text: str) -> List[str]:
    """Split ``text`` with jieba and drop Chinese stop words.

    Args:
        text: Text to segment.

    Returns:
        The jieba tokens of ``text``, in order, without tokens listed in
        NLTK's ``chinese`` stop-word list.
    """
    import jieba
    from nltk.corpus import stopwords

    # Load the stop-word list once and keep it as a set on the function:
    # evaluating ``stopwords.words`` for every token re-reads the list and
    # makes each membership test a linear scan.
    stop_words = getattr(chinese_tokenizer, "_stop_words", None)
    if stop_words is None:
        stop_words = frozenset(stopwords.words('chinese'))
        chinese_tokenizer._stop_words = stop_words

    return [token for token in jieba.lcut(text) if token not in stop_words]
 def chTokenize(
    texts,
    show_progress: bool = True,
    leave: bool = False,
 ) -> Union[List[List[str]], Tokenized]:
    """Tokenize one or more texts into integer ids plus a shared vocabulary.

    Args:
        texts: A single string or an iterable of strings.
        show_progress: Show a progress bar while tokenizing.
        leave: Passed to the progress bar as its ``leave`` option.

    Returns:
        A ``Tokenized`` whose ``ids`` holds one id list per text and whose
        ``vocab`` maps each token to the id it was given on first sight.
    """
    if isinstance(texts, str):
        texts = [texts]

    vocab = {}
    encoded_docs = []
    progress = tqdm(
        texts, desc="Split strings", leave=leave, disable=not show_progress
    )
    for text in progress:
        # setdefault assigns the next free id only to unseen tokens.
        encoded_docs.append(
            [vocab.setdefault(token, len(vocab)) for token in chinese_tokenizer(text)]
        )

    return Tokenized(ids=encoded_docs, vocab=vocab)
@@ -0,0 +1,67 @@
 import os
 from typing import Optional, Any, Dict, List
 from llama_index.core.base.base_retriever import BaseRetriever
 from llama_index.core.schema import NodeWithScore, QueryBundle
 from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
 class HybridRetriever(BaseRetriever):
    """Combine vector-similarity hits with BM25 keyword hits.

    Scores are fused as ``alpha * vector_score + (1 - alpha) * bm25_score``;
    a node found by only one retriever gets 0 for the missing score.
    """

    def __init__(
            self,
            vector_index,
            similarity_top_k: int = 2,
            out_top_k: Optional[int] = None,
            alpha: float = 0.5,
            filters = None,
            **kwargs: Any,
    ) -> None:
        """Set up the vector retriever and load or build the BM25 retriever.

        Args:
            vector_index: Index providing the vector retriever and, when no
                persisted BM25 data exists, the nodes to index.
            similarity_top_k: Number of hits requested from each retriever.
            out_top_k: Number of fused hits returned; defaults to
                ``similarity_top_k``.
            alpha: Weight of the vector score; BM25 is weighted ``1 - alpha``.
            filters: Passed to ``vector_index.as_retriever``.
            **kwargs: Forwarded to ``BaseRetriever``.
        """
        super().__init__(**kwargs)
        self._vector_index = vector_index
        self._embed_model = vector_index._embed_model
        self._out_top_k = out_top_k or similarity_top_k
        self._alpha = alpha
        self._vecRetriever = vector_index.as_retriever(
            similarity_top_k=similarity_top_k,filters = filters
        )

        storage_dir = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
        if os.path.exists(storage_dir) and len(os.listdir(storage_dir)) > 0:
            self._bm25Retriever = CHBM25Retriever.from_persist_dir(storage_dir)
        else:
            # First run: build the BM25 index from the vector store's nodes,
            # keep it on the instance (``_retrieve`` needs it) and persist it
            # for later runs.
            self._bm25Retriever = CHBM25Retriever.from_defaults(
                similarity_top_k=similarity_top_k,
                nodes=self._vector_index.vector_store.get_nodes(None),
            )
            self._bm25Retriever.persist(storage_dir)

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Query both retrievers and return the best fused hits.

        Args:
            query_bundle: Query whose ``query_str`` is sent to both retrievers.

        Returns:
            At most ``out_top_k`` nodes, sorted by fused score (highest
            first), each carrying the fused score in ``score``.
        """
        vec_nodes: List[NodeWithScore] = self._vecRetriever.retrieve(query_bundle.query_str)
        bm_nodes: List[NodeWithScore] = self._bm25Retriever.retrieve(query_bundle.query_str)

        bm_by_id: Dict[str, NodeWithScore] = {node.node_id: node for node in bm_nodes}

        # NOTE(review): BM25 scores are used as returned, without scaling to
        # the vector-score range — confirm this weighting is intended.
        fused = []
        for node in vec_nodes:
            # Popping leaves only the BM25-only hits in the dict afterwards.
            bm_node = bm_by_id.pop(node.node_id, None)
            bm_score = (bm_node.score or 0.0) if bm_node is not None else 0.0
            # ``score`` is optional on NodeWithScore; treat a missing one as 0.
            vec_score = node.score or 0.0
            fused.append((self._alpha * vec_score + (1 - self._alpha) * bm_score, node))

        for node in bm_by_id.values():
            fused.append(((1 - self._alpha) * (node.score or 0.0), node))

        fused.sort(key=lambda pair: pair[0], reverse=True)
        for full_score, node in fused:
            node.score = full_score

        return [node for _, node in fused][:self._out_top_k]
@@ -0,0 +1,133 @@
 import json
 import logging
 import os
 from typing import Any, Callable, Dict, List, Optional, cast
 from llama_index.core.base.base_retriever import BaseRetriever
 from llama_index.core.callbacks.base import CallbackManager
 from llama_index.core.constants import DEFAULT_SIMILARITY_TOP_K
 from llama_index.core.indices.vector_store.base import VectorStoreIndex
 from llama_index.core.schema import BaseNode, IndexNode, NodeWithScore, QueryBundle
 from llama_index.core.storage.docstore.types import BaseDocumentStore
 from llama_index.core.vector_stores.utils import (
    node_to_metadata_dict,
    metadata_dict_to_node,
 )
 import bm25s
 from app.engine.retriever.CHTokener import chTokenize
 CHDEFAULT_PERSIST_ARGS = {"similarity_top_k": "similarity_top_k", "_verbose": "verbose"}
 CHDEFAULT_PERSIST_FILENAME = "retriever.json"
 class CHBM25Retriever(BaseRetriever):
    """BM25 keyword retriever that tokenizes corpus and queries with ``chTokenize``.

    The retriever either indexes a list of nodes itself or wraps an already
    built ``bm25s.BM25`` object (for example one loaded from disk). The corpus
    is kept as a list of node metadata dicts so that hits can be turned back
    into nodes.
    """

    def __init__(
        self,
        nodes: Optional[List[BaseNode]] = None,
        existing_bm25: Optional[bm25s.BM25] = None,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        callback_manager: Optional[CallbackManager] = None,
        objects: Optional[List[IndexNode]] = None,
        object_map: Optional[dict] = None,
        verbose: bool = False,
    ) -> None:
        """Index ``nodes`` or adopt ``existing_bm25``.

        Args:
            nodes: Nodes to index; required when ``existing_bm25`` is None.
            existing_bm25: Pre-built BM25 object whose ``corpus`` is reused.
            similarity_top_k: Maximum number of hits returned per query.
            callback_manager: Forwarded to ``BaseRetriever``.
            objects: Forwarded to ``BaseRetriever``.
            object_map: Forwarded to ``BaseRetriever``.
            verbose: Enables progress output and is forwarded to ``BaseRetriever``.

        Raises:
            ValueError: If neither ``nodes`` nor ``existing_bm25`` is given.
        """
        self.similarity_top_k = similarity_top_k

        if existing_bm25 is not None:
            self.bm25 = existing_bm25
            self.corpus = existing_bm25.corpus
        else:
            if nodes is None:
                raise ValueError("Please pass nodes or an existing BM25 object.")

            self.corpus = [node_to_metadata_dict(node) for node in nodes]

            corpus_tokens = chTokenize(
                [node.get_content() for node in nodes],
                show_progress=verbose,
            )
            self.bm25 = bm25s.BM25()
            self.bm25.index(corpus_tokens, show_progress=verbose)

        super().__init__(
            callback_manager=callback_manager,
            object_map=object_map,
            objects=objects,
            verbose=verbose,
        )

    @classmethod
    def from_defaults(
        cls,
        index: Optional[VectorStoreIndex] = None,
        nodes: Optional[List[BaseNode]] = None,
        docstore: Optional[BaseDocumentStore] = None,
        similarity_top_k: int = DEFAULT_SIMILARITY_TOP_K,
        verbose: bool = False,
    ) -> "CHBM25Retriever":
        """Build a retriever from exactly one of ``index``, ``nodes`` or ``docstore``.

        Raises:
            ValueError: If zero or more than one of the three sources is truthy.
        """
        if sum(bool(val) for val in [index, nodes, docstore]) != 1:
            raise ValueError("Please pass exactly one of index, nodes, or docstore.")

        # An index contributes its docstore; a docstore contributes its nodes.
        if index is not None:
            docstore = index.docstore

        if docstore is not None:
            nodes = cast(List[BaseNode], list(docstore.docs.values()))

        assert (
            nodes is not None
        ), "Please pass exactly one of index, nodes, or docstore."

        return cls(
            nodes=nodes,
            similarity_top_k=similarity_top_k,
            verbose=verbose,
        )

    def get_persist_args(self) -> Dict[str, Any]:
        """Get Persist Args Dict to Save.

        Maps the attributes named in ``CHDEFAULT_PERSIST_ARGS`` to the
        constructor argument names they are restored through.
        """
        return {
            CHDEFAULT_PERSIST_ARGS[key]: getattr(self, key)
            for key in CHDEFAULT_PERSIST_ARGS
            if hasattr(self, key)
        }

    def persist(self, path: str, **kwargs: Any) -> None:
        """Persist the retriever to a directory.

        The BM25 index and corpus are saved through ``bm25s``; the retriever
        arguments are written next to them as JSON.
        """
        self.bm25.save(path, corpus=self.corpus, **kwargs)
        with open(os.path.join(path, CHDEFAULT_PERSIST_FILENAME), "w") as f:
            json.dump(self.get_persist_args(), f, indent=2)

    @classmethod
    def from_persist_dir(cls, path: str, **kwargs: Any) -> "CHBM25Retriever":
        """Load the retriever from a directory written by ``persist``."""
        bm25 = bm25s.BM25.load(path, load_corpus=True, **kwargs)
        with open(os.path.join(path, CHDEFAULT_PERSIST_FILENAME)) as f:
            retriever_data = json.load(f)
        return cls(existing_bm25=bm25, **retriever_data)

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Return up to ``similarity_top_k`` BM25 hits for the query.

        Args:
            query_bundle: Query whose ``query_str`` is tokenized and searched.

        Returns:
            The matching nodes with their BM25 scores; an empty list when
            nothing can be requested (empty corpus or non-positive top-k).
        """
        # bm25s rejects a k larger than the number of indexed documents, so a
        # small corpus must not be asked for more hits than it holds.
        top_k = min(self.similarity_top_k, len(self.corpus))
        if top_k <= 0:
            return []

        tokenized_query = chTokenize(
            query_bundle.query_str, show_progress=self._verbose
        )
        indexes, scores = self.bm25.retrieve(
            tokenized_query, k=top_k, show_progress=self._verbose
        )

        # batched, but only one query
        indexes = indexes[0]
        scores = scores[0]

        nodes: List[NodeWithScore] = []
        for idx, score in zip(indexes, scores):
            # idx can be an int or a dict of the node
            node_dict = idx if isinstance(idx, dict) else self.corpus[int(idx)]
            node = metadata_dict_to_node(node_dict)
            nodes.append(NodeWithScore(node=node, score=float(score)))

        return nodes
@@ -0,0 +1,46 @@
 from typing import Any, Dict, List, Union, Callable, NamedTuple
 from bm25s.tokenization import *
 try:
    from tqdm.auto import tqdm
 except ImportError:
    def tqdm(iterable, *args, **kwargs):
        return iterable
 def chinese_tokenizer(text: str) -> List[str]:
    """Split ``text`` with jieba and drop Chinese stop words.

    Args:
        text: Text to segment.

    Returns:
        The jieba tokens of ``text``, in order, without tokens listed in
        NLTK's ``chinese`` stop-word list.
    """
    import jieba
    from nltk.corpus import stopwords

    # Load the stop-word list once and keep it as a set on the function:
    # evaluating ``stopwords.words`` for every token re-reads the list and
    # makes each membership test a linear scan.
    stop_words = getattr(chinese_tokenizer, "_stop_words", None)
    if stop_words is None:
        stop_words = frozenset(stopwords.words('chinese'))
        chinese_tokenizer._stop_words = stop_words

    return [token for token in jieba.lcut(text) if token not in stop_words]
 def chTokenize(
    texts,
    show_progress: bool = True,
    leave: bool = False,
 ) -> Union[List[List[str]], Tokenized]:
    """Tokenize one or more texts into integer ids plus a shared vocabulary.

    Args:
        texts: A single string or an iterable of strings.
        show_progress: Show a progress bar while tokenizing.
        leave: Passed to the progress bar as its ``leave`` option.

    Returns:
        A ``Tokenized`` whose ``ids`` holds one id list per text and whose
        ``vocab`` maps each token to the id it was given on first sight.
    """
    if isinstance(texts, str):
        texts = [texts]

    vocab = {}
    encoded_docs = []
    progress = tqdm(
        texts, desc="Split strings", leave=leave, disable=not show_progress
    )
    for text in progress:
        # setdefault assigns the next free id only to unseen tokens.
        encoded_docs.append(
            [vocab.setdefault(token, len(vocab)) for token in chinese_tokenizer(text)]
        )

    return Tokenized(ids=encoded_docs, vocab=vocab)
@@ -0,0 +1,67 @@
 import os
 from typing import Optional, Any, Dict, List
 from llama_index.core.base.base_retriever import BaseRetriever
 from llama_index.core.schema import NodeWithScore, QueryBundle
 from app.engine.retriever.CHBM25Retriever import CHBM25Retriever
 class HybridRetriever(BaseRetriever):
    """Combine vector-similarity hits with BM25 keyword hits.

    Scores are fused as ``alpha * vector_score + (1 - alpha) * bm25_score``;
    a node found by only one retriever gets 0 for the missing score.
    """

    def __init__(
            self,
            vector_index,
            similarity_top_k: int = 2,
            out_top_k: Optional[int] = None,
            alpha: float = 0.5,
            filters = None,
            **kwargs: Any,
    ) -> None:
        """Set up the vector retriever and load or build the BM25 retriever.

        Args:
            vector_index: Index providing the vector retriever and, when no
                persisted BM25 data exists, the nodes to index.
            similarity_top_k: Number of hits requested from each retriever.
            out_top_k: Number of fused hits returned; defaults to
                ``similarity_top_k``.
            alpha: Weight of the vector score; BM25 is weighted ``1 - alpha``.
            filters: Passed to ``vector_index.as_retriever``.
            **kwargs: Forwarded to ``BaseRetriever``.
        """
        super().__init__(**kwargs)
        self._vector_index = vector_index
        self._embed_model = vector_index._embed_model
        self._out_top_k = out_top_k or similarity_top_k
        self._alpha = alpha
        self._vecRetriever = vector_index.as_retriever(
            similarity_top_k=similarity_top_k,filters = filters
        )

        storage_dir = os.getenv("BM_RETRIEVER_PATH", "storage_bm")
        if os.path.exists(storage_dir) and len(os.listdir(storage_dir)) > 0:
            self._bm25Retriever = CHBM25Retriever.from_persist_dir(storage_dir)
        else:
            # First run: build the BM25 index from the vector store's nodes,
            # keep it on the instance (``_retrieve`` needs it) and persist it
            # for later runs.
            self._bm25Retriever = CHBM25Retriever.from_defaults(
                similarity_top_k=similarity_top_k,
                nodes=self._vector_index.vector_store.get_nodes(None),
            )
            self._bm25Retriever.persist(storage_dir)

    def _retrieve(self, query_bundle: QueryBundle) -> List[NodeWithScore]:
        """Query both retrievers and return the best fused hits.

        Args:
            query_bundle: Query whose ``query_str`` is sent to both retrievers.

        Returns:
            At most ``out_top_k`` nodes, sorted by fused score (highest
            first), each carrying the fused score in ``score``.
        """
        vec_nodes: List[NodeWithScore] = self._vecRetriever.retrieve(query_bundle.query_str)
        bm_nodes: List[NodeWithScore] = self._bm25Retriever.retrieve(query_bundle.query_str)

        bm_by_id: Dict[str, NodeWithScore] = {node.node_id: node for node in bm_nodes}

        # NOTE(review): BM25 scores are used as returned, without scaling to
        # the vector-score range — confirm this weighting is intended.
        fused = []
        for node in vec_nodes:
            # Popping leaves only the BM25-only hits in the dict afterwards.
            bm_node = bm_by_id.pop(node.node_id, None)
            bm_score = (bm_node.score or 0.0) if bm_node is not None else 0.0
            # ``score`` is optional on NodeWithScore; treat a missing one as 0.
            vec_score = node.score or 0.0
            fused.append((self._alpha * vec_score + (1 - self._alpha) * bm_score, node))

        for node in bm_by_id.values():
            fused.append(((1 - self._alpha) * (node.score or 0.0), node))

        fused.sort(key=lambda pair: pair[0], reverse=True)
        for full_score, node in fused:
            node.score = full_score

        return [node for _, node in fused][:self._out_top_k]
@@ -0,0 +1,36 @@
 from llama_index.core.tools.function_tool import FunctionTool
 def duckduckgo_search(
    query: str,
    region: str = "wt-wt",
    max_results: int = 10,
 ):
    """
    Use this function to search for any query in DuckDuckGo.
    Args:
        query (str): The query to search in DuckDuckGo.
        region Optional(str): The region to be used for the search in [country-language] convention, ex us-en, uk-en, ru-ru, etc...
        max_results Optional(int): The maximum number of results to be returned. Default is 10.
    """
    # The package is imported lazily so that it is only needed when the tool
    # is actually invoked.
    try:
        from duckduckgo_search import DDGS
    except ImportError:
        raise ImportError(
            "duckduckgo_search package is required to use this function."
            "Please install it by running: `poetry add duckduckgo_search` or `pip install duckduckgo_search`"
        )

    # Materialize the results while the client is still open.
    with DDGS() as client:
        return list(
            client.text(keywords=query, region=region, max_results=max_results)
        )
 def get_tools(**kwargs):
    """Return this module's tools: the DuckDuckGo search wrapped as a FunctionTool.

    Keyword arguments are accepted but not used.
    """
    search_tool = FunctionTool.from_defaults(duckduckgo_search)
    return [search_tool]
@@ -0,0 +1,60 @@
 import os
 import yaml
 import json
 import importlib
 from cachetools import cached, LRUCache
 from llama_index.core.tools.tool_spec.base import BaseToolSpec
 from llama_index.core.tools.function_tool import FunctionTool
 class ToolType:
    """Names of the supported tool sources.

    The values are the keys of ``ToolFactory.TOOL_SOURCE_PACKAGE_MAP``.
    """
    # Tools resolved below the ``llama_index.tools`` package.
    LLAMAHUB = "llamahub"
    # Tools resolved below the ``app.engine.tools`` package.
    LOCAL = "local"
 class ToolFactory:
    """Load tools by name from LlamaHub packages or local modules."""

    # Package that is searched for each tool source.
    TOOL_SOURCE_PACKAGE_MAP = {
        ToolType.LLAMAHUB: "llama_index.tools",
        ToolType.LOCAL: "app.engine.tools",
    }

    @staticmethod
    def load_tools(tool_type: str, tool_name: str, config: dict) -> list[FunctionTool]:
        """Import one tool and return the function tools it provides.

        Args:
            tool_type: Key of ``TOOL_SOURCE_PACKAGE_MAP`` selecting the package.
            tool_name: Either ``"<package>.<ToolSpecClass>"`` (any name
                containing ``"ToolSpec"``) or the name of a module that
                exposes ``get_tools(**config)``.
            config: Keyword arguments for the tool spec class or for
                ``get_tools``; ``None`` is treated as "no settings".

        Returns:
            The list of tools produced by the tool spec or module.

        Raises:
            KeyError: If ``tool_type`` is not a known tool source.
            ValueError: If the tool cannot be imported or loaded, or if a
                module returns objects that are not ``FunctionTool``.
        """
        source_package = ToolFactory.TOOL_SOURCE_PACKAGE_MAP[tool_type]
        # A tool listed in YAML without any settings arrives as None, which
        # cannot be unpacked with ``**``.
        config = config or {}
        try:
            if "ToolSpec" in tool_name:
                tool_package, tool_cls_name = tool_name.split(".")
                module_name = f"{source_package}.{tool_package}"
                module = importlib.import_module(module_name)
                tool_class = getattr(module, tool_cls_name)
                tool_spec: BaseToolSpec = tool_class(**config)
                return tool_spec.to_tool_list()
            else:
                module = importlib.import_module(f"{source_package}.{tool_name}")
                tools = module.get_tools(**config)
                if not all(isinstance(tool, FunctionTool) for tool in tools):
                    raise ValueError(
                        f"The module {module} does not contain valid tools"
                    )
                return tools
        except ImportError as e:
            raise ValueError(f"Failed to import tool {tool_name}: {e}") from e
        except AttributeError as e:
            raise ValueError(f"Failed to load tool {tool_name}: {e}") from e

    @staticmethod
    def from_env() -> list[FunctionTool]:
        """Load every tool declared in ``config/tools.yaml``.

        The file maps tool type -> tool name -> settings. A missing file, an
        empty file and empty tool-type sections all contribute no tools.

        Returns:
            All loaded tools, in declaration order.
        """
        tools = []
        if not os.path.exists("config/tools.yaml"):
            return tools

        with open("config/tools.yaml", "r") as f:
            tool_configs = yaml.safe_load(f)

        if not tool_configs:
            return tools

        for tool_type, config_entries in tool_configs.items():
            if not config_entries:
                continue
            for tool_name, config in config_entries.items():
                tools.extend(ToolFactory.load_tools(tool_type, tool_name, config))
        return tools
@@ -0,0 +1,108 @@
 import os
 import uuid
 import logging
 import requests
 from typing import Optional
 from pydantic import BaseModel, Field
 from llama_index.core.tools import FunctionTool
 logger = logging.getLogger(__name__)
 class ImageGeneratorToolOutput(BaseModel):
    # Result returned by ImageGeneratorTool.generate_image: on success
    # `image_url` is filled in, on failure `error_message` is.
    is_success: bool = Field(
        default=...,
        description="Whether the image generation was successful.",
    )
    image_url: Optional[str] = Field(
        default=None,
        description="The URL of the generated image.",
    )
    error_message: Optional[str] = Field(
        default=None,
        description="The error message if the image generation failed.",
    )
 class ImageGeneratorTool:
    """
    Generates an image by POSTing a prompt to the Stability AI endpoint in
    `_IMG_GEN_API`, stores the returned bytes under `_IMG_OUTPUT_DIR` and
    reports a URL built from FILESERVER_URL_PREFIX.
    """

    _IMG_OUTPUT_FORMAT = "webp"
    _IMG_OUTPUT_DIR = "output/tool"
    _IMG_GEN_API = "https://api.stability.ai/v2beta/stable-image/generate/core"
    # Seconds to wait for the generation request; without a timeout a stalled
    # connection would block the caller indefinitely.
    _REQUEST_TIMEOUT = 120

    def __init__(self, api_key: str = None):
        """
        Args:
            api_key: Stability API key. Falls back to the STABILITY_API_KEY
                environment variable when empty or omitted.

        Raises:
            ValueError: If no API key is available or FILESERVER_URL_PREFIX
                is not set.
        """
        if not api_key:
            api_key = os.getenv("STABILITY_API_KEY")
        self._api_key = api_key
        self.fileserver_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
        # Reject empty strings too: an empty key would only fail later with an
        # opaque authorization error from the API.
        if not self._api_key:
            raise ValueError(
                "STABILITY_API_KEY key is required to run image generator. Get it here: https://platform.stability.ai/account/keys"
            )
        if not self.fileserver_url_prefix:
            raise ValueError("FILESERVER_URL_PREFIX is required.")

    def _prepare_output_dir(self):
        """
        Create the output directory if it doesn't exist
        """
        # exist_ok makes a separate existence check unnecessary.
        os.makedirs(self._IMG_OUTPUT_DIR, exist_ok=True)

    def _save_image(self, image_data: bytes):
        """Write the image bytes to a uniquely named file and return its URL."""
        self._prepare_output_dir()
        filename = f"{uuid.uuid4()}.{self._IMG_OUTPUT_FORMAT}"
        output_path = os.path.join(self._IMG_OUTPUT_DIR, filename)
        with open(output_path, "wb") as f:
            f.write(image_data)
        # Use the prefix validated in __init__ rather than re-reading the
        # environment, so the URL always matches the checked configuration.
        url = f"{self.fileserver_url_prefix}/{self._IMG_OUTPUT_DIR}/{filename}"
        logger.info(f"Saved image to {output_path}.\nURL: {url}")
        return url

    def _call_stability_api(self, prompt: str):
        """
        POST the prompt to the image generation endpoint.

        Returns:
            The HTTP response; its body is later saved as the image file.

        Raises:
            requests.HTTPError: If the API answers with an error status.
        """
        headers = {
            "authorization": f"Bearer {self._api_key}",
            "accept": "image/*",
        }
        data = {
            "prompt": prompt,
            "output_format": self._IMG_OUTPUT_FORMAT,
        }
        response = requests.post(
            self._IMG_GEN_API,
            headers=headers,
            # NOTE(review): the dummy file entry presumably forces a
            # multipart/form-data body — confirm against the API docs.
            files={"none": ""},
            data=data,
            timeout=self._REQUEST_TIMEOUT,
        )
        response.raise_for_status()
        return response

    # NOTE(review): this method is wrapped by FunctionTool.from_defaults, which
    # presumably uses the docstring as the tool description, so its wording is
    # left untouched.
    def generate_image(self, prompt: str) -> ImageGeneratorToolOutput:
        """
        Use this tool to generate an image based on the prompt.
        Args:
            prompt (str): The prompt to generate the image from.
        """
        try:
            # Call the Stability API
            response = self._call_stability_api(prompt)
            # Save the image and get the URL
            image_url = self._save_image(response.content)
            return ImageGeneratorToolOutput(
                is_success=True,
                image_url=image_url,
            )
        except Exception as e:
            # Failures are reported through the output model instead of being
            # raised; logger.exception already attaches the traceback.
            logger.exception(e)
            return ImageGeneratorToolOutput(
                is_success=False,
                error_message=str(e),
            )
 def get_tools(**kwargs):
    """Return the image generator wrapped as a single-element tool list; kwargs go to ImageGeneratorTool."""
    image_tool = ImageGeneratorTool(**kwargs)
    wrapped = FunctionTool.from_defaults(image_tool.generate_image)
    return [wrapped]
@@ -0,0 +1,143 @@
 import os
 import logging
 import base64
 import uuid
 from pydantic import BaseModel
 from typing import List, Tuple, Dict, Optional
 from llama_index.core.tools import FunctionTool
 from e2b_code_interpreter import CodeInterpreter
 from e2b_code_interpreter.models import Logs
 logger = logging.getLogger(__name__)
 class InterpreterExtraResult(BaseModel):
    # One artifact produced by a code execution, as built by
    # E2BCodeInterpreter.parse_result.
    # Format key of the artifact (e.g. "png", "svg", "jpeg", "pdf", or any
    # other format the result reports).
    type: str
    # Raw data for formats that are not saved to disk; left unset for saved files.
    content: Optional[str] = None
    # Name of the file written to the output directory (saved formats only).
    filename: Optional[str] = None
    # URL under FILESERVER_URL_PREFIX for the saved file (saved formats only).
    url: Optional[str] = None
 class E2BToolOutput(BaseModel):
    # Outcome of one code execution, as returned by E2BCodeInterpreter.interpret.
    # True when the execution reported an error.
    is_error: bool
    # Logs object taken from the execution.
    logs: Logs
    # Artifacts parsed from the first execution result; empty on error or when
    # the execution produced no results.
    results: List[InterpreterExtraResult] = []
 class E2BCodeInterpreter:
    output_dir = "output/tool"
    def __init__(self, api_key: str = None):
        if api_key is None:
            api_key = os.getenv("E2B_API_KEY")
        filesever_url_prefix = os.getenv("FILESERVER_URL_PREFIX")
        if not api_key:
            raise ValueError(
                "E2B_API_KEY key is required to run code interpreter. Get it here: https://e2b.dev/docs/getting-started/api-key"
            )
        if not filesever_url_prefix:
            raise ValueError(
                "FILESERVER_URL_PREFIX is required to display file output from sandbox"
            )
        self.filesever_url_prefix = filesever_url_prefix
        self.interpreter = CodeInterpreter(api_key=api_key)
    def __del__(self):
        self.interpreter.close()
    def get_output_path(self, filename: str) -> str:
        # if output directory doesn't exist, create it
        if not os.path.exists(self.output_dir):
            os.makedirs(self.output_dir, exist_ok=True)
        return os.path.join(self.output_dir, filename)
    def save_to_disk(self, base64_data: str, ext: str) -> Dict:
        filename = f"{uuid.uuid4()}.{ext}"  # generate a unique filename
        buffer = base64.b64decode(base64_data)
        output_path = self.get_output_path(filename)
        try:
            with open(output_path, "wb") as file:
                file.write(buffer)
        except IOError as e:
            logger.error(f"Failed to write to file {output_path}: {str(e)}")
            raise e
        logger.info(f"Saved file to {output_path}")
        return {
            "outputPath": output_path,
            "filename": filename,
        }
    def get_file_url(self, filename: str) -> str:
        return f"{self.filesever_url_prefix}/{self.output_dir}/{filename}"
    def parse_result(self, result) -> List[InterpreterExtraResult]:
        """
        The result could include multiple formats (e.g. png, svg, etc.) but encoded in base64
        We save each result to disk and return saved file metadata (extension, filename, url)
        """
        if not result:
            return []
        output = []
        try:
            formats = result.formats()
            results = [result[format] for format in formats]
            for ext, data in zip(formats, results):
                match ext:
                    case "png" | "svg" | "jpeg" | "pdf":
                        result = self.save_to_disk(data, ext)
                        filename = result["filename"]
                        output.append(
                            InterpreterExtraResult(
                                type=ext,
                                filename=filename,
                                url=self.get_file_url(filename),
                            )
                        )
                    case _:
                        output.append(
                            InterpreterExtraResult(
                                type=ext,
                                content=data,
                            )
                        )
        except Exception as error:
            logger.exception(error, exc_info=True)
            logger.error("Error when parsing output from E2b interpreter tool", error)
        return output
    def interpret(self, code: str) -> E2BToolOutput:
        """
        Execute python code in a Jupyter notebook cell, the toll will return result, stdout, stderr, display_data, and error.
        Parameters:
            code (str): The python code to be executed in a single cell.
        """
        logger.info(
            f"\n{'='*50}\n> Running following AI-generated code:\n{code}\n{'='*50}"
        )
        exec = self.interpreter.notebook.exec_cell(code)
        if exec.error:
            logger.error("Error when executing code", exec.error)
            output = E2BToolOutput(is_error=True, logs=exec.logs, results=[])
        else:
            if len(exec.results) == 0:
                output = E2BToolOutput(is_error=False, logs=exec.logs, results=[])
            else:
                results = self.parse_result(exec.results[0])
                output = E2BToolOutput(is_error=False, logs=exec.logs, results=results)
        return output
 def get_tools(**kwargs):
    """Return the code interpreter wrapped as a single-element tool list; kwargs go to E2BCodeInterpreter."""
    sandbox = E2BCodeInterpreter(**kwargs)
    wrapped = FunctionTool.from_defaults(sandbox.interpret)
    return [wrapped]
@@ -0,0 +1,78 @@
 from typing import Dict, List, Tuple
 from llama_index.tools.openapi import OpenAPIToolSpec
 from llama_index.tools.requests import RequestsToolSpec
 class OpenAPIActionToolSpec(OpenAPIToolSpec, RequestsToolSpec):
    """
    A combination of OpenAPI and Requests tool specs that can parse OpenAPI specs and make requests.
    openapi_uri: str: The file path or URL to the OpenAPI spec.
    domain_headers: dict: Whitelist domains and the headers to use.
    """

    spec_functions = OpenAPIToolSpec.spec_functions + RequestsToolSpec.spec_functions
    # Cached parsed specs by URI
    _specs: Dict[str, Tuple[Dict, List[str]]] = {}
    # Seconds to wait when downloading a spec over HTTP.
    _SPEC_FETCH_TIMEOUT = 30

    def __init__(self, openapi_uri: str, domain_headers: dict = None, **kwargs):
        """
        Args:
            openapi_uri: `http(s)` URL or `file` URI of the OpenAPI spec.
            domain_headers: Mapping of whitelisted domain -> headers. Servers
                declared by the spec are added with empty headers when absent.
            **kwargs: Accepted for compatibility; not used.
        """
        # Work on a copy so the caller's dict is not modified when the spec's
        # servers are added below.
        domain_headers = dict(domain_headers) if domain_headers else {}
        if openapi_uri not in self._specs:
            openapi_spec, servers = self._load_openapi_spec(openapi_uri)
            self._specs[openapi_uri] = (openapi_spec, servers)
        else:
            openapi_spec, servers = self._specs[openapi_uri]
        # Add the servers to the domain headers if they are not already present
        for server in servers:
            domain_headers.setdefault(server, {})
        OpenAPIToolSpec.__init__(self, spec=openapi_spec)
        RequestsToolSpec.__init__(self, domain_headers)

    @staticmethod
    def _load_openapi_spec(uri: str) -> Tuple[Dict, List[str]]:
        """
        Load an OpenAPI spec from a URI.
        Args:
            uri (str): An `http(s)` URL or a `file` URI pointing to the OpenAPI spec.
        Returns:
            Tuple[Dict, List[str]]: The parsed spec and the network locations
            of the servers it declares.
        Raises:
            ValueError: If the URI scheme is unsupported, the download does not
                return status 200, the document is not a mapping, or its
                `servers` section is malformed.
        """
        import yaml
        from urllib.parse import urlparse

        if uri.startswith("http"):
            import requests

            response = requests.get(
                uri, timeout=OpenAPIActionToolSpec._SPEC_FETCH_TIMEOUT
            )
            if response.status_code != 200:
                raise ValueError(
                    "Could not initialize OpenAPIActionToolSpec: "
                    f"Failed to load OpenAPI spec from {uri}, status code: {response.status_code}"
                )
            spec = yaml.safe_load(response.text)
        elif uri.startswith("file"):
            filepath = urlparse(uri).path
            # Explicit encoding keeps parsing independent of the platform default.
            with open(filepath, "r", encoding="utf-8") as file:
                spec = yaml.safe_load(file)
        else:
            raise ValueError(
                "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI URI provided. "
                "Only HTTP and file path are supported."
            )
        # An empty or scalar document parses to a non-dict, which previously
        # surfaced as an AttributeError on `spec.get`.
        if not isinstance(spec, dict):
            raise ValueError(
                "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI spec provided. "
                "The document is not a mapping."
            )
        # Add the servers to the whitelist
        try:
            servers = [
                urlparse(server["url"]).netloc
                for server in (spec.get("servers") or [])
            ]
        except (KeyError, TypeError) as e:
            raise ValueError(
                "Could not initialize OpenAPIActionToolSpec: Invalid OpenAPI spec provided. "
                "Could not get `servers` from the spec."
            ) from e
        return spec, servers
@@ -0,0 +1,73 @@
 """Open Meteo weather map tool spec."""
 import logging
 import requests
 import pytz
 from llama_index.core.tools import FunctionTool
 logger = logging.getLogger(__name__)
 class OpenMeteoWeather:
    """
    Looks up a location with the Open-Meteo geocoding API and fetches its
    forecast from the Open-Meteo weather API.
    """

    geo_api = "https://geocoding-api.open-meteo.com/v1"
    weather_api = "https://api.open-meteo.com/v1"
    # Seconds to wait for each HTTP call so a stalled request cannot hang the tool.
    _REQUEST_TIMEOUT = 30

    @classmethod
    def _get_geo_location(cls, location: str) -> dict:
        """
        Get geo location from location name.

        Returns:
            A dict with the keys "id", "name", "latitude" and "longitude" of
            the first search result.

        Raises:
            Exception: If the geocoding request does not return status 200.
            ValueError: If the search returns no results for `location`.
        """
        params = {"name": location, "count": 10, "language": "en", "format": "json"}
        response = requests.get(
            f"{cls.geo_api}/search", params=params, timeout=cls._REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            raise Exception(f"Failed to fetch geo location: {response.status_code}")
        # A missing or empty `results` entry previously surfaced as a bare
        # KeyError/IndexError; report the unknown location explicitly instead.
        results = response.json().get("results")
        if not results:
            raise ValueError(f"No geo location found for: {location}")
        result = results[0]
        return {
            "id": result["id"],
            "name": result["name"],
            "latitude": result["latitude"],
            "longitude": result["longitude"],
        }

    # NOTE(review): this method is wrapped by FunctionTool.from_defaults, which
    # presumably uses the docstring as the tool description, so its wording is
    # left untouched.
    @classmethod
    def get_weather_information(cls, location: str) -> dict:
        """Use this function to get the weather of any given location.
        Note that the weather code should follow WMO Weather interpretation codes (WW):
        0: Clear sky
        1, 2, 3: Mainly clear, partly cloudy, and overcast
        45, 48: Fog and depositing rime fog
        51, 53, 55: Drizzle: Light, moderate, and dense intensity
        56, 57: Freezing Drizzle: Light and dense intensity
        61, 63, 65: Rain: Slight, moderate and heavy intensity
        66, 67: Freezing Rain: Light and heavy intensity
        71, 73, 75: Snow fall: Slight, moderate, and heavy intensity
        77: Snow grains
        80, 81, 82: Rain showers: Slight, moderate, and violent
        85, 86: Snow showers slight and heavy
        95: Thunderstorm: Slight or moderate
        96, 99: Thunderstorm with slight and heavy hail
        """
        logger.info(
            f"Calling open-meteo api to get weather information of location: {location}"
        )
        geo_location = cls._get_geo_location(location)
        timezone = pytz.timezone("UTC").zone
        params = {
            "latitude": geo_location["latitude"],
            "longitude": geo_location["longitude"],
            "current": "temperature_2m,weather_code",
            "hourly": "temperature_2m,weather_code",
            "daily": "weather_code",
            "timezone": timezone,
        }
        response = requests.get(
            f"{cls.weather_api}/forecast", params=params, timeout=cls._REQUEST_TIMEOUT
        )
        if response.status_code != 200:
            raise Exception(
                f"Failed to fetch weather information: {response.status_code}"
            )
        return response.json()
 def get_tools(**kwargs):
    """Return the weather lookup wrapped as a single-element tool list; kwargs are accepted but unused."""
    weather_lookup = OpenMeteoWeather.get_weather_information
    wrapped = FunctionTool.from_defaults(weather_lookup)
    return [wrapped]
@@ -5,14 +5,12 @@ from qdrant_client import qdrant_client
 qclient = None
-def get_qdrant_vector_store(docType:str):
+def get_qdrant_vector_store():
-    collection_name = docType
+    collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
    #collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
    vector_store_path = os.getenv("VECTOR_STORE_PATH")
    host=os.getenv("VECTOR_STORE_HOST", "127.0.0.1"),
    port=int(os.getenv("VECTOR_STORE_PORT", "6333")),
    vector_store_path =os.path.join(vector_store_path,docType)
    if not vector_store_path or not host:
        raise ValueError(
            "Please provide either VECTOR_STORE_PATH or VECTOR_STORE_HOST and VECTOR_STORE_PORT"
@@ -34,11 +32,9 @@ def get_qdrant_vector_store(docType:str):
    vector_store = QdrantVectorStore(client=qclient, collection_name=collection_name)
    return vector_store
-def get_chroma_vector_store(docType:str):
+def get_chroma_vector_store():
-    #collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
+    collection_name = os.getenv("VECTOR_STORE_COLLECTION", "default")
    collection_name = docType
    vector_store_path = os.getenv("VECTOR_STORE_PATH")
    vector_store_path =os.path.join(vector_store_path,docType)
    # if VECTOR_STORE_PATH is set, use a local ChromaVectorStore from the path
    # otherwise, use a remote ChromaVectorStore (ChromaDB Cloud is not supported yet)
    if vector_store_path:
@@ -59,16 +55,16 @@ def get_chroma_vector_store(docType:str):
        )
    return store
-def get_vector_store(docType:str):
+def get_vector_store():
    store_type=os.getenv("VECTOR_STORE_TYPE")
    store = None
    match store_type:
        case "chroma":
-            store = get_chroma_vector_store(docType)
+            store = get_chroma_vector_store()
        case "qdrant":
-            store = get_qdrant_vector_store(docType)
+            store = get_qdrant_vector_store()
        case _:
            raise ValueError(f"Invalid vector store type: {store_type}")
@@ -3,7 +3,25 @@ from typing import Dict
 from llama_index.core.constants import DEFAULT_TEMPERATURE
 from llama_index.core.settings import Settings
 from llama_index.llms.xinference import Xinference
 from llama_index.llms.xinference.base import DEFAULT_XINFERENCE_TEMP
 from app.xinference.base import XinferenceEmbedding, XinferenceRerank
 def get_node_postprocessors():
    """
    Build the node postprocessors configured through environment variables.

    Reads RERANK_ENABLED, RERANK_MODEL, RERANK_BASE_URL, RERANK_TOP_N and
    RERANK_THRESHOLD.

    Returns:
        A list with one XinferenceRerank when reranking is enabled and a model
        is configured, otherwise an empty list.
    """
    rerank_enabled = os.getenv("RERANK_ENABLED")
    # Check for None before touching the value: calling .title() on an unset
    # variable raised AttributeError and made the None check unreachable.
    if rerank_enabled is None or rerank_enabled.strip().lower() == "false":
        return []
    rerank_model = os.getenv("RERANK_MODEL")
    if rerank_model is None:
        # Return an empty list (not None) so both disabled paths agree.
        return []
    rerank_url = os.getenv("RERANK_BASE_URL")
    rerank_top_n = os.getenv("RERANK_TOP_N")
    rerank_threshold = os.getenv("RERANK_THRESHOLD")
    # Environment values are strings; convert them to the numeric types the
    # reranker declares, leaving unset/empty values as None.
    top_n = int(rerank_top_n) if rerank_top_n else None
    threshold = float(rerank_threshold) if rerank_threshold else None
    return [XinferenceRerank(rerank_model, rerank_url, top_n=top_n, threshold=threshold)]
 def init_settings():
    model_provider = os.getenv("MODEL_PROVIDER")
@@ -26,8 +44,9 @@ def init_settings():
            init_azure_openai()
        case "t-systems":
            from .llmhub import init_llmhub
            init_llmhub()
        case "xinference":
            init_xinference()
        case _:
            raise ValueError(f"Invalid model provider: {model_provider}")
@@ -52,6 +71,21 @@ def init_ollama():
    # )
    pass
 def init_xinference():
    """
    Point the global Settings at an Xinference LLM and embedding model, both
    configured from environment variables (BASE_URL, MODEL, LLM_MAX_TOKENS,
    LLM_TEMPERATURE, EMBEDDING_BASE_URL, EMBEDDING_MODEL, EMBEDDING_DIM).
    """
    llm_endpoint = os.getenv("BASE_URL")
    llm_model = os.getenv("MODEL")

    raw_max_tokens = os.getenv("LLM_MAX_TOKENS")
    token_limit = None if raw_max_tokens is None else int(raw_max_tokens)
    sampling_temp = float(os.getenv("LLM_TEMPERATURE", DEFAULT_XINFERENCE_TEMP))
    Settings.llm = Xinference(llm_model, llm_endpoint, sampling_temp, token_limit)

    # The embedding endpoint falls back to the LLM endpoint when unset or empty.
    embed_endpoint = os.getenv("EMBEDDING_BASE_URL") or llm_endpoint
    embed_model_name = os.getenv("EMBEDDING_MODEL")
    raw_dim = os.getenv("EMBEDDING_DIM")
    embed_dim = None if raw_dim is None else int(raw_dim)
    Settings.embed_model = XinferenceEmbedding(
        embed_model_name, embed_endpoint, dimensions=embed_dim
    )
 def init_openai():
    from llama_index.core.constants import DEFAULT_TEMPERATURE
@@ -0,0 +1,272 @@
 """Xinference embeddings file."""
 import logging
 from enum import Enum
 from http import HTTPStatus
 from typing import Any, Dict, List, Optional, Union, Tuple
 from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher
 from llama_index.core.bridge.pydantic import PrivateAttr
 from llama_index.core.callbacks import CBEventType, EventPayload
 from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
 from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent
 from llama_index.core.postprocessor.types import BaseNodePostprocessor
 from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle
 from pydantic import Field
 logger = logging.getLogger(__name__)
 EMBED_MAX_INPUT_LENGTH = 2048
 EMBED_MAX_BATCH_SIZE = 1
 class XinferenceEmbedding(BaseEmbedding):
    """Xinference class for text embedding.
     """
    # Embedding model backed by a model served from an Xinference endpoint.
    # The model handle is fetched once in __init__ and reused for every call.
    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Underscore-prefixed names are private attributes, not model fields, so
    # they are declared with PrivateAttr and assigned in __init__.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    def __init__(
            self,
            model_uid: str,
            endpoint: str,
            embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
            dimensions: Optional[int] = None,
            additional_kwargs: Optional[Dict[str, Any]] = None,
            api_key: Optional[str] = None,
            api_base: Optional[str] = None,
            api_version: Optional[str] = None,
            max_retries: int = 10,
            **kwargs: Any,
    ) -> None:
        """
        Args:
            model_uid: UID of the model running on the Xinference server.
            endpoint: URL of the Xinference server.
            embed_batch_size: Accepted for interface compatibility; replaced by
                the `replica` value from the model description.
            dimensions: Accepted for interface compatibility; replaced by the
                `dimensions` value from the model description.
            additional_kwargs, api_key, api_base, api_version, max_retries,
            **kwargs: Forwarded unchanged to the base class initializer.

        Raises:
            ImportError: If the xinference client library is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        # NOTE(review): the caller's embed_batch_size/dimensions are overwritten
        # by values from the model description (replica count is used as the
        # batch size). Behavior is kept as-is — confirm this is intended.
        generator, model_description, embed_batch_size, dimensions = self.load_model(
            model_uid, endpoint
        )
        super().__init__(
            embed_batch_size=embed_batch_size,
            dimensions=dimensions,
            model_name=model_uid,
            # `model_description` is declared as a field, so it has to be
            # handed to the model initializer to be populated.
            model_description=model_description,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            **kwargs,
        )
        # Private attributes are assigned after the model is initialized so the
        # assignment does not depend on pydantic tolerating writes to an
        # uninitialized instance.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    def load_model(
            self, model_uid: str, endpoint: str
    ) -> Tuple[Any, Dict[str, Any], int, int]:
        """
        Fetch the model handle and its description from the Xinference server.

        Returns:
            (model handle, model description, description["replica"],
            description["dimensions"]).

        Raises:
            ImportError: If the xinference client library is not installed.
            RuntimeError: If the server does not provide the model or its
                description.
            KeyError: If the description lacks "replica" or "dimensions".
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as e:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from e
        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        model_description = client.list_models().get(model_uid)
        # Explicit checks instead of `assert`, which is stripped under -O.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )
        replica = model_description['replica']
        dimensions = model_description['dimensions']
        return generator, model_description, replica, dimensions

    @classmethod
    def class_name(cls) -> str:
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """
        Embed the input text synchronously by calling the model's
        `create_embedding` and returning the first embedding in the response.
        """
        if self._generator is None:
            raise RuntimeError("Xinference embedding model is not loaded.")
        response = self._generator.create_embedding(input=text)
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """
        Embed the input query synchronously; queries are embedded exactly like
        documents.
        """
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """
        Embed the input query. This delegates to the synchronous
        implementation, so the call blocks until the server responds.
        """
        return self._get_query_embedding(query)
 class XinferenceRerank(BaseNodePostprocessor):
    """Xinference class for rerank.
     """
    # Node postprocessor that re-scores retrieved nodes with a rerank model
    # served from an Xinference endpoint.
    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Underscore-prefixed names are private attributes, not model fields, so
    # they are declared with PrivateAttr and assigned in __init__.
    _generator: Any = PrivateAttr()
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()
    model: str = Field(description="Dashscope rerank model name.")
    # Both limits are optional: __init__ defaults them to None and
    # _postprocess_nodes treats None as "no limit".
    top_n: Optional[int] = Field(default=None, description="Top N nodes to return.")
    threshold: Optional[float] = Field(
        default=None, description="threshold nodes to return."
    )

    def __init__(
            self,
            model_uid: str,
            endpoint: str,
            top_n: int = None,
            threshold: float = None,
            return_documents: bool = False
    ):
        """
        Args:
            model_uid: UID of the rerank model running on the Xinference server.
            endpoint: URL of the Xinference server.
            top_n: Maximum number of nodes to return; None keeps all.
            threshold: Minimum relevance score a node needs; None disables
                the filter.
            return_documents: Forwarded to the base initializer as before.

        Raises:
            ImportError: If the xinference client library is not installed.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description = self.load_model(
            model_uid, endpoint
        )
        super().__init__(
            top_n=top_n,
            model=model_uid,
            model_uid=model_uid,
            threshold=threshold,
            return_documents=return_documents,
            # `model_description` is declared as a field, so it has to be
            # handed to the model initializer to be populated.
            model_description=model_description,
        )
        # The original assigned these to local variables, so the instance
        # never carried them; set them once the model is initialized.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    @classmethod
    def class_name(cls) -> str:
        return "XinferenceRerank"

    def _postprocess_nodes(
            self,
            nodes: List[NodeWithScore],
            query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """
        Rerank `nodes` against the query and return the best ones.

        Nodes are scored by the rerank model, ordered by descending relevance,
        filtered by `threshold` (when set) and truncated to `top_n` (when set).

        Raises:
            ValueError: If `query_bundle` is None.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []
        dispatcher.event(
            ReRankStartEvent(
                nodes=nodes,
                # NOTE(review): the event presumably requires an int here, so
                # "no limit" is reported as the number of candidates — confirm.
                top_n=self.top_n if self.top_n is not None else len(nodes),
                query=query_bundle,
                model_name=self.model,
            )
        )
        with self.callback_manager.event(
                CBEventType.RERANKING,
                payload={
                    EventPayload.NODES: nodes,
                    EventPayload.MODEL_NAME: self.model,
                    EventPayload.QUERY_STR: query_bundle.query_str,
                    EventPayload.TOP_K: self.top_n,
                },
        ) as event:
            texts = [node.node.get_content() for node in nodes]
            response = self._generator.rerank(texts, query_bundle.query_str)
            # Sort explicitly so truncation keeps the highest-scoring nodes
            # regardless of the order the server returns them in.
            ranked = sorted(
                response['results'],
                key=lambda item: item['relevance_score'],
                reverse=True,
            )
            new_nodes = []
            for result in ranked:
                score = result['relevance_score']
                # Previously nodes were only kept when a threshold was set, so
                # running without a threshold always returned nothing.
                if self.threshold is not None and score < self.threshold:
                    continue
                new_nodes.append(
                    NodeWithScore(node=nodes[result['index']].node, score=score)
                )
            if self.top_n is not None:
                new_nodes = new_nodes[:self.top_n]
            event.on_end(payload={EventPayload.NODES: new_nodes})
        dispatcher.event(
            ReRankEndEvent(
                nodes=new_nodes
            )
        )
        return new_nodes

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """
        Fetch the model handle and its description from the Xinference server.

        Returns:
            (model handle, model description).

        Raises:
            ImportError: If the xinference client library is not installed.
            RuntimeError: If the server does not provide the model or its
                description.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as e:
            raise ImportError(
                "Could not import Xinference library."
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from e
        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        model_description = client.list_models().get(model_uid)
        # Explicit checks instead of `assert`, which is stripped under -O.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint."
                "Please make sure Xinference endpoint is running at the correct port."
            )
        return generator, model_description
@@ -0,0 +1,272 @@
 """Xinference embeddings file."""
 import logging
 from enum import Enum
 from http import HTTPStatus
 from typing import Any, Dict, List, Optional, Union, Tuple
 from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding, dispatcher
 from llama_index.core.bridge.pydantic import PrivateAttr
 from llama_index.core.callbacks import CBEventType, EventPayload
 from llama_index.core.embeddings.multi_modal_base import MultiModalEmbedding
 from llama_index.core.instrumentation.events.rerank import ReRankStartEvent, ReRankEndEvent
 from llama_index.core.postprocessor.types import BaseNodePostprocessor
 from llama_index.core.schema import ImageType, NodeWithScore, QueryBundle
 from pydantic import Field
 logger = logging.getLogger(__name__)
 EMBED_MAX_INPUT_LENGTH = 2048
 EMBED_MAX_BATCH_SIZE = 1
 class XinferenceEmbedding(BaseEmbedding):
    """Text embedding backed by a model served from an Xinference endpoint.

    On construction the model handle and its description are fetched from the
    endpoint through ``load_model``; embeddings are then produced by calling
    ``create_embedding`` on that handle.
    """

    # Entry for this model taken from ``client.list_models()``.
    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Handle returned by ``client.get_model``; used for all embedding calls.
    _generator: Any = PrivateAttr()
    # Underscore-prefixed names are private attributes, so they are declared
    # with PrivateAttr (not Field) and assigned in ``__init__``.
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()

    def __init__(
            self,
            model_uid: str,
            endpoint: str,
            embed_batch_size: int = EMBED_MAX_BATCH_SIZE,
            dimensions: Optional[int] = None,
            additional_kwargs: Optional[Dict[str, Any]] = None,
            api_key: Optional[str] = None,
            api_base: Optional[str] = None,
            api_version: Optional[str] = None,
            max_retries: int = 10,
            **kwargs: Any,
    ) -> None:
        """Load the model from the endpoint and initialise the embedding.

        Args:
            model_uid: UID of the embedding model launched on Xinference.
            endpoint: URL of the Xinference endpoint.
            embed_batch_size: Replaced by the replica count reported by the
                endpoint (see the note in the body).
            dimensions: Replaced by the dimensions reported by the endpoint.
            additional_kwargs, api_key, api_base, api_version, max_retries:
                Forwarded unchanged to the base class initialiser.
            **kwargs: Extra keyword arguments forwarded to the base class.

        Raises:
            ImportError: If the ``xinference`` package cannot be imported.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        # NOTE(review): the values reported by the endpoint deliberately
        # overwrite the ``embed_batch_size`` and ``dimensions`` arguments,
        # matching the existing behavior — confirm this is intended.
        generator, model_description, embed_batch_size, dimensions = self.load_model(
            model_uid, endpoint
        )
        # ``model_description`` is a declared field, so it has to reach the
        # base initialiser; a caller-supplied value in ``kwargs`` still wins.
        kwargs.setdefault("model_description", model_description)
        super().__init__(
            embed_batch_size=embed_batch_size,
            dimensions=dimensions,
            model_name=model_uid,
            additional_kwargs=additional_kwargs,
            api_key=api_key,
            api_base=api_base,
            api_version=api_version,
            max_retries=max_retries,
            **kwargs,
        )
        # Private attributes are assigned only after the model is initialised.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    def load_model(
            self, model_uid: str, endpoint: str
    ) -> Tuple[Any, Dict[str, Any], int, int]:
        """Connect to an Xinference endpoint and look up a launched model.

        Args:
            model_uid: UID of the model to look up on the endpoint.
            endpoint: URL of the Xinference endpoint.

        Returns:
            ``(generator, model_description, replica, dimensions)`` where
            ``generator`` is the handle from ``client.get_model`` and the other
            values come from the ``client.list_models()`` entry for the model.

        Raises:
            ImportError: If the ``xinference`` package cannot be imported.
            RuntimeError: If the endpoint yields no handle or no description
                for ``model_uid``.
            KeyError: If the description lacks ``replica`` or ``dimensions``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as exc:
            raise ImportError(
                "Could not import Xinference library. "
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from exc

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``.get`` keeps an unknown uid on the RuntimeError path below instead
        # of leaking a bare KeyError from the lookup.
        model_description = client.list_models().get(model_uid)

        # Explicit checks instead of ``assert``: assertions are removed when
        # Python runs with ``-O``.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint. "
                "Please make sure Xinference endpoint is running at the correct port."
            )

        replica = model_description['replica']
        dimensions = model_description['dimensions']
        return generator, model_description, replica, dimensions

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string."""
        return "XinferenceEmbedding"

    def _get_text_embedding(self, text: str) -> Embedding:
        """Embed ``text`` synchronously through the Xinference model handle.

        Returns the ``embedding`` entry of the first item in the response's
        ``data`` list.

        Raises:
            RuntimeError: If the model handle has not been loaded.
        """
        if self._generator is None:
            raise RuntimeError("Xinference model handle is not loaded.")
        response = self._generator.create_embedding(input=text)
        return response['data'][0]['embedding']

    def _get_query_embedding(self, query: str) -> Embedding:
        """Embed ``query`` synchronously; queries are embedded like texts."""
        return self._get_text_embedding(query)

    async def _aget_query_embedding(self, query: str) -> Embedding:
        """Async entry point; delegates to the synchronous query embedding."""
        return self._get_query_embedding(query)
 class XinferenceRerank(BaseNodePostprocessor):
    """Node postprocessor that reranks nodes with an Xinference rerank model.

    On construction the model handle and its description are fetched from the
    endpoint through ``load_model``. ``_postprocess_nodes`` sends the node
    texts and the query to the handle's ``rerank`` call, then applies the
    optional score threshold and ``top_n`` limit.
    """

    # Entry for this model taken from ``client.list_models()``.
    model_description: Dict[str, Any] = Field(
        description="The model description from Xinference."
    )
    # Handle returned by ``client.get_model``; used for the rerank call.
    _generator: Any = PrivateAttr()
    # Underscore-prefixed names are private attributes, so they are declared
    # with PrivateAttr (not Field) and assigned in ``__init__``.
    _model_uid: str = PrivateAttr()
    _endpoint: str = PrivateAttr()
    model: str = Field(description="Xinference rerank model name.")
    # Both limits are optional; ``None`` disables the corresponding filter.
    top_n: Optional[int] = Field(default=None, description="Top N nodes to return.")
    threshold: Optional[float] = Field(
        default=None, description="threshold nodes to return."
    )

    def __init__(
            self,
            model_uid: str,
            endpoint: str,
            top_n: Optional[int] = None,
            threshold: Optional[float] = None,
            return_documents: bool = False
    ):
        """Load the rerank model from the endpoint and initialise the fields.

        Args:
            model_uid: UID of the rerank model launched on Xinference.
            endpoint: URL of the Xinference endpoint.
            top_n: Maximum number of nodes to return; ``None`` for no limit.
            threshold: Minimum relevance score a node needs to be kept;
                ``None`` keeps every node.
            return_documents: Forwarded to the base class initialiser.

        Raises:
            ImportError: If the ``xinference`` package cannot be imported.
            RuntimeError: If the model cannot be obtained from the endpoint.
        """
        generator, model_description = self.load_model(model_uid, endpoint)
        super().__init__(
            top_n=top_n,
            model=model_uid,
            model_uid=model_uid,
            model_description=model_description,
            threshold=threshold,
            return_documents=return_documents,
        )
        # Private attributes are assigned only after the model is initialised.
        self._generator = generator
        self._model_uid = model_uid
        self._endpoint = endpoint

    @classmethod
    def class_name(cls) -> str:
        """Return the class identifier string."""
        return "XinferenceRerank"

    def _postprocess_nodes(
            self,
            nodes: List[NodeWithScore],
            query_bundle: Optional[QueryBundle] = None,
    ) -> List[NodeWithScore]:
        """Rerank ``nodes`` against the query and apply threshold / top_n.

        Args:
            nodes: Candidate nodes to rerank.
            query_bundle: Query to rank against; required.

        Returns:
            Reranked nodes carrying the model's relevance score, in the order
            the service returned them, limited to ``top_n`` when it is set.

        Raises:
            ValueError: If ``query_bundle`` is ``None``.
        """
        if query_bundle is None:
            raise ValueError("Missing query bundle in extra info.")
        if len(nodes) == 0:
            return []

        dispatcher.event(
            ReRankStartEvent(
                nodes=nodes,
                # NOTE(review): the start event presumably needs an integer
                # here, so "no limit" is reported as the candidate count —
                # confirm against the event schema.
                top_n=self.top_n if self.top_n is not None else len(nodes),
                query=query_bundle,
                model_name=self.model,
            )
        )
        with self.callback_manager.event(
                CBEventType.RERANKING,
                payload={
                    EventPayload.NODES: nodes,
                    EventPayload.MODEL_NAME: self.model,
                    EventPayload.QUERY_STR: query_bundle.query_str,
                    EventPayload.TOP_K: self.top_n,
                },
        ) as event:
            texts = [node.node.get_content() for node in nodes]
            response = self._generator.rerank(texts, query_bundle.query_str)

            new_nodes = []
            for result in response['results']:
                score = result['relevance_score']
                # Without a threshold every result is kept; previously nothing
                # was appended at all when ``threshold`` was None.
                if self.threshold is not None and score < self.threshold:
                    continue
                new_nodes.append(
                    NodeWithScore(node=nodes[result['index']].node, score=score)
                )

            # Assumes the service returns results ordered by relevance, so the
            # first ``top_n`` are the best ones — TODO confirm.
            if self.top_n is not None:
                new_nodes = new_nodes[:self.top_n]

            event.on_end(payload={EventPayload.NODES: new_nodes})

        dispatcher.event(
            ReRankEndEvent(
                nodes=new_nodes
            )
        )
        return new_nodes

    def load_model(self, model_uid: str, endpoint: str) -> Tuple[Any, Dict[str, Any]]:
        """Connect to an Xinference endpoint and look up a launched model.

        Args:
            model_uid: UID of the model to look up on the endpoint.
            endpoint: URL of the Xinference endpoint.

        Returns:
            A ``(generator, model_description)`` pair: the handle returned by
            ``client.get_model(model_uid)`` and the entry stored under
            ``model_uid`` in ``client.list_models()``.

        Raises:
            ImportError: If the ``xinference`` package cannot be imported.
            RuntimeError: If the endpoint yields no handle or no description
                for ``model_uid``.
        """
        try:
            from xinference.client import RESTfulClient
        except ImportError as exc:
            raise ImportError(
                "Could not import Xinference library. "
                'Please install Xinference with `pip install "xinference[all]"`'
            ) from exc

        client = RESTfulClient(endpoint)
        generator = client.get_model(model_uid)
        # ``.get`` keeps an unknown uid on the RuntimeError path below instead
        # of leaking a bare KeyError from the lookup.
        model_description = client.list_models().get(model_uid)

        # Explicit checks instead of ``assert``: assertions are removed when
        # Python runs with ``-O``.
        if generator is None or model_description is None:
            raise RuntimeError(
                "Could not get model from endpoint. "
                "Please make sure Xinference endpoint is running at the correct port."
            )
        return generator, model_description
@@ -10,10 +10,23 @@ db:
  #- uri: mysql+pymysql://zjinfo:Y6EAjEEdSYmskA8B@110.42.234.166:3306/zjinfo
 #  - uri: mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
    queries:
-      - select * from ProjectProperties limit 30;
+      - sql: select * from ProjectProperties limit 30;
-      - select Name, Code, Amount, Amount_Total from TotalCalculateTable
+        explanation: "工程属性表数据，层级关系包含在博微电力造价工程文件格式_ProjectProperties.json文件中。"
-      - select SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 1  limit 30;
+
-      - select Name, Code, Rate, Amount from OtherFee
+      - sql: select Id, ParentId, Level, Name, Code, Amount, Amount_Total from TotalCalculateTable;
        explanation: "总算表数据，层级关系包含在博微电力造价工程文件格式_TotalCalculateTable.json文件中。"
      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '线路' limit 50;
        explanation: "专业类型为线路的项目划分表数据，层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '余物清理' limit 50;
        explanation: "专业类型为余物清理的项目划分表数据，层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
      - sql: select Id, ParentId, Level, SerialNumber, Name, Quantity, Rate, Sum_Price from ProjectDivision where Level = 3 and ProfessionalType = '拆除线路' limit 50;
        explanation: "专业类型为拆除线路的项目划分表数据，层级关系包含在博微电力造价工程文件格式_ProjectDivision.json文件中。"
      - sql: select Id, ParentId, Level, Name, Code, Rate, Amount from OtherFee;
        explanation: "其他费用表数据，层级关系包含在博微电力造价工程文件格式_OtherFee.json文件中。"
 #web:
 #  driver_arguments:
@@ -4,56 +4,66 @@
            "name": "FeeCollectionTable",
            "alias": "",
            "comment": "取费表是取费设置中各取费表明细。查询示例: SELECT Rate FROM FeeCollectionTable WHERE Name = 'findname'。",
-            "fileds": [
+            "fields": [
                {
                    "name": "FeeCollectionTableName",
-                    "alias": "取费名，取费名称，取费表名称",
+                    "alias": "取费表名称，取费名称，取费名",
-                    "comment": "取费表名称"
+                    "comment": "取费表名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "Name",
-                    "alias": "费用名，名称，项目名",
+                    "alias": "名称，费用名，项目名",
-                    "comment": "费用名称，项目名称"
+                    "comment": "费用名称，项目名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "SerialNumber",
-                    "alias": "费用序号，序号，序列号",
+                    "alias": "序号，序列号，费用序号",
-                    "comment": "费用表序号"
+                    "comment": "费用表序号",
                    "type": "VARCHAR"
                },
                {
                    "name": "Code",
-                    "alias": "代码，代号，编号",
+                    "alias": "编号，代号，代码",
-                    "comment": "费用代码"
+                    "comment": "费用代码",
                    "type": "VARCHAR"
                },
                {
                    "name": "CalculationFormula",
-                    "alias": "表达式，公式，计算式",
+                    "alias": "公式，表达式，计算式",
-                    "comment": "取费基数"
+                    "comment": "取费基数",
                    "type": "VARCHAR"
                },
                {
                    "name": "Rate",
                    "alias": "费用利率，费率",
-                    "comment": "取费费率"
+                    "comment": "取费费率",
                    "type": "REAL"
                },
                {
                    "name": "Remarks",
-                    "alias": "备注，说明",
+                    "alias": "说明，备注",
-                    "comment": "费用项备注说明"
+                    "comment": "费用项备注说明",
                    "type": "VARCHAR"
                },
                {
                    "name": "Major",
                    "alias": "专业",
-                    "comment": "取费表专业"
+                    "comment": "取费表专业",
                    "type": "VARCHAR"
                },
                {
                    "name": "Type",
                    "alias": "类型，取费类型",
-                    "comment": "取费表类型"
+                    "comment": "取费表类型",
                    "type": "VARCHAR"
                },
                {
                    "name": "Path",
-                    "alias": "费用全路径，路径",
+                    "alias": "路径，费用全路径",
-                    "comment": "费用项层级全路径"
+                    "comment": "费用项层级全路径",
                    "type": "VARCHAR"
                }
            ]
        }
@@ -4,71 +4,84 @@
            "name": "OtherFee",
            "alias": "",
            "comment": "其他费用表被称为“工程费用中其他费用明细”。其他费用是指为完成工程项目建设所必需的，但不属于建筑工程费、安装工程费、设备购置费、基本预备费的其他相关费用。包括建设场地征用及清理费、项目建设管理费、项目建设技术服务费、生产准备费、大件运输措施费、专业爆破服务费等。查询示例: SELECT Rate FROM OtherFee WHERE Name = 'findname'。",
-            "fileds": [
+            "fields": [
                {
                    "name": "Id",
-                    "alias": "id，项目id，费用id",
+                    "alias": "项目id，id，费用id",
-                    "comment": "费用项目id"
+                    "comment": "费用项目id",
                    "type": "INT"
                },
                {
                    "name": "ParentId",
-                    "alias": "父id，父级id",
+                    "alias": "父级id，父id",
-                    "comment": "费用项目父级id"
+                    "comment": "费用项目父级id",
                    "type": "INT"
                },
                {
                    "name": "Level",
-                    "alias": "层级，层编号，层号",
+                    "alias": "层号，层级，层编号",
-                    "comment": "层级编号，从1开始"
+                    "comment": "层级编号，从1开始",
                    "type": "INT"
                },
                {
                    "name": "Name",
-                    "alias": "费用名，名称，项目名",
+                    "alias": "名称，费用名，项目名",
-                    "comment": "费用名称，项目名称"
+                    "comment": "费用名称，项目名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "SerialNumber",
                    "alias": "序号，序列号",
-                    "comment": "费用表序号"
+                    "comment": "费用表序号",
                    "type": "VARCHAR"
                },
                {
                    "name": "Code",
-                    "alias": "代码，代号，编号",
+                    "alias": "编号，代号，代码",
-                    "comment": "费用代码"
+                    "comment": "费用代码",
                    "type": "VARCHAR"
                },
                {
                    "name": "CalculationFormula",
-                    "alias": "表达式，公式，计算式",
+                    "alias": "公式，表达式，计算式",
-                    "comment": "取费基数"
+                    "comment": "取费基数",
                    "type": "VARCHAR"
                },
                {
                    "name": "Rate",
                    "alias": "费用利率，费率",
-                    "comment": "取费费率"
+                    "comment": "取费费率",
                    "type": "REAL"
                },
                {
                    "name": "Amount",
                    "alias": "金额，价格",
-                    "comment": "金额、合计、费用，\n单位为元"
+                    "comment": "金额、合计、费用，\n单位为元",
                    "type": "REAL"
                },
                {
                    "name": "Remarks",
-                    "alias": "备注，说明",
+                    "alias": "说明，备注",
-                    "comment": "费用项备注说明"
+                    "comment": "费用项备注说明",
                    "type": "VARCHAR"
                },
                {
                    "name": "Compilation_Basis",
                    "alias": "编制依据，编制来源",
-                    "comment": "费用项编制依据"
+                    "comment": "费用项编制依据",
                    "type": "VARCHAR"
                },
                {
                    "name": "WBS_Code",
                    "alias": "WBS编号，WBS编码",
-                    "comment": "费用项WBS编码"
+                    "comment": "费用项WBS编码",
                    "type": "VARCHAR"
                },
                {
                    "name": "Path",
-                    "alias": "费用全路径，路径",
+                    "alias": "路径，费用全路径",
-                    "comment": "费用项层级全路径"
+                    "comment": "费用项层级全路径",
                    "type": "VARCHAR"
                }
            ]
        }
@@ -4,121 +4,144 @@
            "name": "ProjectDivision",
            "alias": "",
            "comment": "项目划分表是用于存储工程项目划分树状数据。内部包含安装工程项目划分，建筑工程项目划分，线路项目划分，工程分部分项。查询示例: SELECT Sum_Price FROM ProjectDivision WHERE Name = 'findname'。",
-            "fileds": [
+            "fields": [
                {
                    "name": "Id",
-                    "alias": "id，项目id，费用id",
+                    "alias": "项目id，id，费用id",
-                    "comment": "项目划分id"
+                    "comment": "项目划分id",
                    "type": "INT"
                },
                {
                    "name": "ParentId",
-                    "alias": "父id，父级id",
+                    "alias": "父级id，父id",
-                    "comment": "项目划分父级id"
+                    "comment": "项目划分父级id",
                    "type": "INT"
                },
                {
                    "name": "Level",
-                    "alias": "层级，层编号，层号",
+                    "alias": "层号，层级，层编号",
-                    "comment": "层级编号，从1开始"
+                    "comment": "层级编号，从1开始",
                    "type": "INT"
                },
                {
                    "name": "Quantity",
                    "alias": "个数，数量，数目",
-                    "comment": "项目划分数量"
+                    "comment": "项目划分数量",
                    "type": "REAL"
                },
                {
                    "name": "SerialNumber",
-                    "alias": "项目序号，序号，序列号",
+                    "alias": "序号，序列号，项目序号",
-                    "comment": "项目划分序号"
+                    "comment": "项目划分序号",
                    "type": "VARCHAR"
                },
                {
                    "name": "Name",
-                    "alias": "项目名，名称",
+                    "alias": "名称，项目名",
-                    "comment": "项目名称"
+                    "comment": "项目名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "Encoding",
-                    "alias": "译码，编码",
+                    "alias": "编码，译码",
-                    "comment": "项目划分编码"
+                    "comment": "项目划分编码",
                    "type": "VARCHAR"
                },
                {
                    "name": "Sum_Price",
                    "alias": "合计，合价",
-                    "comment": "项目划分合价，分部分项费用"
+                    "comment": "项目划分合价，分部分项费用",
                    "type": "REAL"
                },
                {
                    "name": "FeeCollectionTableName",
                    "alias": "取费表",
-                    "comment": "项目划分的取费表，此项目划分选用的取费表"
+                    "comment": "项目划分的取费表，此项目划分选用的取费表",
                    "type": "VARCHAR"
                },
                {
                    "name": "Remarks",
-                    "alias": "备注，说明",
+                    "alias": "说明，备注",
-                    "comment": "备注"
+                    "comment": "备注",
                    "type": "VARCHAR"
                },
                {
                    "name": "WBS_Code",
                    "alias": "WBS编号，WBS编码",
-                    "comment": "WBS编码"
+                    "comment": "WBS编码",
                    "type": "VARCHAR"
                },
                {
                    "name": "Manual_Adjustment_Coefficient",
                    "alias": "人工调差系数",
-                    "comment": "此项目划分下人工调差系数"
+                    "comment": "此项目划分下人工调差系数",
                    "type": "REAL"
                },
                {
                    "name": "Material_Adjustment_Coefficient",
                    "alias": "材料调差系数",
-                    "comment": "此项目划分下材料调差系数"
+                    "comment": "此项目划分下材料调差系数",
                    "type": "REAL"
                },
                {
                    "name": "Mechanical_Adjustment_Coefficient",
                    "alias": "机械调差系数",
-                    "comment": "此项目划分下机械调差系数"
+                    "comment": "此项目划分下机械调差系数",
                    "type": "REAL"
                },
                {
                    "name": "Demolition_Manual_Adjustment_Coefficient",
                    "alias": "拆除人工调差系数",
-                    "comment": "此项目划分下拆除人工调差系数"
+                    "comment": "此项目划分下拆除人工调差系数",
                    "type": "REAL"
                },
                {
                    "name": "Demolition_Material_Adjustment_Coefficient",
                    "alias": "拆除材料调差系数",
-                    "comment": "此项目划分下拆除材料调差系数"
+                    "comment": "此项目划分下拆除材料调差系数",
                    "type": "REAL"
                },
                {
                    "name": "Demolition_Mechanical_Adjustment_Coefficient",
                    "alias": "拆除机械调差系数",
-                    "comment": "此项目划分下拆除机械调差系数"
+                    "comment": "此项目划分下拆除机械调差系数",
                    "type": "REAL"
                },
                {
                    "name": "ProfessionalType",
                    "alias": "专业类型",
-                    "comment": "专业类型，字段值有变电安装、变电建筑、线路等。变电安装等于安装工程，变电建筑等于建筑工程，线路等于安装工程。"
+                    "comment": "专业类型，字段值有变电安装、变电建筑、线路等。变电安装等于安装工程，变电建筑等于建筑工程，线路等于安装工程。",
                    "type": "VARCHAR"
                },
                {
                    "name": "Unit",
                    "alias": "单位",
-                    "comment": "项目划分单位"
+                    "comment": "项目划分单位",
                    "type": "VARCHAR"
                },
                {
                    "name": "CalculationFormula",
-                    "alias": "表达式，公式，计算式",
+                    "alias": "公式，表达式，计算式",
-                    "comment": "项目划分计算式"
+                    "comment": "项目划分计算式",
                    "type": "VARCHAR"
                },
                {
                    "name": "Rate",
                    "alias": "费用利率，费率",
-                    "comment": "项目划分费率"
+                    "comment": "项目划分费率",
                    "type": "REAL"
                },
                {
                    "name": "Code",
-                    "alias": "代码，代号，编号",
+                    "alias": "编号，代号，代码",
-                    "comment": "项目划分代码"
+                    "comment": "项目划分代码",
                    "type": "VARCHAR"
                },
                {
                    "name": "Path",
                    "alias": "路径，项目全路径",
-                    "comment": "项目划分层级全路径"
+                    "comment": "项目划分层级全路径",
                    "type": "VARCHAR"
                }
            ]
        }
@@ -4,196 +4,234 @@
            "name": "ProjectDivisions_CostPreview",
            "alias": "",
            "comment": "项目划分_费用预览表也被称为“项目划分费用预览”、“项目划分取费费用”。其中包含项目划分合价、直接费、间接费、利润、税金、主材费等。查询示例: SELECT Total FROM ProjectDivisions_CostPreview WHERE Id = '15'。",
-            "fileds": [
+            "fields": [
                {
                    "name": "Id",
                    "alias": "id，项目id",
-                    "comment": "项目划分id"
+                    "comment": "项目划分id",
                    "type": "INT"
                },
                {
                    "name": "ParentId",
-                    "alias": "父id，父级id",
+                    "alias": "父级id，父id",
-                    "comment": "项目划分父级id"
+                    "comment": "项目划分父级id",
                    "type": "INT"
                },
                {
                    "name": "Level",
-                    "alias": "层级，层编号，层号",
+                    "alias": "层号，层级，层编号",
-                    "comment": "层级编号，从1开始"
+                    "comment": "层级编号，从1开始",
                    "type": "INT"
                },
                {
                    "name": "ProfessionalType",
                    "alias": "专业类型",
-                    "comment": "专业类型，字段值有变电安装、变电建筑、线路等。变电安装等于安装工程，变电建筑等于建筑工程，线路等于安装工程。"
+                    "comment": "专业类型，字段值有变电安装、变电建筑、线路等。变电安装等于安装工程，变电建筑等于建筑工程，线路等于安装工程。",
                    "type": "VARCHAR"
                },
                {
                    "name": "FeeCollectionTableName",
                    "alias": "取费表",
-                    "comment": "项目划分的取费表，此项目划分选用的取费表"
+                    "comment": "项目划分的取费表，此项目划分选用的取费表",
                    "type": "VARCHAR"
                },
                {
                    "name": "Direct_Cost",
                    "alias": "直接费",
-                    "comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。"
+                    "comment": "直接费是指施工过程中直接耗用于建筑、安装工程产品的各项费用的总和。包括直接工程费和措施费。",
                    "type": "REAL"
                },
                {
                    "name": "Direct_Project_Cost",
                    "alias": "直接工程费",
-                    "comment": "直接工程费是指按照正常的施工条件，在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。"
+                    "comment": "直接工程费是指按照正常的施工条件，在施工过程中耗费的构成工程实体的各项费用。包括人工费、材料费和施工机械使用费。",
                    "type": "REAL"
                },
                {
                    "name": "Quota_Direct_Cost",
                    "alias": "定额直接费",
-                    "comment": "定额直接费，包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。"
+                    "comment": "定额直接费，包含人工费、材料费中已进入定额基价的消耗性材料费和施工机械使用费。",
                    "type": "REAL"
                },
                {
                    "name": "Labor_Cost",
                    "alias": "人工费",
-                    "comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。"
+                    "comment": "人工费是指支付给直接从事建筑安装工程施工作业的生产人员的各项费用。包括基本工资、工资性补贴、辅助工资、职工福利费、生产人员劳动保护费。",
                    "type": "REAL"
                },
                {
                    "name": "Material_Cost",
                    "alias": "材料费",
-                    "comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。"
+                    "comment": "材料费是指施工过程中一次性消耗材料及摊销材料的费用。指已进入定额基价的消耗性材料费。",
                    "type": "REAL"
                },
                {
                    "name": "Construction_Machinery_Cost",
                    "alias": "施工机械使用费",
-                    "comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。"
+                    "comment": "施工机械使用费是指施工机械作业所发生的机械使用费以及机械的现场安拆费和场外运费。包括折旧费、检修费、维护费、安装及拆卸费、场外运费、操作人员人工费、燃料动力费、其他费等。",
                    "type": "REAL"
                },
                {
                    "name": "Installation_Material_Cost",
                    "alias": "装置性材料费",
-                    "comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料，也称主要材料费。装置性材料通常在概算或预算定额中未计价，也称未计价材料，也称主材。"
+                    "comment": "装置性材料费是指建设工程中构成工艺系统实体的工艺性材料，也称主要材料费。装置性材料通常在概算或预算定额中未计价，也称未计价材料，也称主材。",
                    "type": "REAL"
                },
                {
                    "name": "A_Supply_Installation_Material_Cost",
                    "alias": "甲供装置性材料费",
-                    "comment": "供货方为甲供的装置性材料费。"
+                    "comment": "供货方为甲供的装置性材料费。",
                    "type": "REAL"
                },
                {
                    "name": "B_Supply_Installation_Material_Cost",
                    "alias": "乙供装置性材料费",
-                    "comment": "供货方为乙供的装置性材料费。"
+                    "comment": "供货方为乙供的装置性材料费。",
                    "type": "REAL"
                },
                {
                    "name": "Measure_Cost",
                    "alias": "措施费",
-                    "comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。"
+                    "comment": "措施费是指为完成工程项目施工而进行施工准备、克服自然条件的不利影响和辅助施工所发生的不构成工程实体的各项费用。包括冬雨季施工增加费、夜间施工增加费、施工工具用具使用费、特殊地区施工增加费、临时设施费、施工机构迁移费、安全文明施工费。",
                    "type": "REAL"
                },
                {
                    "name": "WinterRainySeasons_Additional_Construction_Cost",
                    "alias": "冬雨季施工增加费",
-                    "comment": "冬雨季施工增加费是指按照合理的工期要求，建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。"
+                    "comment": "冬雨季施工增加费是指按照合理的工期要求，建筑、安装工程必须在冬季、雨季期间连续施工而需要增加的费用。",
                    "type": "REAL"
                },
                {
                    "name": "Night_Additional_Construction_Cost",
                    "alias": "夜间施工增加费",
-                    "comment": "夜间施工增加费是指按照规程要求，工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。"
+                    "comment": "夜间施工增加费是指按照规程要求，工程必须在夜间连续施工所发生的夜班补助、夜间施工降效、夜间施工照明设备摊销及照明用电等费用。",
                    "type": "REAL"
                },
                {
                    "name": "Construction_Tool_Usage_Cost",
                    "alias": "施工工具用具使用费",
-                    "comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。"
+                    "comment": "施工工具用具使用费是指施工企业的生产、检验、试验部门使用的不属于固定资产的工具用具和仪器仪表的购置、摊销和维护费用。",
                    "type": "REAL"
                },
                {
                    "name": "Special_Areas_Additional_Construction_Cost",
                    "alias": "特殊地区施工增加费",
-                    "comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工:因特殊自然条件影响而需额外增加的施工费用。"
+                    "comment": "特殊地区施工增加费是指在高海拔、酷热、严寒等地区施工，因特殊自然条件影响而需额外增加的施工费用。",
                    "type": "REAL"
                },
                {
                    "name": "Temporary_Facility_Cost",
                    "alias": "临时设施费",
-                    "comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用，以及维修、拆除、折旧及摊销费，或临时设施的租赁费等。"
+                    "comment": "临时设施费是指施工企业为满足现场正常生产、生活需要在现场必须搭设的生产、生活用临时建筑物、构筑物和其他临时设施所发生的费用，以及维修、拆除、折旧及摊销费，或临时设施的租赁费等。",
                    "type": "REAL"
                },
                {
                    "name": "Construction_Organization_Relocation_Cost",
                    "alias": "施工机构迁移费",
-                    "comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资，以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。"
+                    "comment": "施工机构迁移费是指施工企业派遣施工队伍到所承建工程现场所发生的搬迁费用。包括职工调遣差旅费和调遣期间的工资，以及办公设备、工器具、家具、材料用品和施工机械等的搬迁费用。",
                    "type": "REAL"
                },
                {
                    "name": "Safe_Civilized_Construction_Cost",
                    "alias": "安全文明施工费",
-                    "comment": "安全文明施工费，包括安全生产费、文明施工费、环境保护费。"
+                    "comment": "安全文明施工费，包括安全生产费、文明施工费、环境保护费。",
                    "type": "REAL"
                },
                {
                    "name": "Indirect_Cost",
                    "alias": "间接费",
-                    "comment": "间接费是指建筑安装工程的施工过程中，为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。"
+                    "comment": "间接费是指建筑安装工程的施工过程中，为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。",
                    "type": "REAL"
                },
                {
                    "name": "Regulatory_Cost",
                    "alias": "规费",
-                    "comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。"
+                    "comment": "规费是指按照国家行政主管部门或省级政府和省级有关权力部门规定必须缴纳并计入建筑安装工程造价的费用。包括社会保险费和住房公积金。",
                    "type": "REAL"
                },
                {
                    "name": "Social_Insurance_Premiums",
                    "alias": "社会保险费",
-                    "comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。"
+                    "comment": "社会保险费包括养老保险费、失业保险费、医疗保险费、生育保险费和工伤保险费。",
                    "type": "REAL"
                },
                {
                    "name": "Housing_Provident_Fund",
                    "alias": "住房公积金",
-                    "comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。"
+                    "comment": "住房公积金是指企业按照规定标准为职工缴纳的住房公积金。",
                    "type": "REAL"
                },
                {
                    "name": "Enterprise_Management_Cost",
                    "alias": "企业管理费",
-                    "comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。"
+                    "comment": "企业管理费是指建筑安装施工企业为组织施工生产和经营管理所发生的费用。",
                    "type": "REAL"
                },
                {
                    "name": "Construction_Enterprise_Cooperation_Debugging_Cost",
                    "alias": "施工企业配合调试费",
-                    "comment": "施工企业配合调试费是指在工程整套启动试运阶段，施工企业安装专业配合调试所发生的费用。"
+                    "comment": "施工企业配合调试费是指在工程整套启动试运阶段，施工企业安装专业配合调试所发生的费用。",
                    "type": "REAL"
                },
                {
                    "name": "Profit",
                    "alias": "利润",
-                    "comment": "利润是指施工企业完成所承包工程获得的盈利。"
+                    "comment": "利润是指施工企业完成所承包工程获得的盈利。",
                    "type": "REAL"
                },
                {
                    "name": "Taxes",
                    "alias": "税金",
-                    "comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。"
+                    "comment": "税金是指按照国家税法规定应计入建筑安装工程造价内的销项税额。",
                    "type": "REAL"
                },
                {
                    "name": "Equipment_Cost",
                    "alias": "设备费",
-                    "comment": "设备购置费是指为项目建设而购置或自制各种设备，并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。"
+                    "comment": "设备购置费是指为项目建设而购置或自制各种设备，并将设备运至施工现场指定位置所支出的费用。包括设备费和设备运杂费。",
                    "type": "REAL"
                },
                {
                    "name": "B_Supply_Equipment_Excluding_Tax_Price",
                    "alias": "乙供设备不含税价",
-                    "comment": "设备费中，供货方为乙供设备，不含税价"
+                    "comment": "设备费中，供货方为乙供设备，不含税价",
                    "type": "REAL"
                },
                {
                    "name": "A_Supply_Equipment_Tax_Price",
                    "alias": "甲供设备含税价",
-                    "comment": "设备费中，供货方为甲供设备，含税价"
+                    "comment": "设备费中，供货方为甲供设备，含税价",
                    "type": "REAL"
                },
                {
                    "name": "Installation_Cost",
                    "alias": "安装费",
-                    "comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。"
+                    "comment": "安装费包含定额直接费、措施费、间接费、利润、税金和一笔性费用。",
                    "type": "REAL"
                },
                {
                    "name": "Main_Material_Cost",
                    "alias": "主材费",
-                    "comment": "主材费指装置性材料费"
+                    "comment": "主材费指装置性材料费",
                    "type": "REAL"
                },
                {
                    "name": "Total",
-                    "alias": "总体费用，总计，总价，总的费用",
+                    "alias": "总价，总计，总体费用，总的费用",
-                    "comment": "总计包含安装费、主材费、设备费。"
+                    "comment": "总计包含安装费、主材费、设备费。",
                    "type": "REAL"
                },
                {
                    "name": "Sum",
                    "alias": "合计，合价",
-                    "comment": "项目划分合价，分部分项费用，项目划分费用。合计包含安装费和主材费。"
+                    "comment": "项目划分合价，分部分项费用，项目划分费用。合计包含安装费和主材费。",
                    "type": "REAL"
                },
                {
                    "name": "Path",
                    "alias": "路径，项目划分全路径",
-                    "comment": "项目划分层级全路径"
+                    "comment": "项目划分层级全路径",
                    "type": "VARCHAR"
                }
            ]
        }
@@ -4,26 +4,30 @@
            "name": "ProjectProperties",
            "alias": "",
            "comment": "工程属性表是用于存储整个工程的重要属性，访问该表都是为了通过属性名查找属性值。通常属性值有工程信息、工程属性、技经参数，表中包含工程总投资、工程总费用，工程主要费用，工程技经参数等。查询示例: SELECT Value FROM ProjectProperties WHERE Name = 'findname'。",
-            "fileds": [
+            "fields": [
                 {
                     "name": "Name",
-                    "alias": "属性名，属性名称，属性",
+                    "alias": "名称、属性、属性名称、字段、字段名称、变量、参数，属性名",
-                    "comment": "属性的唯一标识"
+                    "comment": "属性的唯一标识",
                    "type": "VARCHAR"
                },
                {
                    "name": "Value",
-                    "alias": "属性值",
+                    "alias": "值、变量值、参数值、数值，属性值",
-                    "comment": "属性对应的实际值"
+                    "comment": "属性对应的实际值",
                    "type": "VARCHAR"
                },
                {
                    "name": "Type",
-                    "alias": "类型，属性类型",
+                    "alias": "类型、变量类型、数值类型，属性类型",
-                    "comment": "属性变量的类型"
+                    "comment": "属性变量的类型",
                    "type": "VARCHAR"
                },
                {
                    "name": "Unit",
                    "alias": "单位",
-                    "comment": "单位"
+                    "comment": "单位",
                    "type": "VARCHAR"
                }
            ]
        }
@@ -4,276 +4,330 @@
            "name": "ProjectQuantities",
            "alias": "",
            "comment": "工程量表是项目划分下工程量，包含定额、主材、设备、一笔性费用。查询示例: SELECT BudgetPrice FROM ProjectQuantities WHERE Name = 'findname'。",
-            "fileds": [
+            "fields": [
                {
                    "name": "Id",
                    "alias": "id",
-                    "comment": "消耗量id，工程量id"
+                    "comment": "消耗量id，工程量id",
                    "type": "INT"
                },
                {
                    "name": "ParentId",
-                    "alias": "父id，父级id",
+                    "alias": "父级id，父id",
-                    "comment": "父级id"
+                    "comment": "父级id",
                    "type": "INT"
                },
                {
                    "name": "ProjectDivisionId",
                    "alias": "项目划分id，项目id",
-                    "comment": "父级项目划分id"
+                    "comment": "父级项目划分id",
                    "type": "INT"
                },
                {
                    "name": "Quantity",
                    "alias": "个数，数量，数目",
-                    "comment": "数量，消耗量数量，工程量数量，主材数量，定额数量，设备数量，项目划分单位"
+                    "comment": "数量，消耗量数量，工程量数量，主材数量，定额数量，设备数量，项目划分数量",
                    "type": "REAL"
                },
                {
                    "name": "FeatureSegment",
                    "alias": "特征段",
-                    "comment": "线路特征段"
+                    "comment": "线路特征段",
                    "type": "VARCHAR"
                },
                {
                    "name": "ParentQuantity",
                    "alias": "父级个数，父级数量",
-                    "comment": "父级id的数量"
+                    "comment": "父级id的数量",
                    "type": "REAL"
                },
                {
                    "name": "Name",
                    "alias": "名称",
-                    "comment": "项目名称，工程量名称，消耗量名称，主材名称，定额名称，设备名称，材料名称"
+                    "comment": "项目名称，工程量名称，消耗量名称，主材名称，定额名称，设备名称，材料名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "Encoding",
-                    "alias": "译码，编码",
+                    "alias": "编码，译码",
-                    "comment": "编码，定额编码，主材编码，设备编码"
+                    "comment": "编码，定额编码，主材编码，设备编码",
                    "type": "VARCHAR"
                },
                {
                    "name": "SpecificationModel",
                    "alias": "规格型号",
-                    "comment": "规格型号，主材规格型号，设备规格型号"
+                    "comment": "规格型号，主材规格型号，设备规格型号",
                    "type": "VARCHAR"
                },
                {
                    "name": "Unit",
                    "alias": "单位",
-                    "comment": "单位，主材单位，定额单位，设备单位，项目划分单位"
+                    "comment": "单位，主材单位，定额单位，设备单位，项目划分单位",
                    "type": "VARCHAR"
                },
                {
                    "name": "BasePrice",
                    "alias": "基价",
-                    "comment": "定额基价"
+                    "comment": "定额基价",
                    "type": "REAL"
                },
                {
                    "name": "LaborCost",
                    "alias": "人工费",
-                    "comment": "定额人工费"
+                    "comment": "定额人工费",
                    "type": "REAL"
                },
                {
                    "name": "MaterialCost",
                    "alias": "材料费",
-                    "comment": "定额材料费"
+                    "comment": "定额材料费",
                    "type": "REAL"
                },
                {
                    "name": "MachineryCost",
                    "alias": "机械费",
-                    "comment": "定额机械费"
+                    "comment": "定额机械费",
                    "type": "REAL"
                },
                {
                    "name": "QuotaCoefficient",
                    "alias": "定额系数",
-                    "comment": "定额系数"
+                    "comment": "定额系数",
                    "type": "REAL"
                },
                {
                    "name": "LaborCoefficient",
                    "alias": "人工系数",
-                    "comment": "定额人工系数"
+                    "comment": "定额人工系数",
                    "type": "REAL"
                },
                {
                    "name": "MaterialCoefficient",
                    "alias": "材料系数",
-                    "comment": "定额材料系数"
+                    "comment": "定额材料系数",
                    "type": "REAL"
                },
                {
                    "name": "MechanicalCoefficient",
                    "alias": "机械系数",
-                    "comment": "定额机械系数"
+                    "comment": "定额机械系数",
                    "type": "REAL"
                },
                {
                    "name": "ExpenseType",
                    "alias": "费用类型",
-                    "comment": "费用类型，取值为取费、不取费"
+                    "comment": "费用类型，取值为取费、不取费",
                    "type": "VARCHAR"
                },
                {
                    "name": "BudgetPrice",
                    "alias": "预算价",
-                    "comment": "预算价"
+                    "comment": "预算价",
                    "type": "REAL"
                },
                {
                    "name": "MarketPrice",
                    "alias": "市场价",
-                    "comment": "间接费是指建筑安装工程的施工过程中，为全工程项目服务而不直接消耗在特定产品对象上的费用。包括规费、企业管理费和施工企业配合调试费。"
+                    "comment": "市场价",
                    "type": "REAL"
                },
                {
                    "name": "Supplier",
                    "alias": "供货方",
-                    "comment": "供货方，设备供货方，主材供货方，取值为甲供、乙供"
+                    "comment": "供货方，设备供货方，主材供货方，取值为甲供、乙供",
                    "type": "VARCHAR"
                },
                {
                    "name": "Type",
                    "alias": "类型",
-                    "comment": "工程量类型，取值定额、主材、设备、一笔性费用"
+                    "comment": "工程量类型，取值定额、主材、设备、一笔性费用",
                    "type": "VARCHAR"
                },
                {
                    "name": "QuotaRange",
                    "alias": "定额范围",
-                    "comment": "定额范围，取值概算、预算"
+                    "comment": "定额范围，取值概算、预算",
                    "type": "VARCHAR"
                },
                {
                    "name": "A_Supply_Material_Cost_Excluding_Tax",
                    "alias": "甲供材料费不含税",
-                    "comment": "甲供材料费不含税"
+                    "comment": "甲供材料费不含税",
                    "type": "REAL"
                },
                {
                    "name": "A_Supply_Material_Cost_Including_Tax",
                    "alias": "甲供材料费含税",
-                    "comment": "甲供材料费含税"
+                    "comment": "甲供材料费含税",
                    "type": "REAL"
                },
                {
                    "name": "B_Supply_Material_Cost_Excluding_Tax",
                    "alias": "乙供材料费不含税",
-                    "comment": "乙供材料费不含税"
+                    "comment": "乙供材料费不含税",
                    "type": "REAL"
                },
                {
                    "name": "B_Supply_Material_Cost_Including_Tax",
                    "alias": "乙供材料费含税",
-                    "comment": "乙供材料费含税"
+                    "comment": "乙供材料费含税",
                    "type": "REAL"
                },
                {
                    "name": "ScaffoldCalculation",
                    "alias": "脚手架计取",
-                    "comment": "脚手架计取，取值计取、不计取"
+                    "comment": "脚手架计取，取值计取、不计取",
                    "type": "VARCHAR"
                },
                {
                    "name": "Remarks",
-                    "alias": "备注，说明",
+                    "alias": "说明，备注",
-                    "comment": "备注，说明"
+                    "comment": "备注，说明",
                    "type": "VARCHAR"
                },
                {
                    "name": "FeeCollectionTableName",
                    "alias": "取费表",
-                    "comment": "项目划分的取费表，工程量的取费表"
+                    "comment": "项目划分的取费表，工程量的取费表",
                    "type": "VARCHAR"
                },
                {
                    "name": "Quota_Section_Name",
                    "alias": "定额章节名称",
-                    "comment": "定额章节名称"
+                    "comment": "定额章节名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "ProfessionalType",
                    "alias": "专业类型",
-                    "comment": "专业类型，字段值有变电安装、变电建筑、线路等。变电安装等于安装工程，变电建筑等于建筑工程，线路等于安装工程。"
+                    "comment": "专业类型，字段值有变电安装、变电建筑、线路等。变电安装等于安装工程，变电建筑等于建筑工程，线路等于安装工程。",
                    "type": "VARCHAR"
                },
                {
                    "name": "split",
                    "alias": "拆分",
-                    "comment": "是否为拆分材料，取值1为拆分，取值0为不拆分"
+                    "comment": "是否为拆分材料，取值1为拆分，取值0为不拆分",
                    "type": "INT"
                },
                {
                    "name": "Loss",
                    "alias": "损耗",
-                    "comment": "损耗率，主材损耗率"
+                    "comment": "损耗率，主材损耗率",
                    "type": "REAL"
                },
                {
                    "name": "SingleWeight",
                    "alias": "单重",
-                    "comment": "单重，主材单重"
+                    "comment": "单重，主材单重",
                    "type": "REAL"
                },
                {
                    "name": "LineWeight",
                    "alias": "线重",
-                    "comment": "线重，主材线重"
+                    "comment": "线重，主材线重",
                    "type": "REAL"
                },
                {
                    "name": "SupervisedMaterials",
                    "alias": "监造物料",
-                    "comment": "监造物料，取值1为监造物料，取值0为非监造物料"
+                    "comment": "监造物料，取值1为监造物料，取值0为非监造物料",
                    "type": "INT"
                },
                {
                    "name": "EquipmentMaterials",
                    "alias": "设备性材料",
-                    "comment": "设备性材料，取值1为设备性材料，取值0为主材"
+                    "comment": "设备性材料，取值1为设备性材料，取值0为主材",
                    "type": "INT"
                },
                {
                    "name": "GrossWeight",
                    "alias": "毛重",
-                    "comment": "毛重，主材毛重"
+                    "comment": "毛重，主材毛重",
                    "type": "VARCHAR"
                },
                {
                    "name": "TransportationType",
                    "alias": "运输类型",
-                    "comment": "运输类型，主材运输类型"
+                    "comment": "运输类型，主材运输类型",
                    "type": "VARCHAR"
                },
                {
                    "name": "TransportationMiscellaneous",
                    "alias": "运杂费率",
-                    "comment": "运杂费率，设备运杂费率"
+                    "comment": "运杂费率，设备运杂费率",
                    "type": "REAL"
                },
                {
                    "name": "EquipmentType",
                    "alias": "设备类型",
-                    "comment": "设备类型，取值为主要设备、普通设备"
+                    "comment": "设备类型，取值为主要设备、普通设备",
                    "type": "VARCHAR"
                },
                {
                    "name": "UnitPrice",
                    "alias": "单价",
-                    "comment": "单价"
+                    "comment": "单价",
                    "type": "REAL"
                },
                {
                    "name": "Market_Price_Excluding_Tax",
                    "alias": "市场价不含税",
-                    "comment": "市场价不含税"
+                    "comment": "市场价不含税",
                    "type": "REAL"
                },
                {
                    "name": "Market_Price_Including_Tax",
                    "alias": "市场价含税",
-                    "comment": "市场价含税，设备含税价"
+                    "comment": "市场价含税，设备含税价",
                    "type": "REAL"
                },
                {
                    "name": "Budget_Price_Excluding_Tax",
                    "alias": "预算价不含税",
-                    "comment": "预算价不含税"
+                    "comment": "预算价不含税",
                    "type": "REAL"
                },
                {
                    "name": "Budget_Price_Including_Tax",
                    "alias": "预算价含税",
-                    "comment": "预算价含税"
+                    "comment": "预算价含税",
                    "type": "REAL"
                },
                {
                    "name": "Unit_Price_Excluding_Tax",
                    "alias": "单价不含税",
-                    "comment": "单价不含税，设备不含税价"
+                    "comment": "单价不含税，设备不含税价",
                    "type": "REAL"
                },
                {
                    "name": "GroupPrice",
                    "alias": "分组合价",
-                    "comment": "分组合价"
+                    "comment": "分组合价",
                    "type": "REAL"
                },
                {
                    "name": "Pump_Truck_Pouring",
                    "alias": "泵车浇制",
-                    "comment": "泵车浇制，取值1为泵车浇制，取值0为非泵车浇制"
+                    "comment": "泵车浇制，取值1为泵车浇制，取值0为非泵车浇制",
                    "type": "INT"
                },
                {
                    "name": "On_Site_Preparation",
                    "alias": "现场制备",
-                    "comment": "现场制备，取值1为现场制备，取值0为非现场制备"
+                    "comment": "现场制备，取值1为现场制备，取值0为非现场制备",
                    "type": "INT"
                },
                {
                    "name": "Clear_Water_Concrete",
                    "alias": "清水混凝土",
-                    "comment": "清水混凝土，取值1为清水混凝土，取值0为非清水混凝土"
+                    "comment": "清水混凝土，取值1为清水混凝土，取值0为非清水混凝土",
                    "type": "INT"
                },
                {
                    "name": "Debugging_Fee_Calculation",
                    "alias": "调试费计取",
-                    "comment": "调试费计取，取值计取、不计取"
+                    "comment": "调试费计取，取值计取、不计取",
                    "type": "VARCHAR"
                }
            ]
        }
@@ -2,83 +2,98 @@
    "Table": [
        {
            "name": "TotalCalculateTable",
-            "alias": "总算表",
+            "alias": "",
            "comment": "总算表也被称为“工程总费用”、“工程费用”。其中包含本地工程、辅助设施工程、编制基准期价差、设备购置费、其他费用、基本预备费、特殊费用、工程静态投资、动态费用、价差预备费、建设期贷款利息、工程动态投资、可抵扣增值税额。查询示例: SELECT Amount FROM TotalCalculateTable WHERE Name = 'findname'。",
-            "fileds": [
+            "fields": [
                {
                    "name": "Id",
-                    "alias": "id，项目id，费用id",
+                    "alias": "项目id，id，费用id",
-                    "comment": "费用项目id"
+                    "comment": "费用项目id",
                    "type": "INT"
                },
                {
                    "name": "ParentId",
-                    "alias": "父id，父级id",
+                    "alias": "父级id，父id",
-                    "comment": "费用项目父级id"
+                    "comment": "费用项目父级id",
                    "type": "INT"
                },
                {
                    "name": "Level",
-                    "alias": "层级，层编号，层号",
+                    "alias": "层号，层级，层编号",
-                    "comment": "层级编号，从1开始"
+                    "comment": "层级编号，从1开始",
                    "type": "INT"
                },
                {
                    "name": "Name",
-                    "alias": "费用名，名称，项目名",
+                    "alias": "名称，费用名，项目名",
-                    "comment": "费用名称，项目名称"
+                    "comment": "费用名称，项目名称",
                    "type": "VARCHAR"
                },
                {
                    "name": "SerialNumber",
                    "alias": "序号",
-                    "comment": "工程费用序号"
+                    "comment": "工程费用序号",
                    "type": "VARCHAR"
                },
                {
                    "name": "Code",
-                    "alias": "代码，代号，编号",
+                    "alias": "编号，代号，代码",
-                    "comment": "费用代码"
+                    "comment": "费用代码",
                    "type": "VARCHAR"
                },
                {
                    "name": "Rate",
                    "alias": "费用利率，费率",
-                    "comment": "费率"
+                    "comment": "费率",
                    "type": "REAL"
                },
                {
                    "name": "Amount",
                    "alias": "金额，价格",
-                    "comment": "合计费"
+                    "comment": "合计费",
                    "type": "REAL"
                },
                {
                    "name": "WBS_Code",
                    "alias": "WBS编号，WBS编码",
-                    "comment": "费用编码"
+                    "comment": "费用编码",
                    "type": "VARCHAR"
                },
                {
                    "name": "Path",
-                    "alias": "费用全路径，路径",
+                    "alias": "路径，费用全路径",
-                    "comment": "费用名称全路径"
+                    "comment": "费用名称全路径",
                    "type": "VARCHAR"
                },
                {
                    "name": "Amount_InstallationCost",
-                    "alias": "安装价格，安装金额，金额_安装费",
+                    "alias": "安装金额，金额_安装费，安装价格",
-                    "comment": "安装费金额"
+                    "comment": "安装费金额",
                    "type": "REAL"
                },
                {
                    "name": "Amount_EquipmentCost",
-                    "alias": "设备价格，金额_设备费，设备金额",
+                    "alias": "金额_设备费，设备金额，设备价格",
-                    "comment": "设备费金额"
+                    "comment": "设备费金额",
                    "type": "REAL"
                },
                {
                    "name": "Amount_OtherCost",
-                    "alias": "其他费用价格，其他费用金额，金额_其他费",
+                    "alias": "其他费用金额，金额_其他费，其他费用价格",
-                    "comment": "其他费金额"
+                    "comment": "其他费金额",
                    "type": "REAL"
                },
                {
                    "name": "Amount_Total",
                    "alias": "总的金额，金额_占总计，总体金额",
-                    "comment": "合计费占总计"
+                    "comment": "合计费占总计",
                    "type": "REAL"
                },
                {
                    "name": "Amount_UnitInvestment",
-                    "alias": "合计投资金额，金额_单位投资",
+                    "alias": "金额_单位投资，合计投资金额",
-                    "comment": "合计费单位投资"
+                    "comment": "合计费单位投资",
                    "type": "REAL"
                }
            ]
        }
@@ -1,4 +1,6 @@
 from dotenv import load_dotenv
 from llama_index.core.node_parser import SentenceSplitter
 load_dotenv()
@@ -13,55 +15,55 @@ from app.api.routers.upload import file_upload_router
 from app.settings import init_settings
 from app.observability import init_observability
 from fastapi.staticfiles import StaticFiles
 from phoenix.trace import using_project
 logger = logging.getLogger("uvicorn")
 app = None
 def init_webserver():
    global app
    app = FastAPI()
    environment = os.getenv("ENVIRONMENT", "dev")  # Default to 'development' if not set
    if environment == "dev":
        logger.warning("Running in development mode - allowing CORS for all origins")
        app.add_middleware(
            CORSMiddleware,
            allow_origins=["*"],
            allow_credentials=True,
            allow_methods=["*"],
            allow_headers=["*"],
        )
-    def mount_static_files(directory, path):
+usPrj = using_project(os.getenv("PHOENIX_PROJECT_NAME"))
-        if os.path.exists(directory):
+usPrj.__enter__()
            for dir, _, _ in os.walk(directory):
                relative_path = os.path.relpath(dir, directory)
                mount_path = path if relative_path == "." else f"{path}/{relative_path}"
                logger.info(f"Mounting static files '{dir}' at {mount_path}")
                app.mount(mount_path, StaticFiles(directory=dir), name=f"{dir}-static")
    # Mount the data files to serve the file viewer
    mount_static_files("data", "/api/files/data")
    # Mount the output files from tools
    mount_static_files("data_output", "/api/files/output")
    app.include_router(chat_router, prefix="/api/chat")
    app.include_router(file_upload_router, prefix="/api/chat/upload")
-    # Redirect to documentation page when accessing base URL
+init_settings()
-    @app.get("/")
+init_observability()
-    async def redirect_to_docs():
+
-        return RedirectResponse(url="/docs")
+app = FastAPI()
 environment = os.getenv("ENVIRONMENT", "dev")  # Defaults to "dev" when ENVIRONMENT is not set
 if environment == "dev":
    # In dev mode, CORS is opened to every origin, method and header, with credentials allowed.
    logger.warning("Running in development mode - allowing CORS for all origins")
    app.add_middleware(
        CORSMiddleware,
        allow_origins=["*"],
        allow_credentials=True,
        allow_methods=["*"],
        allow_headers=["*"],
    )
 def mount_static_files(directory, path):
    """Mount `directory` and each of its subdirectories as static-file routes.

    Walks `directory` with os.walk and mounts a StaticFiles app on the
    module-level `app` for every directory found: the root is served at
    `path` and each subdirectory at `path/<relative path>`. Does nothing
    when `directory` does not exist.

    Args:
        directory: Filesystem directory to serve.
        path: URL prefix under which the root of `directory` is mounted.
    """
    if not os.path.exists(directory):
        return
    for current_dir, _, _ in os.walk(directory):
        relative_path = os.path.relpath(current_dir, directory)
        if relative_path == ".":
            mount_path = path
        else:
            # os.path.relpath joins with the platform separator; URLs always
            # use "/", so normalise it (this is a no-op on POSIX).
            url_suffix = relative_path.replace(os.sep, "/")
            mount_path = f"{path}/{url_suffix}"
        logger.info(f"Mounting static files '{current_dir}' at {mount_path}")
        app.mount(mount_path, StaticFiles(directory=current_dir), name=f"{current_dir}-static")
 # Mount the data files to serve the file viewer
 mount_static_files("data", "/api/files/data")
 # Mount the output files from tools
 mount_static_files("data_output", "/api/files/output")
 # Register the chat router and the file-upload router under the /api/chat prefix
 app.include_router(chat_router, prefix="/api/chat")
 app.include_router(file_upload_router, prefix="/api/chat/upload")
@app.get("/")
 async def redirect_to_docs():
    """Handle GET "/" by returning a redirect response to the "/docs" page."""
    return RedirectResponse(url="/docs")
 if __name__ == "__main__":
-    from phoenix.trace import using_project
+    app_host = os.getenv("APP_HOST", "0.0.0.0")
-    with using_project(os.getenv("PHOENIX_PROJECT_NAME")) as obj:
+    app_port = int(os.getenv("APP_PORT", "8000"))
    reload = True if environment == "dev" else False
    reload = False
    uvicorn.run(app="main:app", host=app_host, port=app_port, reload=reload)
        init_settings()
        init_observability()
        init_webserver()
        app_host = os.getenv("APP_HOST", "0.0.0.0")
        app_port = int(os.getenv("APP_PORT", "8000"))
        #reload = True if environment == "dev" else False
        reload = False
        uvicorn.run(app=app, host=app_host, port=app_port, reload=reload)
@@ -11,23 +11,30 @@ generate = "app.engine.generate:generate_datasource"
 [tool.poetry.dependencies]
 python = "^3.11,<3.12"
-fastapi = "^0.112.0"
+fastapi = "^0.110.3"
 python-dotenv = "^1.0.0"
 aiostream = "^0.6.2"
 llama-index = "0.10.63"
 cachetools = "^5.3.3"
 protobuf = "4.25.4"
 nltk = "^3.8.2"
 jieba = "^0.42.1"
 #arize-phoenix = "^4.12.0"
 openinference-instrumentation-llama-index="2.2.3"
 llama-index-callbacks-arize-phoenix = "^0.1.4"
 llama-index-llms-dashscope = "^0.1.2"
 llama-index-embeddings-dashscope = "^0.1.4"
 llama-index-postprocessor-dashscope-rerank-custom = "0.1.0"
 xinference  = "^0.14.1"
 xinference-client  = "^0.14.1"
 llama-index-llms-xinference = "^0.1.2"
 qdrant-client="^1.10.1"
 llama-index-vector-stores-qdrant = "^0.2.14"
-chroma="^0.5.5"
+chroma="^0.2.0"
 llama-index-vector-stores-chroma = "^0.1.10"
 llama-index-readers-json = "^0.1.5"
 llama-index-retrievers-bm25 = "^0.2.2"
 duckduckgo_search = "^6.2.6"
@@ -1,4 +1,4 @@
 rmdir /S /Q storage_vector
 rmdir /S /Q storage
-C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python tests/query.py
+python tests/query.py
@@ -1 +1 @@
-C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python main.py
+python main.py
@@ -0,0 +1,202 @@
 [
    {
        "question": "人工费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "临时设施费的费率是多少?",
        "answer": "费率是6.3500000000"
    },
    {
        "question": "乙供装置性材料费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "直接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "甲供装置性材料费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "直接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "夜间施工增加费的费率是多少?",
        "answer": "费率是0E-10"
    },
    {
        "question": "装置性材料费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "冬雨季施工增加费的费率是多少?",
        "answer": "费率是3.5700000000"
    },
    {
        "question": "材料费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "机械价差的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "规费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "直接工程费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "安全文明施工费的费率是多少?",
        "answer": "费率是3.5500000000"
    },
    {
        "question": "企业管理费的费率是多少?",
        "answer": "费率是35.7600000000"
    },
    {
        "question": "税金的费率是多少?",
        "answer": "费率是9.0000000000"
    },
    {
        "question": "直接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "安全文明施工费的费率是多少?",
        "answer": "费率是3.5500000000"
    },
    {
        "question": "合计的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "税金的费率是多少?",
        "answer": "费率是9.0000000000"
    },
    {
        "question": "安全文明施工费的费率是多少?",
        "answer": "费率是3.5500000000"
    },
    {
        "question": "直接工程费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "税金的费率是多少?",
        "answer": "费率是9.0000000000"
    },
    {
        "question": "社会保险费的费率是多少?",
        "answer": "费率是15.0000000000"
    },
    {
        "question": "间接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "合计的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "临时设施费的费率是多少?",
        "answer": "费率是0E-10"
    },
    {
        "question": "利润的费率是多少?",
        "answer": "费率是5.2400000000"
    },
    {
        "question": "税金的费率是多少?",
        "answer": "费率是9.0000000000"
    },
    {
        "question": "社会保险费的费率是多少?",
        "answer": "费率是15.0000000000"
    },
    {
        "question": "直接工程费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "乙供设备不含税价的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "企业管理费的费率是多少?",
        "answer": "费率是17.1300000000"
    },
    {
        "question": "企业管理费的费率是多少?",
        "answer": "费率是35.7600000000"
    },
    {
        "question": "夜间施工增加费的费率是多少?",
        "answer": "费率是0E-10"
    },
    {
        "question": "直接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "夜间施工增加费的费率是多少?",
        "answer": "费率是0E-10"
    },
    {
        "question": "甲供设备含税价的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "施工机械使用费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "安全文明施工费的费率是多少?",
        "answer": "费率是3.5500000000"
    },
    {
        "question": "定额直接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "主材费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "直接费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "施工企业配合调试费的费率是多少?",
        "answer": "费率是0E-10"
    },
    {
        "question": "施工机械使用费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "临时设施费的费率是多少?",
        "answer": "费率是6.3500000000"
    },
    {
        "question": "施工工具用具使用费的费率是多少?",
        "answer": "费率是3.8200000000"
    },
    {
        "question": "措施费的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "材料价差的费率是多少?",
        "answer": "费率是100.0000000000"
    },
    {
        "question": "措施费的费率是多少?",
        "answer": "费率是100.0000000000"
    }
 ]
@@ -0,0 +1,202 @@
 [
    {
        "question": "前期工作管理费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "特种设备安全监测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "工程监理费的金额是多少?",
        "answer": "金额是131009.9200000000"
    },
    {
        "question": "水土保持方案编审费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "生产准备费的金额是多少?",
        "answer": "金额是472373669.4635599852"
    },
    {
        "question": "电力工程技术经济标准编制费的金额是多少?",
        "answer": "金额是84352440.9756360054"
    },
    {
        "question": "项目建设技术服务费的金额是多少?",
        "answer": "金额是16855957065.4302005768"
    },
    {
        "question": "工程保险费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "其他的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "施工图文件评审费的金额是多少?",
        "answer": "金额是24940.0000000000"
    },
    {
        "question": "节能评估费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "桩基检测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "项目前期工作费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "其他的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "项目法人管理费的金额是多少?",
        "answer": "金额是986923559.4149370193"
    },
    {
        "question": "专业爆破服务费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "节能评估费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "用地预审费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "设备材料监造费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "环境监测及环境保护验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "环境监测及环境保护验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "设备材料监造费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "勘察费的金额是多少?",
        "answer": "金额是12122154260.0000000000"
    },
    {
        "question": "项目法人管理费的金额是多少?",
        "answer": "金额是986923559.4149370193"
    },
    {
        "question": "社会稳定风险评估费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "勘察费的金额是多少?",
        "answer": "金额是12122154260.0000000000"
    },
    {
        "question": "环境影响评价费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "水土保持方案编审费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "使用林地可行性研究费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "环境监测及环境保护验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "桩基检测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "设计费的金额是多少?",
        "answer": "金额是4042055949.4299998283"
    },
    {
        "question": "环境监测及环境保护验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "建设场地征用及清理费的金额是多少?",
        "answer": "金额是16831284.2287110016"
    },
    {
        "question": "施工图文件评审费的金额是多少?",
        "answer": "金额是24940.0000000000"
    },
    {
        "question": "项目后评价费的金额是多少?",
        "answer": "金额是421762204.8781780005"
    },
    {
        "question": "水土保持方案编审费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "勘察设计费的金额是多少?",
        "answer": "金额是16164210209.4300003052"
    },
    {
        "question": "前期工作管理费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "节能评估费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "初步设计文件评审费的金额是多少?",
        "answer": "金额是18560.0000000000"
    },
    {
        "question": "特种设备安全监测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "初步设计文件评审费的金额是多少?",
        "answer": "金额是18560.0000000000"
    },
    {
        "question": "桩基检测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "矿产压覆评估费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "设计费的金额是多少?",
        "answer": "金额是4042055949.4299998283"
    },
    {
        "question": "水土保持方案编审费用的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "电力工程技术经济标准编制费的金额是多少?",
        "answer": "金额是84352440.9756360054"
    },
    {
        "question": "桩基检测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "矿产压覆评估费用的金额是多少?",
        "answer": "金额是0E-10"
    }
 ]
@@ -0,0 +1,202 @@
 [
    {
        "question": "新增项目名称的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "预制基础的合价是多少?",
        "answer": "合价是40567.2639480000"
    },
    {
        "question": "绝缘子串及金具安装的合价是多少?",
        "answer": "合价是2897171.9878110001"
    },
    {
        "question": "杆塔工程材料工地运输的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "基础防护的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "新增项目名称的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "(1)拆除后能利用的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "地基处理的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "灌注桩基础的合价是多少?",
        "answer": "合价是43466660.0544390008"
    },
    {
        "question": "(1)拆除后能利用的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "悬垂绝缘子串及金具安装的合价是多少?",
        "answer": "合价是1251465.0340440001"
    },
    {
        "question": "护坡、挡土墙及排洪沟土石方工程的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "附件安装工程的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "导地线跨越架设的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "辅助工程的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "新增项目名称的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "绝缘子串及金具安装的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "护坡、挡土墙及排洪沟砌筑的合价是多少?",
        "answer": "合价是709931.9013930000"
    },
    {
        "question": "锚杆基础的合价是多少?",
        "answer": "合价是15344967.9002950005"
    },
    {
        "question": "建筑工程的合价是多少?",
        "answer": "合价是25411.2790780000"
    },
    {
        "question": "辅助工程的合价是多少?",
        "answer": "合价是1046253.4135240000"
    },
    {
        "question": "导地线跨越架设的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "电缆工程的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "输、送电线路试运的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "基础土石方工程的合价是多少?",
        "answer": "合价是32872843180.7429008484"
    },
    {
        "question": "基础永久性围堰的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "基础永久性围堰的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "混凝土及钢筋混凝土结构的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "输、送电线路试运的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "混合结构的合价是多少?",
        "answer": "合价是16967.5193850000"
    },
    {
        "question": "杆塔组立的合价是多少?",
        "answer": "合价是2253906.0859830002"
    },
    {
        "question": "附件安装工程的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "接地工程材料工地运输的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "新增项目名称的合价是多少?",
        "answer": "合价是27148.0310160000"
    },
    {
        "question": "导地线架设的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "护坡、挡土墙及排洪沟的合价是多少?",
        "answer": "合价是709931.9013930000"
    },
    {
        "question": "(1)拆除后能利用的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "基础永久性围堰砌筑的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "(2)拆除后不能利用的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "安装工程的合价是多少?",
        "answer": "合价是65324.9496330000"
    },
    {
        "question": "尖峰、施工基面土石方工程的合价是多少?",
        "answer": "合价是325205.4178770000"
    },
    {
        "question": "架线工程的合价是多少?",
        "answer": "合价是4844399648.0778598785"
    },
    {
        "question": "杆塔组立的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "架线工程材料工地运输的合价是多少?",
        "answer": "合价是2088570123.2409000397"
    },
    {
        "question": "导地线架设的合价是多少?",
        "answer": "合价是0E-10"
    },
    {
        "question": "耐张绝缘子串及金具安装的合价是多少?",
        "answer": "合价是1645706.9537680000"
    },
    {
        "question": "架线工程材料工地运输的合价是多少?",
        "answer": "合价是2088570123.2409000397"
    },
    {
        "question": "其他基础的合价是多少?",
        "answer": "合价是3839666.7656879998"
    },
    {
        "question": "架线工程材料工地运输的合价是多少?",
        "answer": "合价是0E-10"
    }
 ]
@@ -0,0 +1,202 @@
 [
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是440877984.9458540082"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是1086586.9018659999"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是51486.7898090000"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是3321.8139230000"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是78005.0340730000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是3535892767.0972299576"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是24045.2334060000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是336253.7482950000"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是142270.1346780000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是61049.8665780000"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是933061.7795919999"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是182949.5997350000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（余物清理）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是21220645.1637400016"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是933061.7795919999"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是2501470269.7231497765"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是51486.7898090000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是55265.9111100000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是442897633.6273120046"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是1057484.3306960000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是442897633.6273120046"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是21220645.1637400016"
    },
    {
        "question": "线路取费表（余物清理）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是336253.7482950000"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "的直接费是多少?",
        "answer": "直接费是61049.8665780000"
    },
    {
        "question": "线路取费表（余物清理）(1)的直接费是多少?",
        "answer": "直接费是61049.8665780000"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是24045.2334060000"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（余物清理）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是659466.5955000001"
    },
    {
        "question": "线路取费表（拆除）的直接费是多少?",
        "answer": "直接费是0E-10"
    },
    {
        "question": "线路取费表的直接费是多少?",
        "answer": "直接费是2501470269.7231497765"
    }
 ]
@@ -0,0 +1,202 @@
 [
    {
        "question": "降阻剂_数量的属性值是多少?",
        "answer": "属性值是f"
    },
    {
        "question": "导线2_单位单价的属性值是多少?",
        "answer": "属性值是9"
    },
    {
        "question": "导线_单公里用量的属性值是多少?",
        "answer": "属性值是36"
    },
    {
        "question": "线路参数_导地线防震措施的属性值是多少?",
        "answer": "属性值是457"
    },
    {
        "question": "合成绝缘子_数量的属性值是多少?",
        "answer": "属性值是5"
    },
    {
        "question": "基础垫层的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "其中：基础护壁用量的属性值是多少?",
        "answer": "属性值是74394.212"
    },
    {
        "question": "铺石加混凝土的属性值是多少?",
        "answer": "属性值是0.0"
    },
    {
        "question": "导线用量（西北）的属性值是多少?",
        "answer": "属性值是-795976.0855"
    },
    {
        "question": "导线单公里用量（西北）的属性值是多少?",
        "answer": "属性值是-159195.2171"
    },
    {
        "question": "灰土垫层单公里用量（西北）的属性值是多少?",
        "answer": "属性值是8.0"
    },
    {
        "question": "地线瓷绝缘子单公里用量（西北）的属性值是多少?",
        "answer": "属性值是738.253"
    },
    {
        "question": "地形条件_高山的属性值是多少?",
        "answer": "属性值是7"
    },
    {
        "question": "流砂坑比例的属性值是多少?",
        "answer": "属性值是0.001"
    },
    {
        "question": "碎石_数量的属性值是多少?",
        "answer": "属性值是12"
    },
    {
        "question": "线路参数_导地线防震措施的属性值是多少?",
        "answer": "属性值是457"
    },
    {
        "question": "灰土垫层的属性值是多少?",
        "answer": "属性值是40.0"
    },
    {
        "question": "交叉跨越_弱电线路的属性值是多少?",
        "answer": "属性值是45"
    },
    {
        "question": "地线1_根数的属性值是多少?",
        "answer": "属性值是12"
    },
    {
        "question": "土质比例_岩石（人凿）的属性值是多少?",
        "answer": "属性值是49"
    },
    {
        "question": "耐张混凝土杆基数的属性值是多少?",
        "answer": "属性值是26.0"
    },
    {
        "question": "设计单位的属性值是多少?",
        "answer": "属性值是3"
    },
    {
        "question": "接地钢的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "间隔棒_单公里用量的属性值是多少?",
        "answer": "属性值是r"
    },
    {
        "question": "导线其中：跳线和导线弧垂单公里用量（西北）的属性值是多少?",
        "answer": "属性值是159203.0171"
    },
    {
        "question": "桩基础的属性值是多少?",
        "answer": "属性值是310.0"
    },
    {
        "question": "降阻剂的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "可抵扣增值税（万元）的属性值是多少?",
        "answer": "属性值是2005241.808822"
    },
    {
        "question": "主要技术经济指标2的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "合成绝缘子_数量的属性值是多少?",
        "answer": "属性值是5"
    },
    {
        "question": "土质比例_水坑的属性值是多少?",
        "answer": "属性值是47"
    },
    {
        "question": "基础_插入式的属性值是多少?",
        "answer": "属性值是3"
    },
    {
        "question": "耐张角钢塔比例的属性值是多少?",
        "answer": "属性值是250%"
    },
    {
        "question": "地线的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "回路数的属性值是多少?",
        "answer": "属性值是三回"
    },
    {
        "question": "导线其中：跳线和导线弧垂用量的属性值是多少?",
        "answer": "属性值是796015.0855"
    },
    {
        "question": "OPGW用量（西北）的属性值是多少?",
        "answer": "属性值是2904.737"
    },
    {
        "question": "现浇混凝土_单公里用量的属性值是多少?",
        "answer": "属性值是22"
    },
    {
        "question": "架线工程费用（万元）（含价差）的属性值是多少?",
        "answer": "属性值是3203726.0"
    },
    {
        "question": "耐张钢管塔比例的属性值是多少?",
        "answer": "属性值是300%"
    },
    {
        "question": "单公里土石方量_基面的属性值是多少?",
        "answer": "属性值是8*8"
    },
    {
        "question": "地线2的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "降阻剂的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "土质比例的属性值是多少?",
        "answer": "属性值是"
    },
    {
        "question": "地线1_单位单价的属性值是多少?",
        "answer": "属性值是113"
    },
    {
        "question": "绝缘子串型式_悬垂串的属性值是多少?",
        "answer": "属性值是48"
    },
    {
        "question": "基坑土石方量（西北）的属性值是多少?",
        "answer": "属性值是405403506.156"
    },
    {
        "question": "基坑坚土的属性值是多少?",
        "answer": "属性值是25585167.713"
    },
    {
        "question": "基坑普通土的属性值是多少?",
        "answer": "属性值是313873965.334"
    },
    {
        "question": "瓷绝缘子单公里用量（西北）的属性值是多少?",
        "answer": "属性值是201.0"
    }
 ]
@@ -0,0 +1,202 @@
 [
    {
        "question": "电杆坑、塔坑、拉线坑人工挖方（或爆破）及回填 水坑 坑深2.0m以内的编码是多少?",
        "answer": "编码是YX2-72"
    },
    {
        "question": "钢筋加工及制作的编码是多少?",
        "answer": "编码是YX3-43"
    },
    {
        "question": "船舶运输 线材 每件重400kg以内 运输的编码是多少?",
        "answer": "编码是YX1-132"
    },
    {
        "question": "船舶运输 钢管塔材 运输的编码是多少?",
        "answer": "编码是YX1-152"
    },
    {
        "question": "碎石的编码是多少?",
        "answer": "编码是C10020103"
    },
    {
        "question": "混凝土（保护帽）的编码是多少?",
        "answer": "编码是ZH1001"
    },
    {
        "question": "船舶运输 金具、绝缘子、零星钢材 运输的编码是多少?",
        "answer": "编码是YX1-144"
    },
    {
        "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
        "answer": "编码是YX1-1"
    },
    {
        "question": "船舶运输 线材 每件重1000kg以内 运输的编码是多少?",
        "answer": "编码是YX1-136"
    },
    {
        "question": "混凝土搅拌及浇制 每基基础联系梁混凝土量20m³以内的编码是多少?",
        "answer": "编码是YX3-69"
    },
    {
        "question": "索道运输 循环式 塔材 荷载1t以内 装卸的编码是多少?",
        "answer": "编码是YX1-185"
    },
    {
        "question": "人力运输 混凝土预制品 每件重100kg以内的编码是多少?",
        "answer": "编码是YX1-6"
    },
    {
        "question": "船舶运输 混凝土杆 每件重1500kg以上 运输的编码是多少?",
        "answer": "编码是YX1-118"
    },
    {
        "question": "碎石的编码是多少?",
        "answer": "编码是C10020103"
    },
    {
        "question": "电杆坑、塔坑、拉线坑人工挖方（或爆破）及回填 泥水 坑深8.0m以上的编码是多少?",
        "answer": "编码是YX2-55"
    },
    {
        "question": "机械施工土方 场地平整的编码是多少?",
        "answer": "编码是GT1-1"
    },
    {
        "question": "汽车运输 混凝土预制品 每件重100kg以内 装卸的编码是多少?",
        "answer": "编码是YX1-69"
    },
    {
        "question": "汽车运输 其他建筑安装材料 运输的编码是多少?",
        "answer": "编码是YX1-108"
    },
    {
        "question": "钻孔灌注桩基础 混凝土搅拌及浇制 孔深10m以内的编码是多少?",
        "answer": "编码是YX3-171"
    },
    {
        "question": "线路复测及分坑 直线双杆及拉线塔的编码是多少?",
        "answer": "编码是YX2-3"
    },
    {
        "question": "氧化锌避雷器安装 35kV的编码是多少?",
        "answer": "编码是YX7-32"
    },
    {
        "question": "混凝土（保护帽）的编码是多少?",
        "answer": "编码是ZH1002"
    },
    {
        "question": "汽车运输 其他建筑安装材料 装卸的编码是多少?",
        "answer": "编码是YX1-107"
    },
    {
        "question": "船舶运输 混凝土杆 每件重500kg以内 装卸的编码是多少?",
        "answer": "编码是YX1-109"
    },
    {
        "question": "混凝土（保护帽）的编码是多少?",
        "answer": "编码是ZH1001"
    },
    {
        "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
        "answer": "编码是YX1-1"
    },
    {
        "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
        "answer": "编码是YX1-1"
    },
    {
        "question": "普通硅酸盐水泥的编码是多少?",
        "answer": "编码是C09010102"
    },
    {
        "question": "拖拉机运输 钢管塔材 运输的编码是多少?",
        "answer": "编码是YX1-44"
    },
    {
        "question": "尖峰及施工基面挖方（或爆破） 普通土的编码是多少?",
        "answer": "编码是YX2-226"
    },
    {
        "question": "汽车运输 角钢塔材 装卸的编码是多少?",
        "answer": "编码是YX1-103"
    },
    {
        "question": "接地槽挖方（或爆破）及回填 普通土的编码是多少?",
        "answer": "编码是YX2-213"
    },
    {
        "question": "水的编码是多少?",
        "answer": "编码是C21010101"
    },
    {
        "question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV 针式单联串(悬垂串)的编码是多少?",
        "answer": "编码是YX6-21"
    },
    {
        "question": "直线(直线换位、直线转角)杆塔绝缘子串悬挂安装 35kV I型双联串(悬垂串)的编码是多少?",
        "answer": "编码是YX6-22"
    },
    {
        "question": "钻孔灌注桩基础 机械推钻成孔 砂砾石 孔深20m以内 孔径1.0m以内的编码是多少?",
        "answer": "编码是YX3-117"
    },
    {
        "question": "线路复测及分坑 直线自立塔的编码是多少?",
        "answer": "编码是YX2-6"
    },
    {
        "question": "钻孔灌注桩基础 凿桩头 桩径0.8m以上的编码是多少?",
        "answer": "编码是YX3-180"
    },
    {
        "question": "线路复测及分坑 耐张（转角）单杆的编码是多少?",
        "answer": "编码是YX2-2"
    },
    {
        "question": "中砂的编码是多少?",
        "answer": "编码是C10010101"
    },
    {
        "question": "人力运输 混凝土杆 每件重500kg以内的编码是多少?",
        "answer": "编码是YX1-1"
    },
    {
        "question": "带电跨越电力线 被跨线电压等级 35kV的编码是多少?",
        "answer": "编码是YX5-186"
    },
    {
        "question": "人工挖土方 普土 深2m以内的编码是多少?",
        "answer": "编码是YT1-1"
    },
    {
        "question": "混凝土杆的编码是多少?",
        "answer": "编码是"
    },
    {
        "question": "接地模块安装的编码是多少?",
        "answer": "编码是YX3-213"
    },
    {
        "question": "拖拉机运输 线材 每件重400kg以内 运输的编码是多少?",
        "answer": "编码是YX1-34"
    },
    {
        "question": "拖拉机运输 其他建筑安装材料 装卸的编码是多少?",
        "answer": "编码是YX1-45"
    },
    {
        "question": "普通硅酸盐水泥的编码是多少?",
        "answer": "编码是C09010102"
    },
    {
        "question": "船舶运输 线材 每件重4000kg以内 装卸的编码是多少?",
        "answer": "编码是YX1-139"
    },
    {
        "question": "水的编码是多少?",
        "answer": "编码是C21010101"
    }
 ]
@@ -0,0 +1,202 @@
 [
    {
        "question": "架空输电线路本体工程的金额是多少?",
        "answer": "金额是55105688268.5176010132"
    },
    {
        "question": "价差预备费的金额是多少?",
        "answer": "金额是22731130869.6655998230"
    },
    {
        "question": "工程静态投资的金额是多少?",
        "answer": "金额是715035853336.3909912109"
    },
    {
        "question": "工程动态投资的金额是多少?",
        "answer": "金额是776282009093.5660400391"
    },
    {
        "question": "其中：工程建设检测费的金额是多少?",
        "answer": "金额是185575370.1463980079"
    },
    {
        "question": "工程静态投资的金额是多少?",
        "answer": "金额是715035853336.3909912109"
    },
    {
        "question": "建设期贷款利息的金额是多少?",
        "answer": "金额是38515024887.5095977783"
    },
    {
        "question": "特殊项目的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "动态费用的金额是多少?",
        "answer": "金额是61246155757.1752014160"
    },
    {
        "question": "动态费用的金额是多少?",
        "answer": "金额是61246155757.1752014160"
    },
    {
        "question": "小计的金额是多少?",
        "answer": "金额是458257942570.3129882812"
    },
    {
        "question": "其他费用的金额是多少?",
        "answer": "金额是210942912572.8689880371"
    },
    {
        "question": "基本预备费的金额是多少?",
        "answer": "金额是14020310849.7332000732"
    },
    {
        "question": "其中：水土保持监测及验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "其中：工程建设检测费的金额是多少?",
        "answer": "金额是185575370.1463980079"
    },
    {
        "question": "其中：特种设备安全监测费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "工程静态投资的金额是多少?",
        "answer": "金额是715035853336.3909912109"
    },
    {
        "question": "其中：水土保持监测及验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "架空输电线路本体工程的金额是多少?",
        "answer": "金额是55105688268.5176010132"
    },
    {
        "question": "基本预备费的金额是多少?",
        "answer": "金额是14020310849.7332000732"
    },
    {
        "question": "其中：水土保持监测及验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "小计的金额是多少?",
        "answer": "金额是458257942570.3129882812"
    },
    {
        "question": "编制基准期价差的金额是多少?",
        "answer": "金额是29246752707.1180000305"
    },
    {
        "question": "其中：水土保持监测及验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "小计的金额是多少?",
        "answer": "金额是458257942570.3129882812"
    },
    {
        "question": "其他费用的金额是多少?",
        "answer": "金额是210942912572.8689880371"
    },
    {
        "question": "特殊项目的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "编制基准期价差的金额是多少?",
        "answer": "金额是29246752707.1180000305"
    },
    {
        "question": "特殊项目的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "小计的金额是多少?",
        "answer": "金额是458257942570.3129882812"
    },
    {
        "question": "工程动态投资的金额是多少?",
        "answer": "金额是776282009093.5660400391"
    },
    {
        "question": "其中：建设场地征用及清理费的金额是多少?",
        "answer": "金额是16831284.2287110016"
    },
    {
        "question": "其中：可抵扣增值税额的金额是多少?",
        "answer": "金额是20069645492.2888984680"
    },
    {
        "question": "小计的金额是多少?",
        "answer": "金额是458257942570.3129882812"
    },
    {
        "question": "动态费用的金额是多少?",
        "answer": "金额是61246155757.1752014160"
    },
    {
        "question": "建设期贷款利息的金额是多少?",
        "answer": "金额是38515024887.5095977783"
    },
    {
        "question": "工程静态投资的金额是多少?",
        "answer": "金额是715035853336.3909912109"
    },
    {
        "question": "其中：建设场地征用及清理费的金额是多少?",
        "answer": "金额是16831284.2287110016"
    },
    {
        "question": "建设期贷款利息的金额是多少?",
        "answer": "金额是38515024887.5095977783"
    },
    {
        "question": "工程动态投资的金额是多少?",
        "answer": "金额是776282009093.5660400391"
    },
    {
        "question": "架空输电线路本体工程的金额是多少?",
        "answer": "金额是55105688268.5176010132"
    },
    {
        "question": "其中：工程建设检测费的金额是多少?",
        "answer": "金额是185575370.1463980079"
    },
    {
        "question": "其中：水土保持监测及验收费的金额是多少?",
        "answer": "金额是0E-10"
    },
    {
        "question": "工程动态投资的金额是多少?",
        "answer": "金额是776282009093.5660400391"
    },
    {
        "question": "其中：可抵扣增值税额的金额是多少?",
        "answer": "金额是20069645492.2888984680"
    },
    {
        "question": "价差预备费的金额是多少?",
        "answer": "金额是22731130869.6655998230"
    },
    {
        "question": "一般线路本体工程的金额是多少?",
        "answer": "金额是55105688268.5176010132"
    },
    {
        "question": "其中：工程建设检测费的金额是多少?",
        "answer": "金额是185575370.1463980079"
    },
    {
        "question": "基本预备费的金额是多少?",
        "answer": "金额是14020310849.7332000732"
    },
    {
        "question": "设备购置费的金额是多少?",
        "answer": "金额是2567934636.3574500084"
    }
 ]
@@ -0,0 +1,19 @@
 import chromadb
 # Create a ChromaDB client backed by the on-disk store at this path.
 chroma_client = chromadb.PersistentClient(path="/home/bw/ctr/zjdataai-app/backend/storage_vector-1/")
 # Fetch the already existing "default" collection.
 collection = chroma_client.get_collection(name="default")
 # Retrieve all data held in the collection.
 results = collection.get(
    include=['documents', 'metadatas', 'embeddings']  # only include the allowed options
 )
 # Convert the result to a string and save it to a text file.
 # The path is bound once so the message below reports the file actually written.
 output_path = '/home/bw/ctr/zjdataai-app/backend/test1/query_results-1.txt'
 with open(output_path, 'w', encoding='utf-8') as file:
    file.write(str(results))
 # Report where the dump was written (previously named a different file).
 print(f"查询结果已保存到 {output_path} 文件中。")
@@ -0,0 +1,28 @@
 []错误问题: 税金的费率是多少?
 正确答案: 9.0000000000
 查询结果: 9
 错误问题: 冬雨季施工增加费的费率是多少?
 正确答案: 3.5700000000
 查询结果: 未找到有效数字
 错误问题: 住房公积金的费率是多少?
 正确答案: 15.0000000000
 查询结果: 15
 错误问题: 税金的费率是多少?
 正确答案: 9.0000000000
 查询结果: 9
 错误问题: 冬雨季施工增加费的费率是多少?
 正确答案: 3.5700000000
 查询结果: 3
 错误问题: 税金的费率是多少?
 正确答案: 9.0000000000
 查询结果: 9
 错误问题: 冬雨季施工增加费的费率是多少?
 正确答案: 3.5700000000
 查询结果: 未找到有效数字
@@ -0,0 +1 @@
@@ -0,0 +1,12 @@
 TOP_K: 5
 LLM_TEMPERATURE: 0.1
 similarity_top_k: 5.0
 问题: 税金的费率是多少?
 查询结果: SQL查询结果: 税金的费率是9.0%。请注意，查询结果中有多条重复的记录，但费率都是相同的，为9.0%。
 正确答案: 9.0000000000
 问题: 冬雨季施工增加费的费率是多少?
 查询结果: SQL查询结果: 对于"冬雨季施工增加费"的费率，当前数据库中没有找到具体信息。这可能是因为费率会根据不同的项目、地区或时间有所变化。建议您查阅最新的项目文件或与项目负责人联系以获取准确的费率信息。
 正确答案: 3.5700000000
@@ -0,0 +1,200 @@
 import re
 import os
 import sys
 import json
 from sqlalchemy import create_engine
 from llama_index.core import VectorStoreIndex, SQLDatabase
 from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
 from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
 from app.api.routers.chat import generate_filters
 from app.engine import get_index, makeDescriptionByEngine
 from app.engine.loaders.db import CustomDatabaseReader
 from app.engine.vectordb import get_vector_store
 from app.observability import init_observability
 from app.settings import init_settings
 from dotenv import load_dotenv
 load_dotenv()
 def read_questions_and_answers(file_path):
    """Load (question, answer) pairs from a UTF-8 JSON file.

    The file is parsed as a JSON array of objects. For each object the
    "question" and "answer" values (defaulting to "") are stripped of
    surrounding whitespace; the pair is kept only if both are non-empty.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        A list of (question, answer) tuples in file order.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)
    # Strip first, filter afterwards; the answer text is kept verbatim
    # (no number extraction happens here).
    stripped_pairs = (
        (item.get("question", "").strip(), item.get("answer", "").strip())
        for item in entries
    )
    return [(q_text, a_text) for q_text, a_text in stripped_pairs if q_text and a_text]
 def save_results_to_file(question, query_result, correct_answer, file_path):
    """Append one query outcome to ``file_path`` as a single JSON line.

    Args:
        question: the question that was asked.
        query_result: the raw engine result; stored as ``str(query_result)``.
        correct_answer: the expected answer text.
        file_path: file opened in append mode with UTF-8 encoding.
    """
    record = {}
    record["问题"] = question
    record["查询结果"] = str(query_result)  # keep the raw result text
    record["正确答案"] = correct_answer
    with open(file_path, 'a', encoding='utf-8') as sink:
        # One JSON document per line; non-ASCII text is written unescaped.
        sink.write(json.dumps(record, ensure_ascii=False))
        sink.write('\n')
 def log_incorrect_answers(question, correct_answer, query_result, log_file_path):
    """Append one mismatching query to ``log_file_path`` as a single JSON line.

    Args:
        question: the question that was answered incorrectly.
        correct_answer: the expected answer text.
        query_result: the raw engine result; stored as ``str(query_result)``.
        log_file_path: file opened in append mode with UTF-8 encoding.
    """
    entry = {}
    entry["错误问题"] = question
    entry["正确答案"] = correct_answer
    entry["查询结果"] = str(query_result)  # keep the raw result text
    with open(log_file_path, 'a', encoding='utf-8') as sink:
        # One JSON document per line; non-ASCII text is written unescaped.
        sink.write(json.dumps(entry, ensure_ascii=False))
        sink.write('\n')
 # Extract every number found in a result string
 def extract_all_numbers_from_result(result_str):
    """Return all numeric tokens in ``result_str`` with thousands separators removed.

    Three forms are recognised, tried in this order at each position:
    an optionally negative number with ``,ddd`` thousands groups and an
    optional fraction, the zero literal ``0E-<digits>``, and a plain integer
    or decimal.

    Args:
        result_str: text to scan.

    Returns:
        A list of strings in order of appearance, commas stripped; an empty
        list when nothing matches.
    """
    # All groups are non-capturing on purpose: with capturing groups
    # re.findall returns tuples of the groups instead of the matched text,
    # which made the .replace() call below fail on every match.
    numbers = re.findall(
        r"-?\d+(?:,\d{3})+(?:\.\d+)?|0E-\d+|\d+(?:\.\d+)?",
        result_str,
    )
    # Drop thousands separators and return the numbers as strings.
    return [num.replace(',', '') for num in numbers]
 # Approximate equality for two numbers
 def is_close_enough(val1, val2, epsilon=1e-5):
    """Return True when the absolute difference of the two values is strictly below ``epsilon``."""
    gap = abs(val1 - val2)
    return gap < epsilon
 def is_answer_correct(query_result_str, correct_answer_str):
    """Decide whether a query result matches the expected answer.

    One value (number or code) is extracted from each string with
    ``extract_number_or_code_from_result``. If either extraction is empty the
    answer counts as wrong. When both values parse as floats they are compared
    after rounding to 5 decimal places; otherwise the extracted strings are
    compared for equality.

    Returns:
        bool: True on a match, False otherwise.
    """
    found = extract_number_or_code_from_result(query_result_str)
    expected = extract_number_or_code_from_result(correct_answer_str)
    # A missing value on either side can never match.
    if not (found and expected):
        return False
    try:
        # Strip thousands separators, then parse both sides (result first).
        found_num, expected_num = (
            float(text.replace(',', '')) for text in (found, expected)
        )
    except ValueError:
        # At least one side is not numeric (e.g. a code): compare the text.
        return found == expected
    # Zero written in scientific notation (such as 0E-10) equals plain zero.
    if found_num == 0.0 and expected_num == 0.0:
        return True
    # Compare at a precision of 5 decimal places.
    return round(found_num, 5) == round(expected_num, 5)
 def extract_number_or_code_from_result(result_str):
    """Return the first number in ``result_str`` or, if there is none, the first code.

    A number is an optional minus sign, digits either plain or grouped with
    ``,ddd`` thousands separators, an optional fraction and an optional
    exponent (which covers zero literals such as ``0E-10``). Commas are
    removed from the returned text. A trailing percent sign is never part of
    the match, so "9.0%" yields "9.0".

    A code is an ASCII token that starts with an upper-case letter and
    continues with letters, digits or hyphens. Because numbers are searched
    first, digits embedded in a code are returned as a number.

    Args:
        result_str: text to scan.

    Returns:
        The extracted string, or None when neither a number nor a code is found.
    """
    # The integer part accepts either grouped digits or an unbroken digit run.
    # Previously only \d{1,3} was allowed before the optional comma groups,
    # so a comma-free value was cut to its first three digits and unrelated
    # amounts compared equal.
    # The sign is only taken when it does not directly follow an ASCII letter
    # or digit, so the hyphen in tokens like "A-123" is not read as a minus.
    number_match = re.search(
        r"(?:(?<![A-Za-z\d])-)?(?:\d{1,3}(?:,\d{3})+|\d+)(?:\.\d+)?(?:[eE][-+]?\d+)?",
        result_str,
    )
    if number_match:
        return number_match.group(0).replace(',', '')  # drop thousands separators
    # No number present: fall back to every code-like token in the text.
    potential_codes = re.findall(r"\b[A-Z][A-Za-z\d-]+\b", result_str)
    # Return the first code, if any.
    return potential_codes[0] if potential_codes else None
 def main(questions_file, query_type):
    """Run every question through the vector or SQL engine and record the outcome.

    Questions and expected answers are read from ``questions_file`` (resolved
    relative to this script's directory). Each answer judged correct by
    ``is_answer_correct`` is appended to ``query_results.json``; every other
    answer is appended to ``incorrect_answers_log.json``. Both files sit next
    to this script.

    Args:
        questions_file: name of the questions JSON file.
        query_type: "vector" or "sql"; any other value prints an error and
            exits with status 1.
    """
    # Reject an unsupported query type before any expensive initialisation
    # instead of discovering it inside the query loop.
    if query_type not in ("vector", "sql"):
        print("无效的查询类型，请选择 'vector' 或 'sql'")
        sys.exit(1)
    # Directory that contains this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    questions_file_path = os.path.join(script_dir, questions_file)
    results_file_path = os.path.join(script_dir, "query_results.json")
    log_file_path = os.path.join(script_dir, "incorrect_answers_log.json")
    # Create an empty questions file (an empty JSON array) when none exists
    if not os.path.exists(questions_file_path):
        with open(questions_file_path, 'w', encoding='utf-8') as file:
            json.dump([], file)
    # Fix the run parameters through environment variables
    os.environ['TOP_K'] = str(5)  # top-k for vector retrieval
    os.environ['LLM_TEMPERATURE'] = str(0.1)  # sampling temperature
    os.environ['similarity_top_k'] = str(5)  # top-k for SQL table retrieval
    init_settings()
    init_observability()
    index = get_index()
    top_k = int(os.getenv("TOP_K"))
    temperature = float(os.getenv("LLM_TEMPERATURE"))
    similarity_top_k = int(os.getenv("similarity_top_k"))
    filters = generate_filters([])
    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    sql_database = SQLDatabase(engine)
    table_schema_objs = makeDescriptionByEngine(sql_database)
    table_node_mapping = SQLTableNodeMapping(sql_database)
    # Build the SQL query tool
    sql_obj_index = ObjectIndex.from_objects(
        table_schema_objs,
        table_node_mapping,
        index_cls=VectorStoreIndex,
    )
    sql_query_engine = SQLTableRetrieverQueryEngine(sql_database,
                                                    sql_obj_index.as_retriever(similarity_top_k=similarity_top_k))
    questions_and_answers = read_questions_and_answers(questions_file_path)
    # Write the parameter header when the results file is missing or empty.
    # os.path.getsize raises on a missing file, so existence is checked first;
    # otherwise the very first run crashed here.
    if not os.path.exists(results_file_path) or os.path.getsize(results_file_path) == 0:
        with open(results_file_path, 'w', encoding='utf-8') as file:
            json.dump({
                "TOP_K": top_k,
                "LLM_TEMPERATURE": temperature,
                "similarity_top_k": similarity_top_k
            }, file, ensure_ascii=False)
            file.write('\n')
    # The engine does not depend on the question, so pick it once up front.
    if query_type == "vector":
        query_engine = index.as_query_engine(
            similarity_top_k=top_k, filters=filters
        )
        result_label = "向量查询结果"
    else:
        query_engine = sql_query_engine
        result_label = "SQL查询结果"
    # Run the queries one by one
    for i, (question, correct_answer) in enumerate(questions_and_answers):
        print(f"执行查询 {i+1}: {question}")
        query_result = query_engine.query(question)
        print(f"{result_label}: {query_result}\n")
        # Text stored in the output files (format unchanged).
        query_result_str = f"The encoding for the query \"{question}\" is {str(query_result)}"
        # Judge the raw engine answer only: the stored text echoes the
        # question, and a digit inside the question would otherwise be
        # extracted in place of the answer.
        if is_answer_correct(str(query_result), correct_answer):
            # Correct answers go to the results file
            save_results_to_file(question, query_result_str, correct_answer, results_file_path)
        else:
            # Everything else goes to the incorrect-answer log
            log_incorrect_answers(question, correct_answer, query_result_str, log_file_path)
 if __name__ == "__main__":
    # Two positional arguments are required: the questions JSON file name and
    # the query type (lower-cased before use).
    if not len(sys.argv) >= 3:
        print("请提供questions.json文件名和查询类型（vector 或 sql）")
        sys.exit(1)
    questions_file, query_type = sys.argv[1], sys.argv[2].lower()
    main(questions_file, query_type)
@@ -0,0 +1,82 @@
 import os
 import json
 from sqlalchemy import create_engine, MetaData, Table, func
 from sqlalchemy.orm import sessionmaker
 from dotenv import load_dotenv
 load_dotenv()
 def generate_questions_for_table(table_name, file_path, num_questions=50):
    """Write up to ``num_questions`` question/answer pairs for one table to a JSON file.

    Rows are sampled in random order from ``table_name`` in the database
    named by the SQL_DATABASE_URL environment variable. For each row the
    configured name column fills the question template and the configured
    value column fills the answer template. Tables without a configuration
    entry are reported and skipped.

    Args:
        table_name: table to sample from.
        file_path: destination JSON file (overwritten).
        num_questions: maximum number of pairs to generate. Rows are sampled
            without replacement, so a table with fewer rows yields fewer pairs.
    """
    # Per table: index of the name column, value column, question and answer templates
    tables_info = {
        "ProjectProperties": (0, "Value", "{name_value}的属性值是多少?", "属性值是{answer_value}"),
        "OtherFee": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
        "FeeCollectionTable": (1, "Rate", "{name_value}的费率是多少?", "费率是{answer_value}"),
        "ProjectDivision": (5, "Sum_Price", "{name_value}的合价是多少?", "合价是{answer_value}"),
        "ProjectDivisions_CostPreview": (4, "Direct_Cost", "{name_value}的直接费是多少?", "直接费是{answer_value}"),
        "TotalCalculateTable": (3, "Amount", "{name_value}的金额是多少?", "金额是{answer_value}"),
        "ProjectQuantities": (6, "Encoding", "{name_value}的编码是多少?", "编码是{answer_value}")
    }
    # Check the configuration before touching the database at all.
    if table_name not in tables_info:
        print(f"未找到表 {table_name} 的配置信息")
        return
    name_index, value_column, question_template, answer_template = tables_info[table_name]
    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    metadata = MetaData()
    # Reflect only the requested table; a whole-schema reflect is not needed here.
    table = Table(table_name, metadata, autoload_with=engine)
    # The name column is identified by its position in the table
    name_column = table.columns.keys()[name_index]
    session = sessionmaker(bind=engine)()
    try:
        # One randomly ordered query replaces num_questions separate ones and
        # avoids picking the same row twice.
        rows = session.query(table).order_by(func.random()).limit(num_questions).all()
    finally:
        # Always release the session and the engine's connections.
        session.close()
        engine.dispose()
    # An empty table simply produces an empty list (it used to crash on None).
    questions_and_answers = [
        {
            "question": question_template.format(name_value=getattr(row, name_column)),
            "answer": answer_template.format(answer_value=getattr(row, value_column)),
        }
        for row in rows
    ]
    # Write the pairs as pretty-printed JSON
    with open(file_path, 'w', encoding='utf-8') as file:
        json.dump(questions_and_answers, file, ensure_ascii=False, indent=4)
    print(f"已生成表 {table_name} 的问题到文件: {file_path}")
 def main():
    """Generate one ``<table name>.json`` question file per reflected database table.

    Every table reflected from the database named by SQL_DATABASE_URL is
    handed to ``generate_questions_for_table``; the output files are written
    into the directory that contains this script.
    """
    # Directory that contains this script
    output_dir = os.path.dirname(os.path.abspath(__file__))
    db_engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    schema = MetaData()
    schema.reflect(bind=db_engine)
    # One JSON file per table: <script dir>/<table name>.json
    for table_name in schema.tables:
        target_path = os.path.join(output_dir, f"{table_name}.json")
        generate_questions_for_table(table_name, target_path)
 # Script entry point: generate the question files only when run directly.
 if __name__ == "__main__":
    main()
@@ -0,0 +1,14 @@
 question：线路参数_转角次数的属性值是多少?     answer：线路参数_转角次数的属性值是64
 question：接地土石方量的属性值是多少?      answer：接地土石方量的属性值是16
 question：工程监理费的金额是多少?      answer：工程监理费的金额是131009.92
 question：矿产压覆评估费用的金额是多少?      answer：矿产压覆评估费用的金额是0
 question：线路取费表（余物清理）的费率是多少?      answer：线路取费表（余物清理）的费率是100
 question：线路取费表（拆除）的费率是多少?      answer：线路取费表（拆除）的费率是100
 question：一般线路本体工程的合价是多少?      answer：一般线路本体工程的合价是55105688268.5176
 question：基础工程的合价是多少?      answer：基础工程的合价是49051649642.9667
 question：线路取费表(调试工程)aa的直接费是多少?      answer：线路取费表(调试工程)aa的直接费是22411207942.4858
 question：线路取费表的直接费是多少?       answer：线路取费表的直接费是7314300665.34141
 question：一般线路本体工程的金额是多少?       answer：一般线路本体工程的金额是55105688268.5176
 question：架空输电线路本体工程的金额是多少?      answer：架空输电线路本体工程的金额是55105688268.5176
 question：截止阀的编码是多少?      answer：截止阀的编码是F01010101
 question：自定义主材的编码是多少?      answer：自定义主材的编码是asd
@@ -0,0 +1,110 @@
 import os
 import json
 import sys
 from llama_index.core import VectorStoreIndex, SQLDatabase
 from llama_index.core.indices.struct_store import SQLTableRetrieverQueryEngine
 from llama_index.core.objects import SQLTableNodeMapping, ObjectIndex
 from sqlalchemy import create_engine
 from app.api.routers.chat import generate_filters
 from app.engine import get_index, makeDescriptionByEngine
 from app.engine.vectordb import get_vector_store
 from app.observability import init_observability
 from app.settings import init_settings
 from dotenv import load_dotenv
 load_dotenv()
 def read_questions(file_path):
    """Return the "question" value of every entry in a UTF-8 JSON array file.

    Args:
        file_path: path of the JSON file to read.

    Returns:
        A list of questions in file order.

    Raises:
        KeyError: if an entry has no "question" key.
    """
    with open(file_path, 'r', encoding='utf-8') as handle:
        entries = json.load(handle)
    collected = []
    for entry in entries:
        collected.append(entry["question"])
    return collected
 def save_results_to_file(question, result, file_path):
    """Append a question and its result to ``file_path`` as a single JSON line.

    Args:
        question: the question that was asked.
        result: the result to store; it must be JSON-serialisable.
        file_path: file opened in append mode with UTF-8 encoding.
    """
    payload = dict(question=question, result=result)
    with open(file_path, 'a', encoding='utf-8') as sink:
        # One JSON document per line; non-ASCII text is written unescaped.
        sink.write(json.dumps(payload, ensure_ascii=False))
        sink.write('\n')
 def main(questions_file, query_type):
    """Ask every question at ten temperatures (0.1 to 1.0) and log each answer.

    Questions are read from ``questions_file`` (resolved relative to this
    script's directory). Every answer is appended to
    ``parameters_results.json`` next to this script, together with the
    parameters used.

    Args:
        questions_file: name of the questions JSON file.
        query_type: "vector" or "sql"; any other value prints an error and
            exits with status 1.
    """
    # Reject an unsupported query type before any expensive initialisation
    # instead of discovering it inside the nested query loop.
    if query_type not in ("vector", "sql"):
        print("无效的查询类型，请选择 'vector' 或 'sql'")
        sys.exit(1)
    # Fix the retrieval parameters through environment variables
    os.environ['TOP_K'] = str(5)  # top-k for vector retrieval
    os.environ['similarity_top_k'] = str(1)  # top-k for SQL table retrieval, fixed at 1
    init_settings()
    init_observability()
    index = get_index()
    top_k = int(os.getenv("TOP_K"))
    similarity_top_k = int(os.getenv("similarity_top_k"))
    filters = generate_filters([])
    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    sql_database = SQLDatabase(engine)
    table_schema_objs = makeDescriptionByEngine(sql_database)
    table_node_mapping = SQLTableNodeMapping(sql_database)
    # Build the SQL table retriever once; it does not depend on the temperature
    sql_obj_index = ObjectIndex.from_objects(
        table_schema_objs,
        table_node_mapping,
        index_cls=VectorStoreIndex,
    )
    sql_retriever = sql_obj_index.as_retriever(similarity_top_k=similarity_top_k)
    script_dir = os.path.dirname(os.path.abspath(__file__))
    questions_file_path = os.path.join(script_dir, questions_file)
    results_file_path = os.path.join(script_dir, "parameters_results.json")
    questions = read_questions(questions_file_path)
    # Run the queries one by one
    for i, question in enumerate(questions):
        print(f"Executing query {i+1}: {question}")
        # Try each temperature from 0.1 to 1.0 for this question
        for temperature in range(1, 11):
            temperature_value = temperature / 10.0
            os.environ['LLM_TEMPERATURE'] = str(temperature_value)
            # Previously the variable was only written here, after the
            # settings had been initialised and the SQL engine built, and
            # nothing re-read it, so every run appeared to use the same LLM
            # configuration while being logged with a different temperature.
            # NOTE(review): assumes init_settings() reads LLM_TEMPERATURE from
            # the environment and that query engines pick up the configured
            # LLM when they are constructed; confirm against app.settings.
            init_settings()
            if query_type == "vector":
                query_engine = index.as_query_engine(
                    similarity_top_k=top_k, filters=filters
                )
                result_label = "Vector Query Result"
            else:
                query_engine = SQLTableRetrieverQueryEngine(sql_database, sql_retriever)
                result_label = "SQL Query Result"
            query_result = query_engine.query(question)
            print(f"{result_label}: {query_result}\n")
            save_results_to_file(question, f"Current parameters: TOP_K={top_k}, similarity_top_k={similarity_top_k}, Temperature: {temperature_value:.1f}, {result_label}: {query_result}", results_file_path)
 if __name__ == "__main__":
    # Two positional arguments are required: the questions JSON file and the
    # query type (lower-cased before use).
    if not len(sys.argv) >= 3:
        print("请提供questions.json文件的路径和查询类型（vector 或 sql）")
        sys.exit(1)
    questions_file, query_type = sys.argv[1], sys.argv[2].lower()
    # Imported here so the argument check above runs before phoenix is loaded.
    from phoenix.trace import using_project
    # Run the sweep inside phoenix's using_project context, passing the
    # questions file name as its argument.
    with using_project(questions_file):
        main(questions_file, query_type)
@@ -19,14 +19,13 @@ def main():
    init_settings()
    init_observability()
-    indexs = get_index()
+    index = get_index()
    if len(indexs) > 0:
        index = list(indexs.values())[0]
    top_k = 5
    filters = generate_filters([])
    #question = "从工程属性表中查找工程名称"
-    question = "总算表中名称等于架空输电线路本体工程的金额?"
+    #question = "总算表中名称等于架空输电线路本体工程的金额?"
    question = "工程监理费的金额是多少？"
    # 创建向量检索查询工具
    query_engine = index.as_query_engine(
        similarity_top_k=top_k, filters=filters
@@ -37,18 +36,20 @@ def main():
    engine = create_engine(os.getenv("SQL_DATABASE_URL", ""))
    sql_database = SQLDatabase(engine)
    loader = CustomDatabaseReader(sql_database)
    documents = loader.load_data(query="select * from ProjectProperties")
    table_schema_objs = makeDescriptionByEngine(sql_database)
    table_node_mapping = SQLTableNodeMapping(sql_database)
    vectorIndex = VectorStoreIndex()
    # 创建SQL查询工具
-    sql_obj_index = ObjectIndex.from_objects(
+    # sql_obj_index = ObjectIndex.from_objects(
    #     table_schema_objs,
    #     table_node_mapping,
    #     index_cls=VectorStoreIndex,
    # )
    sql_obj_index = ObjectIndex.from_objects_and_index(
        table_schema_objs,
        vectorIndex,
        table_node_mapping,
        index_cls=VectorStoreIndex,
    )
    query_result =vectorIndex.as_query_engine(
@@ -0,0 +1,10 @@
 # The backend API for chat endpoint.
 #NEXT_PUBLIC_CHAT_API=http://localhost:8000/api/chat
 NEXT_PUBLIC_CHAT_API=http://10.1.6.41:8000/api/chat
 #PHOENIX_SERVER_URL=http://localhost:6006/
 PHOENIX_SERVER_URL=http://10.1.6.41:6006/
 # Lets the user change indexes in LlamaCloud projects
 NEXT_PUBLIC_USE_LLAMACLOUD=false
@@ -4,7 +4,7 @@ const phoenixUrl = process.env.PHOENIX_SERVER_URL;
 export default function Header() {
  return (
-    <div className="z-10 max-w-5xl w-full items-center justify-between font-mono text-sm lg:flex">
+    <div className="z-10  w-full items-center justify-between font-mono text-sm lg:flex">
      <p className="fixed left-0 top-0 flex w-full justify-center border-b border-gray-300 bg-gradient-to-b from-zinc-200 pb-6 pt-8 backdrop-blur-2xl dark:border-neutral-800 dark:bg-zinc-800/30 dark:from-inherit lg:static lg:w-auto  lg:rounded-xl lg:border lg:bg-gray-200 lg:p-4 lg:dark:bg-zinc-800/30">
        <code className="font-mono font-bold"><a href="javascript:location.reload();">清空当前会话</a></code>
      </p>
@@ -99,9 +99,8 @@ export default function ChatInput(
        </div>
      )}
      <div className="flex w-full items-start justify-between gap-4 ">
-        <textarea
+        <Input
          autoFocus
          rows={2}
          name="message"
          placeholder="请输入消息"
          className="flex-1"
@@ -127,9 +127,26 @@ function NodeInfo({ nodeInfo }: { nodeInfo: NodeInfo }) {
  }
  // node generated by unknown loader, implement renderer by analyzing logged out metadata
  // return (
  //   <p>
  //     对不起, 未知文件类型. 无法打开当前的来源文件。
  //   </p>
  // );
  return (
-    <p>
+    <div className="flex items-center my-2">
-      对不起, 未知文件类型. 无法打开当前的来源文件。
+        <span>{nodeInfo.text}</span>
-    </p>
+      <Button
        onClick={() => copyToClipboard(nodeInfo.url!)}
        size="icon"
        variant="ghost"
        className="h-12 w-12 shrink-0"
      >
        {isCopied ? (
          <Check className="h-4 w-4" />
        ) : (
          <Copy className="h-4 w-4" />
        )}
      </Button>
    </div>
  );
 }
@@ -10,7 +10,7 @@ export interface ChatHandler {
      data?: any;
    },
  ) => void;
-  handleInputChange: (e: React.ChangeEvent<HTMLTextAreaElement>) => void;
+  handleInputChange: (e: React.ChangeEvent<HTMLInputElement>) => void;
  reload?: () => void;
  stop?: () => void;
  onFileUpload?: (file: File) => Promise<void>;
@@ -0,0 +1,3 @@
 ENV_PHOENIX_HOST=0.0.0.0
 ENV_PHOENIX_PORT=6006
 PHOENIX_HOST_ROOT_PATH=./.phoenix/
@@ -2,4 +2,4 @@ SET ENV_PHOENIX_HOST=0.0.0.0
 SET ENV_PHOENIX_PORT=6006
 SET PHOENIX_HOST_ROOT_PATH=./.phoenix/
-C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-pCyqx0Uo-py3.11\Scripts\python phoenixserver.py
+python phoenixserver.py
Author	SHA1	Message	Date
chentianrui	a200e8adfc	优化了提示词	2024-08-23 18:35:19 +08:00
chentianrui	7691b22274	将项目划分表按照业务拆分	2024-08-23 15:07:26 +08:00
chentianrui	d1117c73c4	将项目划分表按照业务拆分	2024-08-23 15:05:48 +08:00
chentianrui	5fc8375a06	Merge branch 'dev' of https://git.97id.com/ly/zjdataai-app into dev	2024-08-23 08:55:54 +08:00
chentianrui	cf1ed4e71d	解决输出频繁出现'''的问题	2024-08-23 08:53:13 +08:00
ly	8050551a53	调整创建SQL引擎函数名称	2024-08-22 21:21:37 +08:00
ly	513ce73190	Merge branch 'dev' of https://git.97id.com/ly/zjdataai-app into dev	2024-08-22 21:18:37 +08:00
chentianrui	48d10fd1f3	修复了重排的参数问题	2024-08-22 19:40:56 +08:00
ly	9cbe414a0c	调整参数顺序	2024-08-22 17:10:32 +08:00
chentianrui	4c1c67aa50	增加开启了混合检索	2024-08-22 17:06:28 +08:00
chentianrui	59ef831a41	修改了提示词	2024-08-22 16:36:37 +08:00
ly	3ceb30c375	修复缺陷	2024-08-22 16:17:10 +08:00
ly	e71da586e3	修复缺陷	2024-08-22 16:02:07 +08:00
ly	b3a575d158	调整代码结构，同时修改重定义提示词的方式。	2024-08-22 15:39:49 +08:00
chentianrui	db006985d7	修改了提示词，约束模型回答	2024-08-22 15:24:29 +08:00
wanyaokun	870af69189	新增包依赖	2024-08-22 12:09:15 +08:00
wanyaokun	3460b8410e	新增关键字缓存路径	2024-08-22 12:06:43 +08:00
wanyaokun	586bb76c9c	新增关键字检索缓存路径	2024-08-22 11:09:16 +08:00
wanyaokun	8d7190d0b6	新增关键字检索类	2024-08-22 11:07:23 +08:00
wanyaokun	043aea6cca	新增自定义关键词检索类	2024-08-22 11:06:22 +08:00
wanyaokun	f5d6eb6a22	修改重排开关失效问题	2024-08-22 09:32:19 +08:00
wanyaokun	6e473499b8	新增混合检索	2024-08-21 19:30:43 +08:00
wanyaokun	1ec122c852	修改配置	2024-08-21 17:48:16 +08:00
wanyaokun	aadbcbf15f	新增库依赖	2024-08-21 17:45:38 +08:00
wanyaokun	0d71447687	修改数组溢出问题	2024-08-20 18:53:19 +08:00
ly	59488ae459	停止使用历史消息	2024-08-19 17:23:43 +08:00
ly	9eb8142a0b	修改导入关系	2024-08-19 17:23:01 +08:00
ly	d52174532e	调整TOP_K参数值	2024-08-19 17:22:38 +08:00
ly	a82a80c56e	Merge remote-tracking branch 'origin/dev' into dev	2024-08-19 15:52:58 +08:00
ly	bbe3fd0b0b	修改XinferenceRerank类增加最多N条知识和最小匹配度过滤支持	2024-08-19 15:39:28 +08:00
ly	8366cd8f2f	修改JSON格式的表信息中字段英文单词错误问题	2024-08-19 15:36:45 +08:00
wanyaokun	c9726fbd40	更新Json文件	2024-08-19 15:36:42 +08:00
ly	92fe3c5959	调整生成数据时项目划分表的条目项和层级	2024-08-19 15:36:08 +08:00
ly	cef574818a	改进rerank效果	2024-08-19 10:24:07 +08:00
ly	240ae8e72a	改进rerank效果	2024-08-19 10:08:16 +08:00
ly	22c51218b3	改进rerank效果	2024-08-19 10:03:46 +08:00
ly	806b694b37	修复获取节点后处理机制BUG	2024-08-19 09:14:54 +08:00
ly	26ecb256ce	Merge remote-tracking branch 'origin/dev' into dev	2024-08-19 09:07:29 +08:00
ly	3e2bdea196	修复自定义JSON文件加载支持BUG	2024-08-19 09:06:12 +08:00
ly	176b49983a	调整测试代码	2024-08-19 08:59:45 +08:00
ly	2942730c9a	增加对Rerank功能支持	2024-08-19 08:59:08 +08:00
ly	8d4382376f	由于基于历史消息的提示词没有调整好，所以暂时屏蔽历史消息	2024-08-19 08:58:08 +08:00
ly	0f6d76ddbe	增加XinferenceRerank	2024-08-19 08:27:22 +08:00
ly	01c815a17b	调整入口代码结构	2024-08-19 08:26:13 +08:00
chentianrui	a9b5dc94fe	Add new files and update existing files	2024-08-16 19:02:06 +08:00
chentianrui	aa2cecc997	Add new files and update existing files	2024-08-16 14:29:35 +08:00
chentianrui	ae7e21768b	Add new files and update existing files	2024-08-16 11:17:27 +08:00
chentianrui	3082ac5f3d	Add new files and update existing files	2024-08-15 19:08:58 +08:00
ly	d3df62f454	增加xinference的配置支持	2024-08-14 08:52:56 +08:00
ly	1bfb28c40c	增加对xinference的支持。	2024-08-14 08:51:51 +08:00
ly	092d9705a7	增加对XinferenceEmbedding的支持，临死放到这里。	2024-08-14 08:50:19 +08:00
ly	4d314c9714	实现进入页面后的提示问题功能。	2024-08-13 13:57:09 +08:00
ly	5b90e0a03e	修改代码检查问题	2024-08-13 13:49:06 +08:00
ly	9fdf3286d9	调整GIT提交忽略文件范围，增加对IDEA的排除和对生成后数据的排除。	2024-08-13 13:32:31 +08:00
ly	c4062fbf48	修改关联提问提示文本为中文，同时增加上提问内容现定于知识库现有内容的要求。	2024-08-13 13:31:32 +08:00
ly	de8673059c	还原原来的将本次回答检索到的知识库返回给客户端功能；修改关联提问提示文本为中文，同时增加上提问内容现定于知识库现有内容的要求。	2024-08-13 13:30:57 +08:00
ly	acf649beb2	还原原来的单行文本输入框	2024-08-13 13:29:33 +08:00
ly	9b3dfbbee4	调整前端页面右侧链接实现右对齐	2024-08-13 13:29:10 +08:00
ly	75f6f45b24	去除批处理中的python绝对路径	2024-08-13 13:28:10 +08:00
ly	abbd116d25	增加环境配置示例文件	2024-08-13 13:27:17 +08:00
ly	20a6ad14a8	增加前端和后端依赖库版本锁定	2024-08-13 13:26:52 +08:00
		`@@ -0,0 +1 @@`
							`STORAGE_DIR = "storage" # directory to cache the generated index`
		`@@ -1 +1 @@`
			`C:\Users\liuyue\AppData\Local\pypoetry\Cache\virtualenvs\app-laEO4lY0-py3.11\Scripts\python main.py`				`python main.py`