diff --git a/backend/.env.xinference b/backend/.env.xinference
new file mode 100644
index 0000000..71bdc2b
--- /dev/null
+++ b/backend/.env.xinference
@@ -0,0 +1,103 @@
+# The Llama Cloud API key.
+# LLAMA_CLOUD_API_KEY=
+SQL_DATABASE_URL=mysql+pymysql://zjinfo1:Dy2Bcr53Hm5xRkba@110.42.234.166:3306/zjinfo1
+#SQL_DATABASE_URL=mysql+pymysql://zjinfo2:GSKcziSdBixDXwcd@110.42.234.166:3306/zjinfo2
+
+#----------   Xinference    ----------------
+# The provider for the AI models to use.
+MODEL_PROVIDER=xinference
+# API key sent to the OpenAI-compatible endpoint (a placeholder here; presumably not checked by Xinference - confirm).
+OPENAI_API_KEY=xinference
+BASE_URL=http://10.1.0.142:9995
+MODEL=Qwen2-72B-Instruct-GPTQ-Int8
+# Temperature for sampling from the model.
+LLM_TEMPERATURE=0.1
+# Maximum number of tokens to generate.
+#LLM_MAX_TOKENS=
+# Name of the embedding model to use.
+EMBEDDING_MODEL=bge-m3
+EMBEDDING_BASE_URL=http://10.1.16.39:9995
+# Dimension of the embedding model to use.
+EMBEDDING_DIM=1024
+##----------   OpenAI    ----------------
+## The provider for the AI models to use.
+#MODEL_PROVIDER=openai
+## The OpenAI API key to use.
+#OPENAI_API_KEY=xinference
+#BASE_URL=http://10.1.0.142:9995/v1
+#MODEL=Qwen2-72B-Instruct-GPTQ-Int4
+## Temperature for sampling from the model.
+#LLM_TEMPERATURE=0.1
+## Maximum number of tokens to generate.
+##LLM_MAX_TOKENS=
+## Name of the embedding model to use.
+#EMBEDDING_MODEL=text-embedding-v2
+## Dimension of the embedding model to use.
+#EMBEDDING_DIM=1024
+#----------   DashScope    ----------------
+#DASHSCOPE_API_KEY=sk-02c8540e86d84b7ca0e6f4f51bac6e60
+## The provider for the AI models to use.
+#MODEL_PROVIDER=dashscope
+## The name of LLM model to use.
+#MODEL=qwen-max
+## Name of the embedding model to use.
+#EMBEDDING_MODEL=text-embedding-v2
+
+#--------------------------
+# Whether to enable reranking of retrieved results.
+ENABLE_RERANK=true
+
+
+# The questions to help users get started (one per entry, separated by a literal \n).
+CONVERSATION_STARTERS=本工程指什么？\n总算表有哪些费用？\n项目划分哪些内容构成？\n其他费用表有哪些内容？
+
+# The number of similar embeddings to return when retrieving documents.
+TOP_K=5
+
+# The time in milliseconds to wait for the stream to return a response.
+STREAM_TIMEOUT=60000
+
+# Vector store database type; currently supported: chroma, qdrant.
+VECTOR_STORE_TYPE=chroma
+# The name of the collection in your vector database
+VECTOR_STORE_COLLECTION=default
+
+# The API endpoint for your vector database
+# VECTOR_STORE_HOST=
+
+# The port for your vector database
+# VECTOR_STORE_PORT=
+
+# The local path to the vector database.
+# Specify this if you are using a local vector database.
+# Otherwise, use the VECTOR_STORE_HOST and VECTOR_STORE_PORT settings above.
+VECTOR_STORE_PATH=./storage_vector
+
+
+
+PHOENIX_API_KEY=123456
+PHOENIX_URL=http://localhost:6006/v1/traces
+PHOENIX_PROJECT_NAME=ly_zjapp
+#OTEL_SERVICE_NAME=ly_zjapp
+#OTEL_RESOURCE_ATTRIBUTES=openinference.project.name=ly_zjapp
+# The address to start the backend app.
+APP_HOST=0.0.0.0
+
+# The port to start the backend app.
+APP_PORT=8000
+
+FILESERVER_URL_PREFIX=/api/files
+
+# E2B_API_KEY is required to run the code interpreter tool. Get it here: https://e2b.dev/docs/getting-started/api-key
+# E2B_API_KEY=
+
+# The system prompt for the AI model.
+SYSTEM_PROMPT="You are a weather forecast agent. You help users to get the weather forecast for a given location.
+-You are a Python interpreter that can run any python code in a secure environment.
+- The python code runs in a Jupyter notebook. Every time you call the 'interpreter' tool, the python code is executed in a separate cell. 
+- You are given tasks to complete and you run python code to solve them.
+- It's okay to make multiple calls to interpreter tool. If you get an error or the result is not what you expected, you can call the tool again. Don't give up too soon!
+- Plot visualizations using matplotlib or any other visualization library directly in the notebook.
+- You can install any pip package (if it exists) by running a cell with pip install.
+"
+