diff --git a/rag2_0/demo/intent_recognition_example.py b/rag2_0/demo/intent_recognition_example.py
index 0f55823..2981b7e 100755
--- a/rag2_0/demo/intent_recognition_example.py
+++ b/rag2_0/demo/intent_recognition_example.py
@@ -204,41 +204,7 @@ class QueryRewriteProcessor:
                                                   enable_query_expansion=True,
                                                   use_jieba=True,
                                                   cur_soft_name=current_softname))
-
-                # 提取分类信息
-                classification = result["classification"]
-                original_query = result["rewrite"]["rewrite"]
-                query_list = result["query_expand"]["all"]
-                # 将字典转换为Classification对象
-                classification_obj = Classification(**classification)
-                
-                # 根据enable_retrieval参数决定是否进行文档检索
-                retrieved_doc = None
-        
-
-                retrieved_doc_titles=[]
-                if retrieved_doc:
-                    retrieved_doc_titles=[doc["title"].split("/")[-1] for doc in retrieved_doc]     
-                # 提取槽位填充信息
-                slot_filling = result.get("slot_filling", {})
-                slot_filling_str = ""
-                if slot_filling and "filled_data" in slot_filling:
-                    # 格式化槽位填充结果
-                    slot_filling_str = json.dumps({
-                        "is_complete": slot_filling.get("is_complete", False),
-                        "missing_slots": slot_filling.get("missing_slots", {}),
-                        "filled_data": slot_filling.get("filled_data", {})
-                    }, ensure_ascii=False, indent=2)
-
-                # 处理成功，返回结果
-                return {
-                    "问题": query,
-                    "问题分类": f"{classification['vertical_classification']} - {classification['sub_classification']}",
-                    "问题改写": result["rewrite"]["rewrite"],
-                    "槽位信息": slot_filling_str,
-                    "检索的文档": "\n".join(retrieved_doc_titles),
-                    "检索的内容":  json.dumps(retrieved_doc, ensure_ascii=False, indent=2) if retrieved_doc else "",
-                }
+                return result
             except Exception as e:
                 logging.error(f"处理问题 '{query}' 时出错: ",exc_info=True)
                 retry_count += 1
@@ -424,19 +390,25 @@ def main():
         logging.info(f"所有处理完成，最终结果已保存至: {output_file}")
     else:
         logging.info(f"文档检索功能状态: 已启用")
-        for idx, query in enumerate(examples):
-            if query.strip() == "":
-                continue
-            query="怎么调整报表顺序"
+        nCount=0
+        while True:
+            query="请问怎么在南网版概算工程软件版里面查施工费？"
             conversation_context={
-                "current_softname": "储能计价通C1软件"
+                "current_softname": "电力建设计价通软件"
             }
-            # 在调试模式下使用完整的参数
-            print(json.dumps(processor.process_query(
+            result = processor.process_query(
                 query, 
                 conversation_context=conversation_context,
                 enable_retrieval=True
-            ), ensure_ascii=False, indent=2))
+            )
+            # 在调试模式下使用完整的参数
+            # print(json.dumps(processor.process_query(
+            #     query, 
+            #     conversation_context=conversation_context,
+            #     enable_retrieval=True
+            # ), ensure_ascii=False, indent=2))
+            nCount+=1
+            print("测试数量:  ", nCount)
 
 def setup_logging():
     # 配置日志输出到控制台
diff --git a/rag2_0/intent_recognition/DataModels.py b/rag2_0/intent_recognition/DataModels.py
index 60d6cec..1ed0085 100755
--- a/rag2_0/intent_recognition/DataModels.py
+++ b/rag2_0/intent_recognition/DataModels.py
@@ -132,9 +132,48 @@ class QueryRewrite(BaseModel):
             "rewrite": "问题改写"
         }
         字段说明：
-        rewrite 类型：str 描述：问题改写之后的内容
+        "rewrite" 类型：str 描述：问题改写之后的内容
         """
 
+
+# Data models for the intent-optimization stage
+class StepBackPrompt(BaseModel):
+    """后退提示数据模型"""  # "step-back prompt data model"; kept verbatim — NOTE(review): this docstring may be surfaced in the generated schema, confirm before translating
+    original_query: str = Field(description="原始查询")  # the original query
+    can_use_back_prompt: bool = Field(description="原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语，则不能进行后退提示")  # whether step-back prompting applies to the original query
+    step_back_query: List[str] = Field(description="后退提示生成的抽象查询(多个)")  # abstracted queries produced by step-back prompting (several)
+
+    @classmethod  # NOTE(review): the sample object below quotes the bool and list values as strings while the field notes say bool / list[str] — confirm this is intended
+    def get_format_instructions(cls) -> str:  # returns the fixed prompt text below verbatim; it is not derived from the Field definitions
+        return """
+        格式如下，必须严格以纯JSON格式输出
+        {
+            "original_query": "原始查询",
+            "can_use_back_prompt": "原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语，则不能进行后退提示",
+            "step_back_query": "后退提示生成的抽象查询(多个)"
+        }
+        字段说明：
+        "original_query" 类型：str 描述：用户输入的原始查询
+        "can_use_back_prompt" 类型：bool 描述：原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语，则不能进行后退提示
+        "step_back_query" 类型：list[str] 描述：后退提示生成的抽象查询(多个)
+        """
+        
+
+class FollowUpQuestions(BaseModel):  # follow-up question model; docstring and descriptions below are kept verbatim as they may be runtime-visible
+    """后续问题数据模型"""
+    original_query: str = Field(description="原始查询")  # the original query
+    follow_up_query: str = Field(description="基于历史对话生成的独立问题")  # standalone question generated from the conversation history
+
+class HypotheticalDocument(BaseModel):  # hypothetical-document model; docstring and descriptions below are kept verbatim as they may be runtime-visible
+    """假设文档数据模型"""
+    original_query: str = Field(description="原始查询")  # the original query
+    hypothetical_answer: str = Field(description="假设性回答")  # a hypothetical answer to the query
+
+class MultiQuestions(BaseModel):  # multi-question query model; docstring and descriptions below are kept verbatim as they may be runtime-visible
+    """多问题查询数据模型"""
+    original_query: str = Field(description="原始查询")  # the original query
+    sub_questions: List[str] = Field(description="从不同角度生成的子问题列表")  # sub-questions generated from different angles
+
 ##########################槽位模型###########################
 class SlotBase(BaseModel):
     """槽位基础模型"""
@@ -329,41 +368,3 @@ class IntentAndSlotResult(BaseModel):
         ProblemDiagnosisSlots,
         OtherSlots
     ]
-
-# 意图优化环节数据模型
-class StepBackPrompt(BaseModel):
-    """后退提示数据模型"""
-    original_query: str = Field(description="原始查询")
-    can_use_back_prompt: bool = Field(description="原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语，则不能进行后退提示")
-    step_back_query: List[str] = Field(description="后退提示生成的抽象查询(多个)")
-
-    @classmethod
-    def get_format_instructions(cls):
-        return """
-        格式如下，必须严格以纯JSON格式输出
-        {
-            "original_query": "原始查询",
-            "can_use_back_prompt": "原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语，则不能进行后退提示",
-            "step_back_query": "后退提示生成的抽象查询(多个)"
-        }
-        字段说明：
-        original_query 类型：str 描述：用户输入的原始查询
-        can_use_back_prompt 类型：bool 描述：原始查询是否可以进行后退提示(true/false),如果原始查询没有限定词或其他限定词语，则不能进行后退提示
-        step_back_query 类型：List[str] 描述：后退提示生成的抽象查询(多个)
-        """
-        
-
-class FollowUpQuestions(BaseModel):
-    """后续问题数据模型"""
-    original_query: str = Field(description="原始查询")
-    follow_up_query: str = Field(description="基于历史对话生成的独立问题")
-
-class HypotheticalDocument(BaseModel):
-    """假设文档数据模型"""
-    original_query: str = Field(description="原始查询")
-    hypothetical_answer: str = Field(description="假设性回答")
-
-class MultiQuestions(BaseModel):
-    """多问题查询数据模型"""
-    original_query: str = Field(description="原始查询")
-    sub_questions: List[str] = Field(description="从不同角度生成的子问题列表")
\ No newline at end of file
diff --git a/rag2_0/intent_recognition/IntentRecognition.py b/rag2_0/intent_recognition/IntentRecognition.py
index 06d6977..14cbe9e 100755
--- a/rag2_0/intent_recognition/IntentRecognition.py
+++ b/rag2_0/intent_recognition/IntentRecognition.py
@@ -170,6 +170,24 @@ class AsyncIntentRecognizer:
         except Exception as e:
             raise RuntimeError(f"加载后缀关键词失败: {e}") from e
     
+    def _clean_llm_json(self, content: str) -> str:
+        """
+        Uniformly clean a JSON string returned by the LLM:
+        1) strip leading and trailing whitespace
+        2) remove every <think>...</think> segment (non-greedy; DOTALL lets it span lines)
+        3) crop to the span from the first "{" to the last "}", when both exist in that order
+        4) delete ALL whitespace characters, including any inside JSON string values
+        """
+        content = content.strip()
+        content = re.sub(r'<think>.*?</think>', '', content, flags=re.DOTALL)
+        # Crop to the outermost curly-brace span; content is left as-is when no such span exists
+        start = content.find('{')
+        end = content.rfind('}')
+        if start != -1 and end != -1 and start < end:
+            content = content[start:end+1]
+        content = re.sub(r'\s+', '', content)
+        return content
+    
     async def _classify_intent_async(self, query: str, conversation_context: str = "", 
                       chat_history: List[Dict[str, str]] = None, 
                       previous_slots: Dict[str, Any] = None) -> Classification:
@@ -194,13 +212,11 @@ class AsyncIntentRecognizer:
         # 解析输出
         try:
             # 异步调用LLM
-            response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
-            # response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
+            # response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"})
 
             # 尝试直接解析JSON响应
-            response.content = response.content.strip()
-            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
-            clean_output = re.sub(r'\s+', '', clean_output)
+            clean_output = self._clean_llm_json(response.content)
             parsed_output = classification_parser.parse(clean_output)
             
             # 计算并打印耗时
@@ -264,13 +280,11 @@ class AsyncIntentRecognizer:
             formatted_prompt = formatted_prompt.replace("{output_format}", terms_list_parser.get_format_instructions())
             
             # 异步调用LLM
-            response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
-            # response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
+            # response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"})
             
             # 尝试使用Pydantic解析器解析TermList
-            response.content = response.content.strip()
-            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
-            clean_output = re.sub(r'\s+', '', clean_output)
+            clean_output = self._clean_llm_json(response.content)
             parsed_output = terms_list_parser.parse(clean_output)
             return parsed_output.terms
 
@@ -341,20 +355,16 @@ class AsyncIntentRecognizer:
         1、请从当前提问内容中提取电力造价行中定额编码、定额名称、清单编码、清单名称
         2、请勿随机编造，如果没有提取到内容返回空的JSON
         3、返回结果为json格式,必须严格以纯JSON格式输出
-        ```json
         {{
             "dinge_info_list":{{"dinge_code_list":["xxxx","xxxx"], "dinge_name_list":["xxxx","xxxx"]}},
             "qingdan_info":{{"qingdan_code_list":["xxxx","xxxx"], "qingdan_name_list":["xxxx","xxxx"]}}
         }}
-        ```json
         """
 
         try:
-            response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
-            # response = await self._llm.ainvoke(prompt, extra_body={"enable_thinking": False})
-            response.content = response.content.strip()
-            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
-            clean_output = re.sub(r'\s+', '', clean_output)
+            # response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"})
+            clean_output = self._clean_llm_json(response.content)
             parsed_output = JsonOutputParser().parse(clean_output)
             
             # 计算并打印耗时
@@ -393,11 +403,9 @@ class AsyncIntentRecognizer:
         # 解析输出
         try:
             # 异步调用LLM
-            response = await self._llm.ainvoke(formatted_prompt,response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
-            # response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
-            response.content = response.content.strip()
-            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
-            clean_output = re.sub(r'\s+', '', clean_output)
+            # response = await self._llm.ainvoke(formatted_prompt,response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            response = await self._llm.ainvoke(formatted_prompt,response_format={"type": "json_object"})
+            clean_output = self._clean_llm_json(response.content)
             parsed_output = query_rewrite_parser.parse(clean_output)
             end_time = time.time()
             process_time=end_time-start_time
@@ -642,9 +650,7 @@ class AsyncIntentRecognizer:
             # 异步调用LLM
             response = await self._llm.ainvoke(formatted_prompt,response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
             # response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
-            response.content = response.content.strip()
-            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
-            clean_output = re.sub(r'\s+', '', clean_output)
+            clean_output = self._clean_llm_json(response.content)
             # 尝试解析LLM响应
             parsed_output = slot_parser.parse(clean_output)
             return parsed_output
@@ -677,22 +683,18 @@ class AsyncIntentRecognizer:
         
         try:
             # 异步调用LLM
-            response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
-            # response = await self._llm.ainvoke(formatted_prompt, extra_body={"enable_thinking": False})
+            # response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            response = await self._llm.ainvoke(formatted_prompt, response_format={"type": "json_object"})
             
             # 解析输出
-            response.content = response.content.strip()
-            clean_output = re.sub(r'<think>.*?</think>', '', response.content, flags=re.DOTALL)
-            clean_output = re.sub(r'\s+', '', clean_output)
+            clean_output = self._clean_llm_json(response.content)
             parsed_output = step_back_parser.parse(clean_output)
             step_back_end_time = time.time()
             step_back_time = step_back_end_time - step_back_start_time
             logging.info(f"后退提示生成耗时统计 - 总耗时: {step_back_time:.2f}秒")
             return parsed_output
         except Exception as e:
-            # 如果解析失败，返回原始查询作为后退提示
-            logging.error(f"后退提示生成失败: {e}", exc_info=True)
-            return StepBackPrompt(original_query=query, can_use_back_prompt=False, step_back_query=[query])
+            raise RuntimeError(f"解析后退提示结果时出错: {e}") from e
     
     async def _find_matching_software_docs_async(self, query: str, soft_name: str, 
                                         chat_history: List[Dict[str, str]] = None, 
@@ -739,7 +741,8 @@ class AsyncIntentRecognizer:
         try:
             # 异步调用LLM
             start_time = time.time()
-            response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            # response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"}, extra_body={"enable_thinking": False})
+            response = await self._llm.ainvoke(prompt, response_format={"type": "json_object"})
             end_time = time.time()
             
             # 解析JSON响应
diff --git a/rag2_0/tool/APIKeyManager.py b/rag2_0/tool/APIKeyManager.py
index 2926f0d..0767e77 100755
--- a/rag2_0/tool/APIKeyManager.py
+++ b/rag2_0/tool/APIKeyManager.py
@@ -30,7 +30,7 @@ class APIKeyManager:
     # 密钥使用计数和上次使用时间
     _key_usage: Dict[str, Dict] = {}
     # 当前正在使用的密钥索引
-    _current_index = 0
+    _current_index = -1
 
     api_file_path = "api_key.txt"
     
diff --git a/rag2_0/tool/ModelTool.py b/rag2_0/tool/ModelTool.py
index d223f42..be9daad 100755
--- a/rag2_0/tool/ModelTool.py
+++ b/rag2_0/tool/ModelTool.py
@@ -200,7 +200,7 @@ class OpenAiLLM:
                 except Exception as e:
                     retry_count += 1
                     if retry_count == max_retries:
-                        raise RuntimeError(f"OpenAiLLM:invoke:error:{str(e)}.api_key:{api_key}") from e
+                        raise RuntimeError(f"OpenAiLLM:invoke:error:{str(e)}") from e
                     else:
                         time.sleep(5*retry_count)  # 重试前等待5秒*重试次数
         else:
@@ -215,7 +215,7 @@ class OpenAiLLM:
                     )
                     return completion.choices[0].message
             except Exception as e:
-                raise RuntimeError(f"OpenAiLLM:invoke:error:{str(e)}.api_key:{api_key}") from e
+                raise RuntimeError(f"OpenAiLLM:invoke:error:{str(e)}") from e
 
     async def ainvoke(self, user_prompt="你是谁？", **extra_kwargs):
         """异步调用OpenAI API"""