优化了提示词

2024-08-23 18:35:19 +08:00
parent 7691b22274
commit a200e8adfc
39 changed files with 3083 additions and 21 deletions
@@ -0,0 +1,109 @@
+import json
+
+from aiostream import stream
+from fastapi import Request
+from fastapi.responses import StreamingResponse
+from llama_index.core.chat_engine.types import StreamingAgentChatResponse
+
+from app.api.routers.events import EventCallbackHandler
+from app.api.routers.models import ChatData, Message, SourceNodes
+from app.api.services.suggestion import NextQuestionSuggestion
+
+
+class VercelStreamResponse(StreamingResponse):
+    """
+    Class to convert the response from the chat engine to the streaming format expected by Vercel
+    """
+
+    TEXT_PREFIX = "0:"
+    DATA_PREFIX = "8:"
+
+    @classmethod
+    def convert_text(cls, token: str):
+        # Escape newlines and double quotes to avoid breaking the stream
+        token = json.dumps(token)
+        return f"{cls.TEXT_PREFIX}{token}\n"
+
+    @classmethod
+    def convert_data(cls, data: dict):
+        data_str = json.dumps(data)
+        return f"{cls.DATA_PREFIX}[{data_str}]\n"
+
+    def __init__(
+        self,
+        request: Request,
+        event_handler: EventCallbackHandler,
+        response: StreamingAgentChatResponse,
+        chat_data: ChatData,
+    ):
+        content = VercelStreamResponse.content_generator(
+            request, event_handler, response, chat_data
+        )
+        super().__init__(content=content)
+
+    @classmethod
+    async def content_generator(
+        cls,
+        request: Request,
+        event_handler: EventCallbackHandler,
+        response: StreamingAgentChatResponse,
+        chat_data: ChatData,
+    ):
+        # Yield the text response
+        async def _chat_response_generator():
+            final_response = ""
+            async for token in response.async_response_gen():
+                final_response += token
+                yield VercelStreamResponse.convert_text(token)
+
+            # Generate questions that user might interested to
+            conversation = chat_data.messages + [
+                Message(role="assistant", content=final_response)
+            ]
+            questions = await NextQuestionSuggestion.suggest_next_questions(
+                conversation
+            )
+            if len(questions) > 0:
+                yield VercelStreamResponse.convert_data(
+                    {
+                        "type": "suggested_questions",
+                        "data": questions,
+                    }
+                )
+
+            # the text_generator is the leading stream, once it's finished, also finish the event stream
+            event_handler.is_done = True
+
+            # Yield the source nodes
+            yield cls.convert_data(
+                {
+                    "type": "sources",
+                    "data": {
+                        "nodes": [
+                            SourceNodes.from_source_node(node).dict()
+                            for node in response.source_nodes
+                        ]
+                    },
+                }
+            )
+
+        # Yield the events from the event handler
+        async def _event_generator():
+            async for event in event_handler.async_event_gen():
+                event_response = event.to_response()
+                if event_response is not None:
+                    yield VercelStreamResponse.convert_data(event_response)
+
+        combine = stream.merge(_chat_response_generator(), _event_generator())
+        is_stream_started = False
+        async with combine.stream() as streamer:
+            async for output in streamer:
+                if not is_stream_started:
+                    is_stream_started = True
+                    # Stream a blank message to start the stream
+                    yield VercelStreamResponse.convert_text("")
+
+                yield output
+
+                if await request.is_disconnected():
+                    break