diff --git a/backend/app/engine/model/xinfeng.py b/backend/app/engine/model/xinfeng.py
index c2ec772..18f475a 100644
--- a/backend/app/engine/model/xinfeng.py
+++ b/backend/app/engine/model/xinfeng.py
@@ -22,9 +22,12 @@ from llama_index.llms.xinference.utils import (
 class XinfengModel(Xinference):
     @llm_chat_callback()
     def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
+        msgs = []
         assert self._generator is not None
+        for message in messages:
+            msgs.append(message.dict())
         response_text = self._generator.chat(
-            messages=messages,
+            messages=msgs,
             generate_config={
                 "stream": False,
                 "temperature": self.temperature,
@@ -69,4 +72,81 @@ class XinfengModel(Xinference):
                     delta=delta,
                 )
 
 
+        return gen()
+
+    @llm_completion_callback()
+    def complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponse:
+        """Complete ``prompt`` with a single, non-streaming chat request.
+
+        Args:
+            prompt: Text sent to the model as one user message.
+            formatted: Accepted for interface compatibility; not used here.
+            **kwargs: Accepted for interface compatibility; not forwarded.
+
+        Returns:
+            A CompletionResponse whose ``text`` is the content of the first
+            choice returned by the generator.
+        """
+        assert self._generator is not None
+        # Send the prompt as the user turn; a system-only conversation gives
+        # the model no query to answer.
+        message = ChatMessage.from_str(prompt, MessageRole.USER)
+        response_text = self._generator.chat(
+            messages=[message.dict()],
+            generate_config={
+                "stream": False,
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+            },
+        )["choices"][0]["message"]["content"]
+        return CompletionResponse(
+            delta=None,
+            text=response_text,
+        )
+
+    @llm_completion_callback()
+    def stream_complete(
+        self, prompt: str, formatted: bool = False, **kwargs: Any
+    ) -> CompletionResponseGen:
+        """Complete ``prompt`` with a streaming chat request.
+
+        ``self._generator.chat`` is called as soon as this method runs; the
+        returned generator only iterates over the chunks it produced.
+
+        Args:
+            prompt: Text sent to the model as one user message.
+            formatted: Accepted for interface compatibility; not used here.
+            **kwargs: Accepted for interface compatibility; not forwarded.
+
+        Returns:
+            A generator yielding one CompletionResponse per chunk, with
+            ``delta`` set to the new text and ``text`` to the text so far.
+        """
+        assert self._generator is not None
+        # As in complete(): the prompt is sent as the user turn.
+        message = ChatMessage.from_str(prompt, MessageRole.USER)
+        response_iter = self._generator.chat(
+            messages=[message.dict()],
+            generate_config={
+                "stream": True,
+                "temperature": self.temperature,
+                "max_tokens": self.max_tokens,
+            },
+        )
+
+        def gen() -> CompletionResponseGen:
+            text = ""
+            for chunk in response_iter:
+                # A chunk's delta may omit "content" or carry None; treat
+                # both as an empty delta so the concatenation cannot fail.
+                delta = chunk["choices"][0]["delta"].get("content") or ""
+                text += delta
+                yield CompletionResponse(
+                    delta=delta,
+                    text=text,
+                )
+
+        return gen()