自定义xinfeng接口

This commit is contained in:
wanyaokun
2024-09-10 14:39:51 +08:00
parent a165d55822
commit 0fe60a7963
+52 -1
View File
@@ -22,9 +22,12 @@ from llama_index.llms.xinference.utils import (
class XinfengModel(Xinference):
@llm_chat_callback()
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
msgs = []
assert self._generator is not None
for message in messages:
msgs.append(message.dict())
response_text = self._generator.chat(
messages=messages,
messages=msgs,
generate_config={
"stream": False,
"temperature": self.temperature,
@@ -70,3 +73,51 @@ class XinfengModel(Xinference):
)
return gen()
@llm_completion_callback()
def complete(
    self, prompt: str, formatted: bool = False, **kwargs: Any
) -> CompletionResponse:
    """Run a non-streaming completion through the chat endpoint.

    The prompt is wrapped in a single chat message, serialized to a plain
    dict, and passed to ``self._generator.chat`` with ``stream`` disabled.
    The content of the first returned choice becomes the completion text.

    Args:
        prompt: Text to send to the model.
        formatted: Accepted for interface compatibility; not used here.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A ``CompletionResponse`` whose ``text`` is the first choice's
        message content and whose ``delta`` is ``None``.
    """
    assert self._generator is not None

    # NOTE(review): the prompt is sent with the SYSTEM role, not USER —
    # confirm this is intentional for the target model.
    payload = [ChatMessage.from_str(prompt, MessageRole.SYSTEM).dict()]

    result = self._generator.chat(
        messages=payload,
        generate_config={
            "stream": False,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        },
    )
    first_choice = result["choices"][0]
    return CompletionResponse(
        text=first_choice["message"]["content"],
        delta=None,
    )
@llm_completion_callback()
def stream_complete(
    self, prompt: str, formatted: bool = False, **kwargs: Any
) -> CompletionResponseGen:
    """Stream a completion through the chat endpoint.

    The prompt is wrapped in a single chat message, serialized to a plain
    dict, and passed to ``self._generator.chat`` with ``stream`` enabled.
    Each streamed chunk is turned into a ``CompletionResponse`` carrying
    the newly received piece (``delta``) and the text accumulated so far.

    Args:
        prompt: Text to send to the model.
        formatted: Accepted for interface compatibility; not used here.
        **kwargs: Accepted for interface compatibility; not used here.

    Returns:
        A generator yielding one ``CompletionResponse`` per streamed chunk
        that contains at least one choice.
    """
    assert self._generator is not None

    # NOTE(review): the prompt is sent with the SYSTEM role, not USER —
    # confirm this is intentional for the target model.
    message = ChatMessage.from_str(prompt, MessageRole.SYSTEM)
    msgs = [message.dict()]
    response_iter = self._generator.chat(
        messages=msgs,
        generate_config={
            "stream": True,
            "temperature": self.temperature,
            "max_tokens": self.max_tokens,
        },
    )

    def gen() -> CompletionResponseGen:
        text = ""
        for chunk in response_iter:
            choices = chunk.get("choices")
            if not choices:
                # Nothing to emit for a chunk without choices; indexing
                # [0] on it would raise IndexError.
                # NOTE(review): confirm whether the backend ever sends
                # such chunks (e.g. usage-only trailers).
                continue
            # `.get("content", "")` only covers a missing key; an explicit
            # None value would make the concatenation below raise
            # TypeError, so coerce it to an empty string.
            delta = choices[0]["delta"].get("content") or ""
            text += delta
            yield CompletionResponse(
                delta=delta,
                text=text,
            )

    return gen()