from llama_index.llms.dashscope import DashScope
from llama_index.core.base.llms.types import LLMMetadata


class DashScopeGenerationModels:
    """Model name identifiers for the DashScope Qwen series."""

    QWEN_TURBO = "qwen-turbo"
    QWEN_PLUS = "qwen-plus"
    QWEN_MAX = "qwen-max"
    QWEN_MAX_1201 = "qwen-max-1201"
    QWEN_MAX_LONGCONTEXT = "qwen-max-longcontext"
    # Must be a plain string: a trailing comma here would turn the constant
    # into a 1-tuple, so its DASHSCOPE_MODEL_META key could never match a
    # model name given as a string.
    QWEN2_MATH_72B_INSTRUCT = "qwen2-math-72b-instruct"
    QWEN2_72B = "qwen2-72b-instruct"


# Default LLMMetadata fields per model name. Treated as read-only defaults:
# CustomDashScope.metadata copies an entry before applying per-instance
# overrides.
DASHSCOPE_MODEL_META = {
    DashScopeGenerationModels.QWEN_TURBO: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_PLUS: {
        "context_window": 1024 * 32,
        "num_output": 1024 * 32,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX_1201: {
        "context_window": 1024 * 8,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN_MAX_LONGCONTEXT: {
        "context_window": 1024 * 30,
        "num_output": 1024 * 30,
        "is_chat_model": True,
    },
    # NOTE(review): for the two qwen2 entries below, context_window (2048) is
    # smaller than num_output (8192) -- confirm against the DashScope model
    # specifications.
    DashScopeGenerationModels.QWEN2_MATH_72B_INSTRUCT: {
        "context_window": 1024 * 2,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
    DashScopeGenerationModels.QWEN2_72B: {
        "context_window": 1024 * 2,
        "num_output": 1024 * 8,
        "is_chat_model": True,
    },
}


class CustomDashScope(DashScope):
    """DashScope LLM whose metadata is looked up in DASHSCOPE_MODEL_META."""

    @property
    def metadata(self) -> LLMMetadata:
        """Build the LLMMetadata for ``self.model_name``.

        ``num_output`` is ``self.max_tokens`` when that is truthy, otherwise
        the table default for the model.

        Returns:
            LLMMetadata populated from the model's DASHSCOPE_MODEL_META entry.

        Raises:
            KeyError: If ``self.model_name`` has no DASHSCOPE_MODEL_META entry.
        """
        defaults = DASHSCOPE_MODEL_META[self.model_name]
        # Copy instead of writing back into the module-level table, so one
        # instance's max_tokens does not become the default for every other
        # instance of the same model.
        model_meta = {
            **defaults,
            "num_output": self.max_tokens or defaults["num_output"],
        }
        return LLMMetadata(model_name=self.model_name, **model_meta)