@@ -56,6 +56,13 @@ async def get_model_list(self, required_mem: int) -> None:
5656 "object" : f"{ model_type } .setup" ,
5757 "system_prompt" : "You are a helpful assistant."
5858 })
59+
60+ mode_param = model_data .get ("mode_param" , {})
61+ precompute_len = None
62+ cmm_size = None
63+ if isinstance (mode_param , dict ):
64+ precompute_len = mode_param .get ("precompute_len" )
65+ cmm_size = mode_param .get ("cmm_size" )
5966 if '-1.5B-' in mode :
6067 new_entry ['memory_required' ] = 1782579
6168 new_entry ['pool_size' ] = 1
@@ -68,6 +75,8 @@ async def get_model_list(self, required_mem: int) -> None:
6875 else :
6976 new_entry ['memory_required' ] = 1363148
7077 new_entry ['pool_size' ] = 2
78+ if cmm_size is not None :
79+ new_entry ['memory_required' ] = cmm_size
7180
7281 if '-p256-' in mode :
7382 new_entry ['max_context_length' ] = 256
@@ -81,6 +90,9 @@ async def get_model_list(self, required_mem: int) -> None:
8190 if '-ctx-' in mode :
8291 new_entry ['max_context_length' ] = 1024
8392
93+ if precompute_len is not None :
94+ new_entry ['max_context_length' ] = precompute_len
95+
8496 elif model_type == 'tts' :
8597 if 'melotts' in mode .lower ():
8698 obj = 'melotts.setup'
0 commit comments