From 30a69d20d3e1b4b6618874e1f80a0d0fbe4939f4 Mon Sep 17 00:00:00 2001
From: Sven Knoblauch
Date: Mon, 4 Nov 2024 10:11:09 +0100
Subject: [PATCH 1/2] add multi lora support

---
 src/engine.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/src/engine.py b/src/engine.py
index 48e8aac..da35fa0 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -133,8 +133,13 @@ async def _initialize_engines(self):
         lora_modules = os.getenv('LORA_MODULES', None)
         if lora_modules is not None:
             try:
-                lora_modules = json.loads(lora_modules)
-                lora_modules = [LoRAModulePath(**lora_modules)]
+                lora_modules_dict = json.loads(lora_modules)
+                if type(lora_modules_dict) == list:
+                    lora_modules = []
+                    for adapter in lora_modules_dict:
+                        lora_modules.append(LoRAModulePath(**adapter))
+                else:
+                    lora_modules = [LoRAModulePath(**lora_modules_dict)]
             except:
                 lora_modules = None
 

From adc5b1e0cd2fd29a9a3d6b21cfdd11947d391589 Mon Sep 17 00:00:00 2001
From: Sven Knoblauch
Date: Mon, 20 Jan 2025 10:07:56 +0100
Subject: [PATCH 2/2] small changes for multi lora support

---
 src/engine.py | 41 +++++++++++++++++++++++------------------
 1 file changed, 23 insertions(+), 18 deletions(-)

diff --git a/src/engine.py b/src/engine.py
index da35fa0..7e8de84 100644
--- a/src/engine.py
+++ b/src/engine.py
@@ -121,37 +121,42 @@ def __init__(self, vllm_engine):
         super().__init__(vllm_engine)
         self.served_model_name = os.getenv("OPENAI_SERVED_MODEL_NAME_OVERRIDE") or self.engine_args.model
         self.response_role = os.getenv("OPENAI_RESPONSE_ROLE") or "assistant"
+
+        logging.info(f"---Loading adapters")
+        adapters = os.getenv("LORA_MODULES", [])
+
+        try:
+            adapters = json.loads(adapters)
+        except Exception as e:
+            logging.info(f"Error initializing adapter: {e}")
+            adapters = []
+
+        self.lora_adapters = []
+        for adapter in adapters:
+            try:
+                lora: LoRAModulePath = LoRAModulePath(**adapter)
+                self.lora_adapters.append(lora)
+            except Exception as e:
+                logging.info(f"Error initializing adapter: {e}")
+                continue
+
         asyncio.run(self._initialize_engines())
         self.raw_openai_output = bool(int(os.getenv("RAW_OPENAI_OUTPUT", 1)))
-
+
+
     async def _initialize_engines(self):
         self.model_config = await self.llm.get_model_config()
         self.base_model_paths = [
             BaseModelPath(name=self.engine_args.model, model_path=self.engine_args.model)
         ]
 
-        lora_modules = os.getenv('LORA_MODULES', None)
-        if lora_modules is not None:
-            try:
-                lora_modules_dict = json.loads(lora_modules)
-                if type(lora_modules_dict) == list:
-                    lora_modules = []
-                    for adapter in lora_modules_dict:
-                        lora_modules.append(LoRAModulePath(**adapter))
-                else:
-                    lora_modules = [LoRAModulePath(**lora_modules_dict)]
-            except:
-                lora_modules = None
-
-
-
         self.chat_engine = OpenAIServingChat(
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
             response_role=self.response_role,
             chat_template=self.tokenizer.tokenizer.chat_template,
-            lora_modules=lora_modules,
+            lora_modules=self.lora_adapters,
             prompt_adapters=None,
             request_logger=None
         )
@@ -159,7 +164,7 @@ async def _initialize_engines(self):
             engine_client=self.llm,
             model_config=self.model_config,
             base_model_paths=self.base_model_paths,
-            lora_modules=lora_modules,
+            lora_modules=self.lora_adapters,
             prompt_adapters=None,
             request_logger=None
         )
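
Reviewer note on the config format, with a hedged sketch: the standalone Python below mirrors the parsing flow that PATCH 2/2 moves into __init__. LORA_MODULES holds a JSON list of adapter specs, each unpacked into a LoRAModulePath. The LoRAModulePath stand-in here (with name and path fields) and the adapter names/paths are illustrative assumptions, not values from this repository; the real class comes from vLLM's OpenAI entrypoint code. Note one quirk carried over from the patch: os.getenv("LORA_MODULES", []) returns a list when the variable is unset, so json.loads raises and the broad except leaves the adapter list empty, which is the intended fallback. The sketch uses a "[]" string default instead.

# Hedged sketch of the LORA_MODULES parsing flow from PATCH 2/2.
# Assumption: LoRAModulePath is a stand-in dataclass with `name` and
# `path` fields, modeled on the class the patch imports from vLLM.
import json
import logging
import os
from dataclasses import dataclass


@dataclass
class LoRAModulePath:
    name: str   # model name the adapter is served under
    path: str   # filesystem path of the LoRA adapter weights


def load_lora_adapters() -> list:
    """Parse LORA_MODULES (a JSON list of adapter specs) into LoRAModulePath objects."""
    raw = os.getenv("LORA_MODULES", "[]")  # string default keeps json.loads happy
    try:
        specs = json.loads(raw)
    except Exception as e:
        logging.info(f"Error initializing adapter: {e}")
        specs = []

    adapters = []
    for spec in specs:
        try:
            adapters.append(LoRAModulePath(**spec))
        except Exception as e:
            # A malformed entry is skipped; the remaining adapters still load.
            logging.info(f"Error initializing adapter: {e}")
    return adapters


if __name__ == "__main__":
    # Hypothetical adapters, for illustration only.
    os.environ["LORA_MODULES"] = json.dumps([
        {"name": "sql-lora", "path": "/adapters/sql-lora"},
        {"name": "chat-lora", "path": "/adapters/chat-lora"},
    ])
    print(load_lora_adapters())  # -> two LoRAModulePath entries

Passing the resulting list to OpenAIServingChat and OpenAIServingCompletion via lora_modules=, as the patch does, is what lets clients select an adapter by its name in the request's model field on the OpenAI-compatible endpoints.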