From 93340974030671e1e6b1cf1928846c9c7d8b83fc Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Thu, 19 Dec 2024 10:19:38 +0000 Subject: [PATCH 01/11] Add docstrings and comments. --- examples/model_configs/base_model.yaml | 2 +- examples/model_configs/peft_model.yaml | 2 +- examples/model_configs/quantized_model.yaml | 2 +- examples/model_configs/serverless_model.yaml | 2 +- examples/model_configs/tgi_model.yaml | 2 +- .../models/transformers/adapter_model.py | 26 ++++++++++++++++++- .../models/transformers/delta_model.py | 13 +++++++++- 7 files changed, 42 insertions(+), 7 deletions(-) diff --git a/examples/model_configs/base_model.yaml b/examples/model_configs/base_model.yaml index d6563e616..802b2ebaf 100644 --- a/examples/model_configs/base_model.yaml +++ b/examples/model_configs/base_model.yaml @@ -1,6 +1,6 @@ model: base_params: - model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... + model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True.To see the full list of parameters, please click here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments dtype: "bfloat16" compile: true merged_weights: # Ignore this section if you are not using PEFT models diff --git a/examples/model_configs/peft_model.yaml b/examples/model_configs/peft_model.yaml index 81205818a..def069706 100644 --- a/examples/model_configs/peft_model.yaml +++ b/examples/model_configs/peft_model.yaml @@ -1,6 +1,6 @@ model: base_params: - model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied. + model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/package_reference/models#lighteval.models.transformers.adapter_model.AdapterModelConfig dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. compile: true merged_weights: # Ignore this section if you are not using PEFT models diff --git a/examples/model_configs/quantized_model.yaml b/examples/model_configs/quantized_model.yaml index 3bc6b2c37..51249c8ce 100644 --- a/examples/model_configs/quantized_model.yaml +++ b/examples/model_configs/quantized_model.yaml @@ -1,6 +1,6 @@ model: base_params: - model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... 
+ model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True.To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. compile: true merged_weights: # Ignore this section if you are not using PEFT models diff --git a/examples/model_configs/serverless_model.yaml b/examples/model_configs/serverless_model.yaml index af1652e1e..ae2611274 100644 --- a/examples/model_configs/serverless_model.yaml +++ b/examples/model_configs/serverless_model.yaml @@ -1,3 +1,3 @@ model: base_params: - model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B" + model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B"To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models diff --git a/examples/model_configs/tgi_model.yaml b/examples/model_configs/tgi_model.yaml index 8db5654d8..059a1d310 100644 --- a/examples/model_configs/tgi_model.yaml +++ b/examples/model_configs/tgi_model.yaml @@ -2,4 +2,4 @@ model: instance: inference_server_address: "" inference_server_auth: null - model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory + model_id: null # Optional, only required if the TGI container was launched with model_id pointing to a local directory. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#lighteval.models.endpoints.tgi_model.TGIModelConfig diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index e66a1aa1d..af69ed72b 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -41,6 +41,10 @@ @dataclass class AdapterModelConfig(BaseModelConfig): + """ + This class is used to manage the configuration of adapter models. Adapter models are designed to extend or adapt a + base model's functionality for specific tasks while keeping most of the base model's parameters frozen. + """ # Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config base_model: str = None @@ -58,7 +62,19 @@ def init_configs(self, env_config: EnvConfig): class AdapterModel(BaseModel): + """ + This class is designed to integrate adapter models with a pre-trained base model. + """ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer: + """ + Creates and configures the adapter model by applying adapter weights to the base model. + + Args: + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. 
+ + Returns: PreTrainedTokenizer + """ # By default, we look at the model config for the model stored in `base_model` # (= the parent model, not the model of interest) return self._create_auto_tokenizer_with_name( @@ -71,7 +87,15 @@ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConf ) def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM: - """Returns a PeftModel from a base model and a version fined tuned using PEFT.""" + """ + It returns a PeftModel from a base model and a version fined tuned using PEFT. + + Args: + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. + + Returns: AutoModelForCasualLM + """ torch_dtype = _get_dtype(config.dtype, self._config) config.model_parallel, max_memory, device_map = self.init_model_parallel(config.model_parallel) diff --git a/src/lighteval/models/transformers/delta_model.py b/src/lighteval/models/transformers/delta_model.py index 20780f1e7..2ab675cb2 100644 --- a/src/lighteval/models/transformers/delta_model.py +++ b/src/lighteval/models/transformers/delta_model.py @@ -38,6 +38,9 @@ @dataclass class DeltaModelConfig(BaseModelConfig): + """ + This class is used to manage the configuration class for delta models. + """ # Delta models look at the pretrained (= the delta weights) for the tokenizer and model config base_model: str = None @@ -59,7 +62,15 @@ def _create_auto_model( config: DeltaModelConfig, env_config: EnvConfig, ) -> AutoModelForCausalLM: - """Returns a model created by adding the weights of a delta model to a base model.""" + """ + It returns a model created by adding the weights of a delta model to a base model. + + Args: + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. + + Returns: AutoModelForCasualLM + """ config.model_parallel, max_memory, device_map = self.init_model_parallel(config.model_parallel) torch_dtype = _get_dtype(config.dtype, self._config) From f35ad576ec1e518c3c8cebc9b4fceb31a9565370 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Fri, 20 Dec 2024 10:05:26 +0000 Subject: [PATCH 02/11] Add docstrings for config class. --- .../models/endpoints/endpoint_model.py | 21 ++++++++++++++++++ .../models/endpoints/openai_model.py | 7 ++++++ src/lighteval/models/endpoints/tgi_model.py | 8 +++++++ src/lighteval/models/vllm/vllm_model.py | 22 +++++++++++++++++++ 4 files changed, 58 insertions(+) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 80798b616..6743d8e05 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -90,6 +90,27 @@ def from_path(cls, path: str) -> "ServerlessEndpointModelConfig": @dataclass class InferenceEndpointModelConfig: + """ + This class is designed to manage and define settings for deploying inference endpoints in machine learning models. + + Attributes: + endpoint_name (str, optional):The name of the inference endpoint. + model_name (str, optional): The name of the model for inference. + reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint. + accelerator (str, default: "gpu"): Specifies the type of hardware accelerator. + model_dtype (str, optional): The data type used by the model. Defaults to the framework's choice if None. + vendor (str, default: "aws"): Cloud service provider for hosting the endpoint. 
+ region (str, default: "us-east-1"): Cloud region, chosen based on hardware availability. + instance_size (str, optional): Specifies the size of the instance (e.g., large, xlarge). + instance_type (str, optional): Specifies the type of the instance (e.g., g5.4xlarge). + framework (str, default: "pytorch"): Framework used for inference (e.g., pytorch, tensorflow). + endpoint_type (str, default: "protected"): Security level of the endpoint (e.g., public, protected). + add_special_tokens (bool, default: True): Specifies if special tokens should be added during processing. + revision (str, default: "main"): The Git branch or commit hash of the model. + namespace (str, optional): The namespace under which the endpoint is launched. + image_url (str, optional): Docker image URL for the endpoint. + env_vars (dict, optional): Environment variables for the endpoint. + """ endpoint_name: str = None model_name: str = None reuse_existing: bool = False diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index b2ca25285..c38e3d2d3 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -61,6 +61,13 @@ @dataclass class OpenAIModelConfig: + """ + A configuration class for OpenAI models. This class is used to specify settings related to OpenAI models, + including the model name or identifier. + + Attributes: + model: It specifies the name or identifier of the OpenAI model to be used. + """ model: str diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 3f20e4a57..4f5118f39 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -47,6 +47,14 @@ def divide_chunks(array, n): @dataclass class TGIModelConfig: + """ + This class provides a streamlined configuration for integrating with Text Generation Inference (TGI) endpoints. + + Attributes: + inference_server_address (str, required): The endpoint address of the inference server hosting the model. + inference_server_auth (str, required): Authentication credentials or tokens required to access the server. + model_id (str, required): Identifier for the model hosted on the inference server. + """ inference_server_address: str inference_server_auth: str model_id: str diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index 2d413807d..e95bb380a 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -68,6 +68,28 @@ @dataclass class VLLMModelConfig: + """ + This class defines the configuration parameters for deploying and running models using the vLLM framework. + + Attributes: + pretrained (str, required): The identifier for the pretrained model (e.g., model name or path). + gpu_memory_utilisation (float, default: 0.9): Fraction of GPU memory to allocate for the model. Reduce this value if you encounter memory issues. + revision (str, default: "main"): Specifies the branch or version of the model repository. + dtype (str | None, optional): Data type for computations (e.g., float32, float16, or bfloat16). Defaults to the model's preset if None. + tensor_parallel_size (int, default: 1): Number of GPUs used for splitting tensors across devices. + pipeline_parallel_size (int, default: 1): Number of GPUs used for pipeline parallelism. + data_parallel_size (int, default: 1): Number of GPUs used for data parallelism. 
+ max_model_length (int | None, optional): Maximum sequence length for the model. If None, it is inferred automatically. Can be reduced to handle Out-of-Memory (OOM) issues. + swap_space (int, default: 4): Amount of CPU swap space (in GiB) per GPU for offloading. + seed (int, default: 1234): Seed for reproducibility in experiments. + trust_remote_code (bool, default: False): Whether to trust custom code provided by remote repositories. + use_chat_template (bool, default: False): Specifies if chat-specific templates should be used for input formatting. + add_special_tokens (bool, default: True): Indicates whether to add special tokens during tokenization. + multichoice_continuations_start_space (bool, default: True): Adds a space at the beginning of each continuation during multi-choice generation. + pairwise_tokenization (bool, default: False): Specifies if context and continuation should be tokenized separately or together. + subfolder (Optional[str], optional): Path to a specific subfolder in the model repository, if applicable. + temperature (float, default: 0.6): Sampling temperature for stochastic tasks. Ignored for deterministic tasks (set internally to 0). + """ pretrained: str gpu_memory_utilisation: float = 0.9 # lower this if you are running out of memory revision: str = "main" # revision of the model From bf50f2d7a5ed2964bfdaacda3c47ee4c809a45df Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Mon, 30 Dec 2024 10:10:31 +0000 Subject: [PATCH 03/11] Add proper spacing between comments. --- examples/model_configs/peft_model.yaml | 2 +- examples/model_configs/quantized_model.yaml | 6 +++--- examples/model_configs/serverless_model.yaml | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/model_configs/peft_model.yaml b/examples/model_configs/peft_model.yaml index def069706..053d42480 100644 --- a/examples/model_configs/peft_model.yaml +++ b/examples/model_configs/peft_model.yaml @@ -1,6 +1,6 @@ model: base_params: - model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied.To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/package_reference/models#lighteval.models.transformers.adapter_model.AdapterModelConfig + model_args: "pretrained=predibase/customer_support,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ... For a PEFT model, the pretrained model should be the one trained with PEFT and the base model below will contain the original model on which the adapters will be applied. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/package_reference/models#lighteval.models.transformers.adapter_model.AdapterModelConfig dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. 
compile: true merged_weights: # Ignore this section if you are not using PEFT models diff --git a/examples/model_configs/quantized_model.yaml b/examples/model_configs/quantized_model.yaml index 51249c8ce..0647379f9 100644 --- a/examples/model_configs/quantized_model.yaml +++ b/examples/model_configs/quantized_model.yaml @@ -1,11 +1,11 @@ model: base_params: - model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True.To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments + model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments . dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. compile: true - merged_weights: # Ignore this section if you are not using PEFT models + merged_weights: # Ignore this section if you are not using PEFT models . delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name adapter_weights: false # set to True of your model has been trained with peft, also need to provide the base model name - base_model: null # path to the base_model - needs to be specified only if delta_weights or adapter_weights is set to True + base_model: null # path to the base_model - needs to be specified only if delta_weights or adapter_weights is set to True generation: multichoice_continuations_start_space: null # If true/false, will force multiple choice continuations to start/not start with a space. If none, will do nothing diff --git a/examples/model_configs/serverless_model.yaml b/examples/model_configs/serverless_model.yaml index ae2611274..214b43319 100644 --- a/examples/model_configs/serverless_model.yaml +++ b/examples/model_configs/serverless_model.yaml @@ -1,3 +1,3 @@ model: base_params: - model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B"To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models + model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B". #To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models . 
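For context (this aside is illustrative only and is not part of the patch series): the configuration classes documented in the patches above are plain dataclasses, so they can be instantiated directly in Python. The sketch below uses only the field names shown in the diffs for TGIModelConfig and OpenAIModelConfig; the server address and the OpenAI model identifier are placeholder values, and the auth/model_id values mirror the defaults in examples/model_configs/tgi_model.yaml.

from lighteval.models.endpoints.openai_model import OpenAIModelConfig
from lighteval.models.endpoints.tgi_model import TGIModelConfig

# TGIModelConfig takes the three fields described in its new docstring.
# The address below is a placeholder for a locally running TGI container;
# auth is left unset and model_id is only needed when the container was
# launched with a local model directory (see tgi_model.yaml above).
tgi_config = TGIModelConfig(
    inference_server_address="http://localhost:8080",
    inference_server_auth=None,
    model_id=None,
)

# OpenAIModelConfig only needs the model name or identifier; "gpt-4o" is a placeholder.
openai_config = OpenAIModelConfig(model="gpt-4o")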
From 3ca0960be5d2f056d28e6854d51df2b94eac4a0d Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Mon, 30 Dec 2024 15:49:36 +0530 Subject: [PATCH 04/11] Re-write comments as per review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com> --- src/lighteval/models/endpoints/endpoint_model.py | 4 ++-- src/lighteval/models/endpoints/openai_model.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 9d3ecbcef..4f2e88ab8 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -91,10 +91,10 @@ def from_path(cls, path: str) -> "ServerlessEndpointModelConfig": @dataclass class InferenceEndpointModelConfig: """ - This class is designed to manage and define settings for deploying inference endpoints in machine learning models. +This class defines the settings used to deploy inference endpoints automatically. (Inference endpoint docs: https://huggingface.co/docs/inference-endpoints/index) Attributes: - endpoint_name (str, optional):The name of the inference endpoint. + endpoint_name (str, Optional): Inference endpoint name (to use at creation or reuse) model_name (str, optional): The name of the model for inference. reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint. accelerator (str, default: "gpu"): Specifies the type of hardware accelerator. diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index c38e3d2d3..d1c9f5f41 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -62,11 +62,10 @@ @dataclass class OpenAIModelConfig: """ - A configuration class for OpenAI models. This class is used to specify settings related to OpenAI models, - including the model name or identifier. - + Configuration class to create an [[OpenAIModel]], to call via its API at inference for evaluation. + Attributes: - model: It specifies the name or identifier of the OpenAI model to be used. + model: name or identifier of the OpenAI model to be used for inference. """ model: str From bf65e27938bb0d140bed3d8d2b0e4aaacbc58b42 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Mon, 30 Dec 2024 10:20:08 +0000 Subject: [PATCH 05/11] Update spacing in yaml file. --- examples/model_configs/base_model.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/model_configs/base_model.yaml b/examples/model_configs/base_model.yaml index 802b2ebaf..602fa6808 100644 --- a/examples/model_configs/base_model.yaml +++ b/examples/model_configs/base_model.yaml @@ -1,6 +1,6 @@ model: base_params: - model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True.To see the full list of parameters, please click here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments + model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True. 
To see the full list of parameters, please click here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments dtype: "bfloat16" compile: true merged_weights: # Ignore this section if you are not using PEFT models From caae117d3261cef8b393ed14c3aa2d94b268263b Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 31 Dec 2024 15:15:08 +0530 Subject: [PATCH 06/11] Re-write comments as per review-2 Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com> --- examples/model_configs/quantized_model.yaml | 2 +- examples/model_configs/serverless_model.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/model_configs/quantized_model.yaml b/examples/model_configs/quantized_model.yaml index 0647379f9..47cf4a089 100644 --- a/examples/model_configs/quantized_model.yaml +++ b/examples/model_configs/quantized_model.yaml @@ -3,7 +3,7 @@ model: model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True. To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/main/en/quicktour#model-arguments . dtype: "4bit" # Specifying the model to be loaded in 4 bit uses BitsAndBytesConfig. The other option is to use "8bit" quantization. compile: true - merged_weights: # Ignore this section if you are not using PEFT models . + merged_weights: # Ignore this section if you are not using PEFT models. delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name adapter_weights: false # set to True of your model has been trained with peft, also need to provide the base model name base_model: null # path to the base_model - needs to be specified only if delta_weights or adapter_weights is set to True diff --git a/examples/model_configs/serverless_model.yaml b/examples/model_configs/serverless_model.yaml index 214b43319..3c1250644 100644 --- a/examples/model_configs/serverless_model.yaml +++ b/examples/model_configs/serverless_model.yaml @@ -1,3 +1,3 @@ model: base_params: - model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B". #To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models . + model_name: "meta-llama/Llama-3.1-8B-Instruct" #Qwen/Qwen2.5-14B" #Qwen/Qwen2.5-7B". #To see the full list of parameters, please see here: https://huggingface.co/docs/lighteval/package_reference/models#endpoints-based-models From 095f2772ab0aa67372fc1d8b4ebecd7009ccf48d Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Tue, 31 Dec 2024 10:45:47 +0000 Subject: [PATCH 07/11] Make style. --- .../models/endpoints/endpoint_model.py | 4 +-- .../models/endpoints/openai_model.py | 6 ++-- src/lighteval/models/endpoints/tgi_model.py | 4 +-- .../models/transformers/adapter_model.py | 28 +++++++++++++------ src/lighteval/models/vllm/vllm_model.py | 2 +- 5 files changed, 28 insertions(+), 16 deletions(-) diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 4f2e88ab8..2f75d4412 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -91,11 +91,11 @@ def from_path(cls, path: str) -> "ServerlessEndpointModelConfig": @dataclass class InferenceEndpointModelConfig: """ -This class defines the settings used to deploy inference endpoints automatically. 
(Inference endpoint docs: https://huggingface.co/docs/inference-endpoints/index) +This class defines the settings used to deploy inference endpoints automatically. (Inference endpoint docs: https://huggingface.co/docs/inference-endpoints/index) Attributes: endpoint_name (str, Optional): Inference endpoint name (to use at creation or reuse) - model_name (str, optional): The name of the model for inference. + model_name (str, optional): The name of the model for inference. reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint. accelerator (str, default: "gpu"): Specifies the type of hardware accelerator. model_dtype (str, optional): The data type used by the model. Defaults to the framework's choice if None. diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index d1c9f5f41..6fc502e5a 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -62,8 +62,8 @@ @dataclass class OpenAIModelConfig: """ - Configuration class to create an [[OpenAIModel]], to call via its API at inference for evaluation. - + Configuration class to create an [[OpenAIModel]], to call via its API at inference for evaluation. + Attributes: model: name or identifier of the OpenAI model to be used for inference. """ @@ -240,7 +240,7 @@ def _loglikelihood_tokens( ), "Only single token continuations are supported when using openai API." for i in range(len(dataset)): - logit_bias = {tok: 100 for tok in dataset[i].tokenized_continuation} + logit_bias = dict.fromkeys(dataset[i].tokenized_continuation, 100) logit_biass.append(logit_bias) outputs = self.__call_api_parallel( diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 4f5118f39..66895d81d 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -48,10 +48,10 @@ def divide_chunks(array, n): @dataclass class TGIModelConfig: """ - This class provides a streamlined configuration for integrating with Text Generation Inference (TGI) endpoints. + Provides a streamlined configuration for integrating with Text Generation Inference (TGI) endpoints. To know more, please click here: https://huggingface.co/docs/text-generation-inference/index Attributes: - inference_server_address (str, required): The endpoint address of the inference server hosting the model. + inference_server_address (str, required):Endpoint address of the inference server hosting the model. inference_server_auth (str, required): Authentication credentials or tokens required to access the server. model_id (str, required): Identifier for the model hosted on the inference server. """ diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index af69ed72b..2551d5346 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -42,7 +42,7 @@ @dataclass class AdapterModelConfig(BaseModelConfig): """ - This class is used to manage the configuration of adapter models. Adapter models are designed to extend or adapt a + Manages the configuration of adapter models. Adapter models are designed to extend or adapt a base model's functionality for specific tasks while keeping most of the base model's parameters frozen. 
""" # Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config @@ -58,20 +58,32 @@ def __post_init__(self): return super().__post_init__() def init_configs(self, env_config: EnvConfig): + """ + Initializes the configurations of adapter models. + + Args: + env_configs(EnvConfig): An instance of EnvConfig. + + Returns: + Any: + """ return self._init_configs(self.base_model, env_config) class AdapterModel(BaseModel): """ - This class is designed to integrate adapter models with a pre-trained base model. + Integrates the adapter models with a pre-trained base model. + + Args: + """ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer: """ Creates and configures the adapter model by applying adapter weights to the base model. Args: - config(AdapterModelConfig): An instance of AdapterModelConfig. - env_config(EnvConfig): An instance of EnvConfig. + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. Returns: PreTrainedTokenizer """ @@ -89,12 +101,12 @@ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConf def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM: """ It returns a PeftModel from a base model and a version fined tuned using PEFT. - + Args: - config(AdapterModelConfig): An instance of AdapterModelConfig. - env_config(EnvConfig): An instance of EnvConfig. + config(AdapterModelConfig): An instance of AdapterModelConfig. + env_config(EnvConfig): An instance of EnvConfig. - Returns: AutoModelForCasualLM + Returns: AutoModelForCasualLM """ torch_dtype = _get_dtype(config.dtype, self._config) config.model_parallel, max_memory, device_map = self.init_model_parallel(config.model_parallel) diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index e95bb380a..befa1cda9 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -69,7 +69,7 @@ @dataclass class VLLMModelConfig: """ - This class defines the configuration parameters for deploying and running models using the vLLM framework. + This class defines the configuration parameters for deploying and running models using the vLLM framework. Attributes: pretrained (str, required): The identifier for the pretrained model (e.g., model name or path). From e3f6f1357481788ff5caf483940e1703756700ee Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Thu, 9 Jan 2025 16:12:03 +0000 Subject: [PATCH 08/11] make style. --- src/lighteval/models/endpoints/openai_model.py | 1 + src/lighteval/models/endpoints/tgi_model.py | 1 + src/lighteval/models/transformers/adapter_model.py | 2 ++ src/lighteval/models/transformers/delta_model.py | 1 + 4 files changed, 5 insertions(+) diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index 6fc502e5a..1645260fa 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -67,6 +67,7 @@ class OpenAIModelConfig: Attributes: model: name or identifier of the OpenAI model to be used for inference. 
""" + model: str diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index 66895d81d..c15c12df4 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -55,6 +55,7 @@ class TGIModelConfig: inference_server_auth (str, required): Authentication credentials or tokens required to access the server. model_id (str, required): Identifier for the model hosted on the inference server. """ + inference_server_address: str inference_server_auth: str model_id: str diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index 2551d5346..211e842bb 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -45,6 +45,7 @@ class AdapterModelConfig(BaseModelConfig): Manages the configuration of adapter models. Adapter models are designed to extend or adapt a base model's functionality for specific tasks while keeping most of the base model's parameters frozen. """ + # Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config base_model: str = None @@ -77,6 +78,7 @@ class AdapterModel(BaseModel): Args: """ + def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer: """ Creates and configures the adapter model by applying adapter weights to the base model. diff --git a/src/lighteval/models/transformers/delta_model.py b/src/lighteval/models/transformers/delta_model.py index 2ab675cb2..e002e7d67 100644 --- a/src/lighteval/models/transformers/delta_model.py +++ b/src/lighteval/models/transformers/delta_model.py @@ -41,6 +41,7 @@ class DeltaModelConfig(BaseModelConfig): """ This class is used to manage the configuration class for delta models. """ + # Delta models look at the pretrained (= the delta weights) for the tokenizer and model config base_model: str = None From 79a3e35f4366d0ecc4c68063a337f61b32f0ecf8 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Thu, 9 Jan 2025 16:18:32 +0000 Subject: [PATCH 09/11] styling improvements. --- examples/model_configs/quantized_model.yaml | 2 +- .../models/endpoints/endpoint_model.py | 39 ++++++++++--------- src/lighteval/models/vllm/vllm_model.py | 1 + 3 files changed, 22 insertions(+), 20 deletions(-) diff --git a/examples/model_configs/quantized_model.yaml b/examples/model_configs/quantized_model.yaml index 47cf4a089..30e4786f1 100644 --- a/examples/model_configs/quantized_model.yaml +++ b/examples/model_configs/quantized_model.yaml @@ -6,6 +6,6 @@ model: merged_weights: # Ignore this section if you are not using PEFT models. delta_weights: false # set to True of your model should be merged with a base model, also need to provide the base model name adapter_weights: false # set to True of your model has been trained with peft, also need to provide the base model name - base_model: null # path to the base_model - needs to be specified only if delta_weights or adapter_weights is set to True + base_model: null # path to the base_model - needs to be specified only if delta_weights or adapter_weights is set to True generation: multichoice_continuations_start_space: null # If true/false, will force multiple choice continuations to start/not start with a space. 
If none, will do nothing diff --git a/src/lighteval/models/endpoints/endpoint_model.py b/src/lighteval/models/endpoints/endpoint_model.py index 2f75d4412..435ca0747 100644 --- a/src/lighteval/models/endpoints/endpoint_model.py +++ b/src/lighteval/models/endpoints/endpoint_model.py @@ -91,26 +91,27 @@ def from_path(cls, path: str) -> "ServerlessEndpointModelConfig": @dataclass class InferenceEndpointModelConfig: """ -This class defines the settings used to deploy inference endpoints automatically. (Inference endpoint docs: https://huggingface.co/docs/inference-endpoints/index) - - Attributes: - endpoint_name (str, Optional): Inference endpoint name (to use at creation or reuse) - model_name (str, optional): The name of the model for inference. - reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint. - accelerator (str, default: "gpu"): Specifies the type of hardware accelerator. - model_dtype (str, optional): The data type used by the model. Defaults to the framework's choice if None. - vendor (str, default: "aws"): Cloud service provider for hosting the endpoint. - region (str, default: "us-east-1"): Cloud region, chosen based on hardware availability. - instance_size (str, optional): Specifies the size of the instance (e.g., large, xlarge). - instance_type (str, optional): Specifies the type of the instance (e.g., g5.4xlarge). - framework (str, default: "pytorch"): Framework used for inference (e.g., pytorch, tensorflow). - endpoint_type (str, default: "protected"): Security level of the endpoint (e.g., public, protected). - add_special_tokens (bool, default: True): Specifies if special tokens should be added during processing. - revision (str, default: "main"): The Git branch or commit hash of the model. - namespace (str, optional): The namespace under which the endpoint is launched. - image_url (str, optional): Docker image URL for the endpoint. - env_vars (dict, optional): Environment variables for the endpoint. + This class defines the settings used to deploy inference endpoints automatically. (Inference endpoint docs: https://huggingface.co/docs/inference-endpoints/index) + + Attributes: + endpoint_name (str, Optional): Inference endpoint name (to use at creation or reuse) + model_name (str, optional): The name of the model for inference. + reuse_existing (bool, default: False): Indicates whether to reuse an existing endpoint. + accelerator (str, default: "gpu"): Specifies the type of hardware accelerator. + model_dtype (str, optional): The data type used by the model. Defaults to the framework's choice if None. + vendor (str, default: "aws"): Cloud service provider for hosting the endpoint. + region (str, default: "us-east-1"): Cloud region, chosen based on hardware availability. + instance_size (str, optional): Specifies the size of the instance (e.g., large, xlarge). + instance_type (str, optional): Specifies the type of the instance (e.g., g5.4xlarge). + framework (str, default: "pytorch"): Framework used for inference (e.g., pytorch, tensorflow). + endpoint_type (str, default: "protected"): Security level of the endpoint (e.g., public, protected). + add_special_tokens (bool, default: True): Specifies if special tokens should be added during processing. + revision (str, default: "main"): The Git branch or commit hash of the model. + namespace (str, optional): The namespace under which the endpoint is launched. + image_url (str, optional): Docker image URL for the endpoint. + env_vars (dict, optional): Environment variables for the endpoint. 
""" + endpoint_name: str = None model_name: str = None reuse_existing: bool = False diff --git a/src/lighteval/models/vllm/vllm_model.py b/src/lighteval/models/vllm/vllm_model.py index befa1cda9..1eea6aacd 100644 --- a/src/lighteval/models/vllm/vllm_model.py +++ b/src/lighteval/models/vllm/vllm_model.py @@ -90,6 +90,7 @@ class VLLMModelConfig: subfolder (Optional[str], optional): Path to a specific subfolder in the model repository, if applicable. temperature (float, default: 0.6): Sampling temperature for stochastic tasks. Ignored for deterministic tasks (set internally to 0). """ + pretrained: str gpu_memory_utilisation: float = 0.9 # lower this if you are running out of memory revision: str = "main" # revision of the model From 4f7ee64032ca9ac7aebf607a7d1e914a0149566c Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Sat, 11 Jan 2025 14:17:34 +0000 Subject: [PATCH 10/11] Update Docstrings and fix formatting. --- src/lighteval/models/endpoints/tgi_model.py | 2 +- src/lighteval/models/transformers/adapter_model.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/lighteval/models/endpoints/tgi_model.py b/src/lighteval/models/endpoints/tgi_model.py index c15c12df4..1c4a82bec 100644 --- a/src/lighteval/models/endpoints/tgi_model.py +++ b/src/lighteval/models/endpoints/tgi_model.py @@ -51,7 +51,7 @@ class TGIModelConfig: Provides a streamlined configuration for integrating with Text Generation Inference (TGI) endpoints. To know more, please click here: https://huggingface.co/docs/text-generation-inference/index Attributes: - inference_server_address (str, required):Endpoint address of the inference server hosting the model. + inference_server_address (str, required):Endpoint address of the inference server hosting the model. inference_server_auth (str, required): Authentication credentials or tokens required to access the server. model_id (str, required): Identifier for the model hosted on the inference server. """ diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index 211e842bb..0cc55d9d0 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -102,7 +102,7 @@ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConf def _create_auto_model(self, config: AdapterModelConfig, env_config: EnvConfig) -> AutoModelForCausalLM: """ - It returns a PeftModel from a base model and a version fined tuned using PEFT. + Returns a PeftModel from a base model and a version fined tuned using PEFT. Args: config(AdapterModelConfig): An instance of AdapterModelConfig. From 5d225b39b75b28b27fe20aa5e5b7c40206c2ec07 Mon Sep 17 00:00:00 2001 From: Parag Ekbote Date: Sat, 11 Jan 2025 15:42:05 +0000 Subject: [PATCH 11/11] Update docstrings to files. --- .../models/endpoints/openai_model.py | 32 +++++++++++++++++-- .../models/transformers/adapter_model.py | 9 +++--- 2 files changed, 34 insertions(+), 7 deletions(-) diff --git a/src/lighteval/models/endpoints/openai_model.py b/src/lighteval/models/endpoints/openai_model.py index e9c459afa..2d8905140 100644 --- a/src/lighteval/models/endpoints/openai_model.py +++ b/src/lighteval/models/endpoints/openai_model.py @@ -66,18 +66,44 @@ class OpenAIModelConfig: Configuration class to create an [[OpenAIModel]], to call via its API at inference for evaluation. Attributes: - model: name or identifier of the OpenAI model to be used for inference. 
+ model (str): name or identifier of the OpenAI model to be used for inference. + generation_parameters(None,GenerationParameters): Parameters for model generation. If not + provided, defaults to a new instance + of `GenerationParameters`. """ model: str generation_parameters: GenerationParameters = None def __post_init__(self): + """ + + Post-initialization that ensures the `generation_parameters` is set + to a valid `GenerationParameters`. If not provided, initializes a default one. + """ if not self.generation_parameters: self.generation_parameters = GenerationParameters() @classmethod def from_path(cls, path: str) -> "OpenAIModelConfig": + """ + Creates an instance of `OpenAIModelConfig` from a YAML configuration file. + + Loads the model configuration from a given file path and initializes the + `OpenAIModelConfig` with the model name and corresponding `GenerationParameters` parsed + from the file. + + Args: + path (str): Path to the YAML configuration file containing the model configuration. + + Returns: + OpenAIModelConfig: An instance of `OpenAIModelConfig` with the configuration loaded + from the specified YAML file. + + Raises: + FileNotFoundError: If the specified file path does not exist. + KeyError: If required keys are missing in the YAML configuration file. + """ import yaml with open(path, "r") as f: @@ -169,11 +195,11 @@ def greedy_until( Generates responses using a greedy decoding strategy until certain ending conditions are met. Args: - requests (list[Request]): list of requests containing the context and ending conditions. + requests (list[GreedyUntilRequest]): list of requests containing the context and ending conditions. override_bs (int, optional): Override the batch size for generation. Defaults to None. Returns: - list[GenerativeResponse]: list of generated responses. + list [GenerativeResponse]: list of generated responses. """ for request in requests: request.tokenized_context = self.tok_encode(request.context) diff --git a/src/lighteval/models/transformers/adapter_model.py b/src/lighteval/models/transformers/adapter_model.py index 0cc55d9d0..c39562cc2 100644 --- a/src/lighteval/models/transformers/adapter_model.py +++ b/src/lighteval/models/transformers/adapter_model.py @@ -44,6 +44,10 @@ class AdapterModelConfig(BaseModelConfig): """ Manages the configuration of adapter models. Adapter models are designed to extend or adapt a base model's functionality for specific tasks while keeping most of the base model's parameters frozen. + + Attributes: + base_model (str): The name of the parent base model. This model provides the tokenizer and configuration for the adapter model. + Defaults to None if not specified. """ # Adapter models have the specificity that they look at the base model (= the parent) for the tokenizer and config @@ -66,7 +70,7 @@ def init_configs(self, env_config: EnvConfig): env_configs(EnvConfig): An instance of EnvConfig. Returns: - Any: + Any: Result of the configuration initialization. """ return self._init_configs(self.base_model, env_config) @@ -74,9 +78,6 @@ def init_configs(self, env_config: EnvConfig): class AdapterModel(BaseModel): """ Integrates the adapter models with a pre-trained base model. - - Args: - """ def _create_auto_tokenizer(self, config: AdapterModelConfig, env_config: EnvConfig) -> PreTrainedTokenizer: