Homogenize generation params (huggingface#428)
This PR does 3 things:

1. Provides a homogenized API for supplying model generation parameters through model configs (a hedged sketch follows below). These parameters are passed to every backend that can accept them (vllm, openai, tgi, transformers, ...).
2. Renames BaseModel to TransformersModel.
3. Allows TransformersModel to use a transformers.GenerationConfig object directly when created programmatically.

I would also move system_prompt, fewshot_seeds, and use_chat_template into GenerationParameters, since they are logically generation parameters, but that can be another PR.
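
For illustration, here is a minimal sketch of how the homogenized parameters can be declared in a model YAML config and loaded into a GenerationParameters object, mirroring the diffs below. GenerationParameters.from_dict and the generation: section come from this PR; any field beyond temperature (top_p, max_new_tokens) is an assumed example, not the definitive schema.

```python
# Minimal sketch, assuming GenerationParameters.from_dict() reads the nested
# "generation" section of the model config (as main_accelerate.py / main_vllm.py do below).
# Fields other than `temperature` are illustrative assumptions.
import yaml

from lighteval.models.model_input import GenerationParameters

CONFIG = """
model:
  base_params:
    model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main"
  generation:
    temperature: 0.5
    top_p: 0.9           # assumed field name
    max_new_tokens: 256  # assumed field name
"""

config = yaml.safe_load(CONFIG)["model"]
generation_parameters = GenerationParameters.from_dict(config)
print(generation_parameters)
```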

---------

Co-authored-by: Nathan Habib <[email protected]>
Co-authored-by: Albert Villanova del Moral <[email protected]>
3 people authored Jan 2, 2025
1 parent 24afde2 commit 5a28b22
Showing 20 changed files with 404 additions and 113 deletions.
6 changes: 3 additions & 3 deletions docs/source/package_reference/models.mdx
@@ -6,9 +6,9 @@


## Accelerate and Transformers Models
### BaseModel
[[autodoc]] models.transformers.base_model.BaseModelConfig
[[autodoc]] models.transformers.base_model.BaseModel
### TransformersModel
[[autodoc]] models.transformers.transformers_model.TransformersModelConfig
[[autodoc]] models.transformers.transformers_model.TransformersModel

### AdapterModel
[[autodoc]] models.transformers.adapter_model.AdapterModelConfig
@@ -1,6 +1,6 @@
model:
base_params:
model_args: "pretrained=HuggingFaceH4/zephyr-7b-beta,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
model_args: "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main" # pretrained=model_name,trust_remote_code=boolean,revision=revision_to_use,model_parallel=True ...
dtype: "bfloat16"
compile: true
merged_weights: # Ignore this section if you are not using PEFT models
@@ -9,3 +9,4 @@ model:
base_model: null # path to the base_model
generation:
multichoice_continuations_start_space: null # If true/false, will force multiple choice continuations to start/not start with a space. If none, will do nothing
temperature: 0.5
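
For context on where the new temperature: 0.5 ends up: GenerationParameters exposes backend-specific converters used later in this diff (to_tgi_ie_dict for TGI/inference endpoints, to_vllm_openai_dict for vLLM/OpenAI-style clients). A hedged sketch; the constructor keyword and the exact keys of the returned dicts are assumptions.

```python
# Sketch only: the two to_*_dict() helpers appear in the endpoint/openai diffs below;
# constructing GenerationParameters(temperature=...) directly is assumed to work, and
# the emitted keys are illustrative.
from lighteval.models.model_input import GenerationParameters

params = GenerationParameters(temperature=0.5)
print(params.to_vllm_openai_dict())  # e.g. {"temperature": 0.5, ...}
print(params.to_tgi_ie_dict())       # e.g. {"temperature": 0.5, ...}
```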
5 changes: 3 additions & 2 deletions src/lighteval/__main__.py
@@ -20,7 +20,7 @@
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
import logging
from logging.config import dictConfig
import logging.config

import colorlog
import typer
@@ -57,7 +57,8 @@
},
)

dictConfig(logging_config)
logging.config.dictConfig(logging_config)
logging.captureWarnings(capture=True)

app.command(rich_help_panel="Evaluation Backends")(lighteval.main_accelerate.accelerate)
app.command(rich_help_panel="Evaluation Utils")(lighteval.main_baseline.baseline)
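
The __main__.py change above switches to logging.config.dictConfig and adds logging.captureWarnings, so warnings.warn() messages flow through the configured handlers. A self-contained sketch of that pattern with a deliberately minimal config (not the colorlog configuration lighteval actually uses):

```python
# Standalone sketch of the pattern used in lighteval/__main__.py; this dictConfig is a
# minimal stand-in, not the project's colorlog configuration.
import logging
import logging.config
import warnings

logging_config = {
    "version": 1,
    "handlers": {"console": {"class": "logging.StreamHandler"}},
    "root": {"handlers": ["console"], "level": "INFO"},
}

logging.config.dictConfig(logging_config)
logging.captureWarnings(capture=True)  # warnings are redirected to the "py.warnings" logger

warnings.warn("this now goes through the logging handlers")
```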
11 changes: 7 additions & 4 deletions src/lighteval/main_accelerate.py
@@ -44,7 +44,7 @@ def accelerate( # noqa C901
model_args: Annotated[
str,
Argument(
help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/base_model.yaml)"
help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)"
),
],
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
@@ -107,9 +107,10 @@ def accelerate( # noqa C901
from accelerate import Accelerator, InitProcessGroupKwargs

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.model_input import GenerationParameters
from lighteval.models.transformers.adapter_model import AdapterModelConfig
from lighteval.models.transformers.base_model import BaseModelConfig, BitsAndBytesConfig
from lighteval.models.transformers.delta_model import DeltaModelConfig
from lighteval.models.transformers.transformers_model import BitsAndBytesConfig, TransformersModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

accelerator = Accelerator(kwargs_handlers=[InitProcessGroupKwargs(timeout=timedelta(seconds=3000))])
@@ -154,6 +155,8 @@ def accelerate( # noqa C901
# We extract the model args
args_dict = {k.split("=")[0]: k.split("=")[1] for k in config["base_params"]["model_args"].split(",")}

args_dict["generation_parameters"] = GenerationParameters.from_dict(config)

# We store the relevant other args
args_dict["base_model"] = config["merged_weights"]["base_model"]
args_dict["compile"] = bool(config["base_params"]["compile"])
@@ -180,13 +183,13 @@ def accelerate( # noqa C901
elif config["merged_weights"]["base_model"] not in ["", None]:
raise ValueError("You can't specify a base model if you are not using delta/adapter weights")
else:
model_config = BaseModelConfig(**args_dict)
model_config = TransformersModelConfig(**args_dict)
else:
model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
model_args_dict["accelerator"] = accelerator
model_args_dict["use_chat_template"] = use_chat_template
model_args_dict["compile"] = bool(model_args_dict["compile"]) if "compile" in model_args_dict else False
model_config = BaseModelConfig(**model_args_dict)
model_config = TransformersModelConfig(**model_args_dict)

pipeline = Pipeline(
tasks=tasks,
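
For reference, the comma-separated key=value parsing kept in the CLI branch above (and reused in main_vllm.py) behaves like this; the argument string is a made-up example:

```python
# Sketch of the model_args parsing used in main_accelerate.py / main_vllm.py;
# the example string is illustrative. Bare keys (no "=") become True flags.
model_args = "pretrained=HuggingFaceTB/SmolLM-1.7B,revision=main,compile"

model_args_dict: dict = {
    k.split("=")[0]: (k.split("=")[1] if "=" in k else True) for k in model_args.split(",")
}
print(model_args_dict)
# {'pretrained': 'HuggingFaceTB/SmolLM-1.7B', 'revision': 'main', 'compile': True}
```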
15 changes: 10 additions & 5 deletions src/lighteval/main_endpoint.py
@@ -42,8 +42,11 @@
@app.command(rich_help_panel="Evaluation Backends")
def openai(
# === general ===
model_name: Annotated[
str, Argument(help="The model name to evaluate (has to be available through the openai API.")
model_args: Annotated[
str,
Argument(
help="Model name as a string (has to be available through the openai API) or path to yaml config file (see examples/model_configs/transformers_model.yaml)"
),
],
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
# === Common parameters ===
@@ -96,6 +99,11 @@ def openai(
from lighteval.models.endpoints.openai_model import OpenAIModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

if model_args.endswith(".yaml"):
model_config = OpenAIModelConfig.from_path(model_args)
else:
model_config = OpenAIModelConfig(model=model_args)

env_config = EnvConfig(token=TOKEN, cache_dir=cache_dir)
evaluation_tracker = EvaluationTracker(
output_dir=output_dir,
@@ -107,7 +115,6 @@
)

parallelism_manager = ParallelismManager.OPENAI
model_config = OpenAIModelConfig(model=model_name)

pipeline_params = PipelineParameters(
launcher_type=parallelism_manager,
@@ -205,7 +212,6 @@ def inference_endpoint(
"""
Evaluate models using inference-endpoints as backend.
"""

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.endpoints.endpoint_model import InferenceEndpointModelConfig, ServerlessEndpointModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
@@ -319,7 +325,6 @@ def tgi(
"""
Evaluate models using TGI as backend.
"""

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.endpoints.tgi_model import TGIModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters
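
The openai command now dispatches on the argument's suffix: a .yaml path is loaded through OpenAIModelConfig.from_path, anything else is treated as a bare model name. A hedged sketch of that dispatch, wrapped in a hypothetical helper for readability (the model name is only an example):

```python
# Sketch mirroring the dispatch added to the `openai` command above; build_openai_config
# is a hypothetical wrapper, not part of lighteval.
from lighteval.models.endpoints.openai_model import OpenAIModelConfig

def build_openai_config(model_args: str) -> OpenAIModelConfig:
    if model_args.endswith(".yaml"):
        # yaml config: model name plus a "generation" section (see OpenAIModelConfig.from_path below)
        return OpenAIModelConfig.from_path(model_args)
    # bare model name: default GenerationParameters are filled in by __post_init__
    return OpenAIModelConfig(model=model_args)

config = build_openai_config("gpt-4o-mini")  # example model name
```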
21 changes: 18 additions & 3 deletions src/lighteval/main_vllm.py
@@ -37,7 +37,12 @@

def vllm(
# === general ===
model_args: Annotated[str, Argument(help="Model arguments in the form key1=value1,key2=value2,...")],
model_args: Annotated[
str,
Argument(
help="Model arguments in the form key1=value1,key2=value2,... or path to yaml config file (see examples/model_configs/transformers_model.yaml)"
),
],
tasks: Annotated[str, Argument(help="Comma-separated list of tasks to evaluate on.")],
# === Common parameters ===
use_chat_template: Annotated[
@@ -88,7 +93,10 @@ def vllm(
"""
Evaluate models using vllm as backend.
"""
import yaml

from lighteval.logging.evaluation_tracker import EvaluationTracker
from lighteval.models.model_input import GenerationParameters
from lighteval.models.vllm.vllm_model import VLLMModelConfig
from lighteval.pipeline import EnvConfig, ParallelismManager, Pipeline, PipelineParameters

@@ -118,8 +126,15 @@ def vllm(
system_prompt=system_prompt,
)

model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
model_config = VLLMModelConfig(**model_args_dict)
if model_args.endswith(".yaml"):
with open(model_args, "r") as f:
config = yaml.safe_load(f)["model"]
generation_parameters = GenerationParameters.from_dict(config)
model_config = VLLMModelConfig(config, generation_parameters=generation_parameters)

else:
model_args_dict: dict = {k.split("=")[0]: k.split("=")[1] if "=" in k else True for k in model_args.split(",")}
model_config = VLLMModelConfig(**model_args_dict)

pipeline = Pipeline(
tasks=tasks,
40 changes: 29 additions & 11 deletions src/lighteval/models/endpoints/endpoint_model.py
@@ -24,7 +24,7 @@
import logging
import re
import time
from dataclasses import dataclass
from dataclasses import dataclass, replace
from typing import Coroutine, Dict, List, Optional, Union

import requests
@@ -35,6 +35,7 @@
InferenceEndpoint,
InferenceEndpointError,
InferenceEndpointTimeoutError,
TextGenerationInputGenerateParameters,
TextGenerationInputGrammarType,
TextGenerationOutput,
create_inference_endpoint,
@@ -48,6 +49,7 @@

from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
from lighteval.models.abstract_model import LightevalModel, ModelInfo
from lighteval.models.model_input import GenerationParameters
from lighteval.models.model_output import GenerativeResponse, LoglikelihoodResponse, LoglikelihoodSingleTokenResponse
from lighteval.tasks.requests import (
GreedyUntilRequest,
@@ -78,6 +80,11 @@
class ServerlessEndpointModelConfig:
model_name: str
add_special_tokens: bool = True
generation_parameters: GenerationParameters = None

def __post_init__(self):
if not self.generation_parameters:
self.generation_parameters = GenerationParameters()

@classmethod
def from_path(cls, path: str) -> "ServerlessEndpointModelConfig":
@@ -106,6 +113,7 @@ class InferenceEndpointModelConfig:
namespace: str = None # The namespace under which to launch the endpoint. Defaults to the current user's namespace
image_url: str = None
env_vars: dict = None
generation_parameters: GenerationParameters = None

def __post_init__(self):
# xor operator, one is None but not the other
@@ -117,6 +125,9 @@ def __post_init__(self):
if not (self.endpoint_name is None) ^ int(self.model_name is None):
raise ValueError("You need to set either endpoint_name or model_name (but not both).")

if not self.generation_parameters:
self.generation_parameters = GenerationParameters()

@classmethod
def from_path(cls, path: str) -> "InferenceEndpointModelConfig":
"""Load configuration for inference endpoint model from YAML file path.
@@ -305,6 +316,8 @@ def __init__( # noqa: C901
model_dtype=getattr(config, "model_dtype", "default"),
model_size=-1,
)
self.generation_parameters = config.generation_parameters
self.generation_config = TextGenerationInputGenerateParameters(**self.generation_parameters.to_tgi_ie_dict())

@staticmethod
def get_larger_hardware_suggestion(cur_instance_type: str = None, cur_instance_size: str = None):
Expand Down Expand Up @@ -388,16 +401,17 @@ def _async_process_request(
) -> Coroutine[None, list[TextGenerationOutput], str]:
# Todo: add an option to launch with conversational instead for chat prompts
# https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational
generated_text = self.async_client.text_generation(
prompt=context,
generation_config: TextGenerationInputGenerateParameters = replace(
self.generation_config,
stop=stop_tokens,
max_new_tokens=max_tokens,
details=True,
decoder_input_details=True,
grammar=grammar,
max_new_tokens=max_tokens,
stop_sequences=stop_tokens,
# truncate=,
)

generated_text = self.async_client.text_generation(prompt=context, generation_config=generation_config)

return generated_text

def _process_request(
Expand All @@ -409,14 +423,18 @@ def _process_request(
) -> TextGenerationOutput:
# Todo: add an option to launch with conversational instead for chat prompts
# https://huggingface.co/docs/huggingface_hub/v0.20.3/en/package_reference/inference_client#huggingface_hub.AsyncInferenceClient.conversational
generated_text = self.client.text_generation(
prompt=context,
generation_config: TextGenerationInputGenerateParameters = replace(
self.generation_config,
stop=stop_tokens,
max_new_tokens=max_tokens,
details=True,
decoder_input_details=True,
grammar=grammar,
max_new_tokens=max_tokens,
stop_sequences=stop_tokens,
# truncate=,
)

generated_text = self.client.text_generation(
prompt=context,
generation_config=generation_config,
)

return generated_text
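
The endpoint model change above builds one default TextGenerationInputGenerateParameters from the config's GenerationParameters, then copies it per request with dataclasses.replace to inject stop tokens, max_new_tokens, and grammar. A generic sketch of that copy-and-override pattern with a toy dataclass (not the huggingface_hub type):

```python
# Generic sketch of the dataclasses.replace pattern used in _process_request /
# _async_process_request above; ToyGenerateParams is a stand-in for
# huggingface_hub's TextGenerationInputGenerateParameters.
from dataclasses import dataclass, replace
from typing import List, Optional

@dataclass
class ToyGenerateParams:
    temperature: float = 1.0
    max_new_tokens: Optional[int] = None
    stop: Optional[List[str]] = None

default_config = ToyGenerateParams(temperature=0.5)  # built once, from the model config
request_config = replace(default_config, stop=["\n"], max_new_tokens=100)  # fresh copy per request

print(default_config)  # ToyGenerateParams(temperature=0.5, max_new_tokens=None, stop=None)
print(request_config)  # ToyGenerateParams(temperature=0.5, max_new_tokens=100, stop=['\n'])
```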
20 changes: 19 additions & 1 deletion src/lighteval/models/endpoints/openai_model.py
@@ -32,6 +32,7 @@
from lighteval.data import GenerativeTaskDataset, LoglikelihoodDataset
from lighteval.models.abstract_model import LightevalModel
from lighteval.models.endpoints.endpoint_model import ModelInfo
from lighteval.models.model_input import GenerationParameters
from lighteval.models.model_output import (
GenerativeResponse,
LoglikelihoodResponse,
@@ -62,14 +63,30 @@
@dataclass
class OpenAIModelConfig:
model: str
generation_parameters: GenerationParameters = None

def __post_init__(self):
if not self.generation_parameters:
self.generation_parameters = GenerationParameters()

@classmethod
def from_path(cls, path: str) -> "OpenAIModelConfig":
import yaml

with open(path, "r") as f:
config = yaml.safe_load(f)["model"]
generation_parameters = GenerationParameters.from_dict(config)
return cls(model=config["model_name"], generation_parameters=generation_parameters)


class OpenAIClient(LightevalModel):
_DEFAULT_MAX_LENGTH: int = 4096

def __init__(self, config, env_config) -> None:
def __init__(self, config: OpenAIModelConfig, env_config) -> None:
api_key = os.environ["OPENAI_API_KEY"]
self.client = OpenAI(api_key=api_key)
self.generation_parameters = config.generation_parameters
self.sampling_params = self.generation_parameters.to_vllm_openai_dict()

self.model_info = ModelInfo(
model_name=config.model,
@@ -96,6 +113,7 @@ def __call_api(self, prompt, return_logits, max_new_tokens, num_samples, logit_b
logprobs=return_logits,
logit_bias=logit_bias,
n=num_samples,
**self.sampling_params,
)
return response
except Exception as e:
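
Several configs in this PR (OpenAIModelConfig and the endpoint configs) default generation_parameters inside __post_init__ rather than in the field declaration; this keeps None as an accepted input and avoids sharing a single default instance between configs. A minimal sketch of the pattern with stand-in dataclasses:

```python
# Minimal sketch of the __post_init__ default used by the configs in this PR; both
# dataclasses here are stand-ins, not the lighteval implementations.
from dataclasses import dataclass
from typing import Optional

@dataclass
class ToyGenerationParameters:
    temperature: Optional[float] = None

@dataclass
class ToyModelConfig:
    model: str
    generation_parameters: Optional[ToyGenerationParameters] = None

    def __post_init__(self):
        # Fall back to a fresh instance when the caller passed nothing (or None).
        if not self.generation_parameters:
            self.generation_parameters = ToyGenerationParameters()

cfg = ToyModelConfig(model="my-model")  # hypothetical model name
print(cfg.generation_parameters)        # ToyGenerationParameters(temperature=None)
```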