diff --git a/src/lighteval/evaluator.py b/src/lighteval/evaluator.py
index c547bc1ad..0b09ff23f 100644
--- a/src/lighteval/evaluator.py
+++ b/src/lighteval/evaluator.py
@@ -5,6 +5,8 @@
 import copy
 from typing import Dict, Union
 
+from pytablewriter import LatexTableWriter, MarkdownTableWriter
+
 from lighteval.logging.evaluation_tracker import EvaluationTracker
 from lighteval.logging.hierarchical_logger import hlog
 from lighteval.models.base_model import BaseModel
@@ -99,8 +101,6 @@ def evaluate(  # noqa: C901
 
 def make_results_table(result_dict):
     """Generate table of results."""
-    from pytablewriter import LatexTableWriter, MarkdownTableWriter
-
     md_writer = MarkdownTableWriter()
     latex_writer = LatexTableWriter()
     md_writer.headers = ["Task", "Version", "Metric", "Value", "", "Stderr"]
diff --git a/src/lighteval/models/nanotron_model.py b/src/lighteval/models/nanotron_model.py
index 51e682eb6..e6a3223d7 100644
--- a/src/lighteval/models/nanotron_model.py
+++ b/src/lighteval/models/nanotron_model.py
@@ -32,6 +32,7 @@
     LoglikelihoodDataset,
     LoglikelihoodSingleTokenDataset,
 )
+from lighteval.models.base_model import LightevalModel
 from lighteval.models.model_output import Batch, GenerateReturn, LoglikelihoodReturn, LoglikelihoodSingleTokenReturn
 from lighteval.tasks.requests import (
     GreedyUntilRequest,
@@ -51,7 +52,7 @@
 # _DeviceMapping = NewType("DeviceMapping", Mapping[str, Union[int, str, torch.device]])
 
 
-class NanotronLightevalModel:
+class NanotronLightevalModel(LightevalModel):
     # Default max sequence length setting for when no `max_length` is provided
     # or no max length config setting is found in the model or tokenizer.
     _DEFAULT_MAX_LENGTH: int = 2048
@@ -77,7 +78,6 @@ def __init__(
         """Initializes a nanotron model for evaluation.
         Args:
         """
-        super().__init__()
 
         self._batch_size = batch_size
         self._max_gen_toks = max_gen_toks
@@ -117,12 +117,12 @@
             self.model_config.num_hidden_layers = 1
 
         self._add_special_tokens = add_special_tokens
-        self.tokenizer = self._create_auto_tokenizer(
+        self._tokenizer = self._create_auto_tokenizer(
             pretrained=tokenizer.tokenizer_name_or_path,
             cache_dir=cache_dir,
             trust_remote_code=trust_remote_code,
         )
-        self.tokenizer.model_max_length = self.max_length
+        self._tokenizer.model_max_length = self.max_length
 
         model_config_cls = self.model_config.__class__.__name__
         if model_class is not None:
@@ -197,6 +197,10 @@
 
         self.multichoice_continuations_start_space = multichoice_continuations_start_space
 
+    @property
+    def tokenizer(self):
+        return self._tokenizer
+
     def _create_auto_tokenizer(
         self,
         *,
diff --git a/tasks_examples/custom_tasks/lighteval_config_override_template.yaml b/tasks_examples/custom_tasks/lighteval_config_override_template.yaml
index da9258ff9..6544a88af 100644
--- a/tasks_examples/custom_tasks/lighteval_config_override_template.yaml
+++ b/tasks_examples/custom_tasks/lighteval_config_override_template.yaml
@@ -12,7 +12,7 @@ lighteval:
     push_results_to_tensorboard: true
     tensorboard_metric_prefix: e
   parallelism:
-    dp: 8
+    dp: 1
     pp: 1
     pp_engine: 1f1b
     recompute_granularity: null
@@ -20,7 +20,7 @@ lighteval:
     tp_linear_async_communication: false
     tp_mode: ALL_REDUCE
   tasks:
-    custom_tasks_file: /fsx/thomwolf/github/lighteval-harness/tasks_examples/custom_evaluation_tasks.py
+    custom_tasks_file: /fsx/thomwolf/github/lighteval/tasks_examples/custom_tasks/custom_evaluation_tasks.py
     dataset_loading_processes: 8
     max_samples: 1000
     multichoice_continuations_start_space: null
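
Note: the snippet below is a minimal, self-contained sketch (not the actual lighteval classes) of the pattern this patch introduces in NanotronLightevalModel: the tokenizer is stored in a private _tokenizer attribute and exposed through a read-only tokenizer property so the subclass satisfies the LightevalModel interface. The class names, signatures, and bodies here are illustrative assumptions, not the library's real API.

from abc import ABC, abstractmethod


class LightevalModelSketch(ABC):
    """Hypothetical stand-in for lighteval's LightevalModel interface."""

    @property
    @abstractmethod
    def tokenizer(self):
        """Subclasses must expose their tokenizer through this property."""


class NanotronModelSketch(LightevalModelSketch):
    """Hypothetical stand-in for NanotronLightevalModel."""

    def __init__(self, tokenizer):
        # Stored under a private name; the property below is the only public access path.
        self._tokenizer = tokenizer

    @property
    def tokenizer(self):
        return self._tokenizer


if __name__ == "__main__":
    model = NanotronModelSketch(tokenizer="dummy-tokenizer")
    print(model.tokenizer)  # -> dummy-tokenizer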