Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Initial proposal for model lazy loading #497

Draft
wants to merge 8 commits into
base: main
Choose a base branch
from
40 changes: 38 additions & 2 deletions src/lighteval/logging/evaluation_tracker.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,45 @@ def save_results(self, date_id: str, results_dict: dict):
with self.fs.open(output_results_file, "w") as f:
f.write(json.dumps(results_dict, cls=EnhancedJSONEncoder, indent=2, ensure_ascii=False))

def save_details(self, date_id: str, details_datasets: dict[str, Dataset]):
def _get_details_sub_folder(self, date_id: str):
output_dir_details = Path(self.output_dir) / "details" / self.general_config_logger.model_name
output_dir_details_sub_folder = output_dir_details / date_id
if date_id == "latest":
# Get all folders in output_dir_details
if not self.fs.exists(output_dir_details):
raise FileNotFoundError(f"Details directory {output_dir_details} does not exist")

# List all folders and filter out files
folders = [f["name"] for f in self.fs.listdir(output_dir_details) if f["type"] == "directory"]

if not folders:
raise FileNotFoundError(f"No timestamp folders found in {output_dir_details}")

# Parse timestamps and get latest
date_id = max(folders)
return output_dir_details / date_id

def load_details_datasets(self, date_id: str, task_names: list[str]) -> dict[str, Dataset]:
    """Load previously saved per-task details parquet files for one run.

    Args:
        date_id: Timestamp folder to load from, or "latest" to use the most
            recent run (resolved by ``_get_details_sub_folder``).
        task_names: Base task names (without the trailing ``|<num_fewshot>``
            suffix) whose details should be loaded.

    Returns:
        Mapping from full stored task name (with fewshot suffix) to its
        details ``Dataset``.

    Raises:
        ValueError: if any requested task has no matching details file.
    """
    output_dir_details_sub_folder = self._get_details_sub_folder(date_id)
    logger.info(f"Loading details from {output_dir_details_sub_folder}")
    date_id = output_dir_details_sub_folder.name  # Overwrite date_id in case of latest
    details_datasets = {}
    for file in self.fs.glob(str(output_dir_details_sub_folder / f"details_*_{date_id}.parquet")):
        # removeprefix/removesuffix only strip at the ends, unlike str.replace which
        # would also corrupt task names containing "details_" or the date_id mid-string.
        task_name = Path(file).stem.removeprefix("details_").removesuffix(f"_{date_id}")
        # Stored names carry a "|<num_fewshot>" suffix; strip it before matching task_names.
        if "|".join(task_name.split("|")[:-1]) not in task_names:
            logger.info(f"Skipping {task_name} because it is not in the task_names list")
            continue
        details_datasets[task_name] = load_dataset("parquet", data_files=file, split="train")

    # BUG FIX: the original generator expression re-used the name `task_name`, shadowing the
    # outer loop variable, so the check degenerated to `loaded.startswith(loaded)` — always
    # True whenever at least one dataset was loaded. A missing task was therefore never
    # reported. Distinct names make the per-task verification actually run.
    for requested_task in task_names:
        if not any(loaded_name.startswith(requested_task) for loaded_name in details_datasets):
            raise ValueError(
                f"Task {requested_task} not found in details datasets. Check the tasks to be evaluated or the date_id used to load the details ({date_id})."
            )
    return details_datasets

def save_details(self, date_id: str, details_datasets: dict[str, Dataset]):
output_dir_details_sub_folder = self._get_details_sub_folder(date_id)
self.fs.mkdirs(output_dir_details_sub_folder, exist_ok=True)
logger.info(f"Saving details to {output_dir_details_sub_folder}")
for task_name, dataset in details_datasets.items():
Expand Down
4 changes: 4 additions & 0 deletions src/lighteval/main_accelerate.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ def accelerate( # noqa C901
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
load_responses_from_details_date_id: Annotated[
Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
Expand Down Expand Up @@ -137,6 +140,7 @@ def accelerate( # noqa C901
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
load_responses_from_details_date_id=load_responses_from_details_date_id,
)

# TODO (nathan): better handling of model_args
Expand Down
12 changes: 12 additions & 0 deletions src/lighteval/main_endpoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -179,6 +179,9 @@ def inference_endpoint(
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
load_responses_from_details_date_id: Annotated[
Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
Expand Down Expand Up @@ -247,6 +250,7 @@ def inference_endpoint(
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
load_responses_from_details_date_id=load_responses_from_details_date_id,
)
pipeline = Pipeline(
tasks=tasks,
Expand Down Expand Up @@ -292,6 +296,9 @@ def tgi(
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
load_responses_from_details_date_id: Annotated[
Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
Expand Down Expand Up @@ -355,6 +362,7 @@ def tgi(
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
load_responses_from_details_date_id=load_responses_from_details_date_id,
)
pipeline = Pipeline(
tasks=tasks,
Expand Down Expand Up @@ -400,6 +408,9 @@ def litellm(
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
load_responses_from_details_date_id: Annotated[
Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
Expand Down Expand Up @@ -464,6 +475,7 @@ def litellm(
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
load_responses_from_details_date_id=load_responses_from_details_date_id,
)
pipeline = Pipeline(
tasks=tasks,
Expand Down
4 changes: 4 additions & 0 deletions src/lighteval/main_vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@ def vllm(
num_fewshot_seeds: Annotated[
int, Option(help="Number of seeds to use for few-shot evaluation.", rich_help_panel=HELP_PANEL_NAME_1)
] = 1,
load_responses_from_details_date_id: Annotated[
Optional[str], Option(help="Load responses from details directory.", rich_help_panel=HELP_PANEL_NAME_1)
] = None,
# === saving ===
output_dir: Annotated[
str, Option(help="Output directory for evaluation results.", rich_help_panel=HELP_PANEL_NAME_2)
Expand Down Expand Up @@ -124,6 +127,7 @@ def vllm(
max_samples=max_samples,
use_chat_template=use_chat_template,
system_prompt=system_prompt,
load_responses_from_details_date_id=load_responses_from_details_date_id,
)

if model_args.endswith(".yaml"):
Expand Down
Loading