Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Might Need Discussion] Refine Time Limit #143

Merged
merged 6 commits into from
Nov 16, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 60 additions & 21 deletions src/autogluon/assistant/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,9 @@
import os
import subprocess
import sys
import time
from contextlib import contextmanager
from dataclasses import dataclass
from importlib.metadata import PackageNotFoundError, version
from pathlib import Path
from typing import List, Optional
Expand Down Expand Up @@ -129,6 +132,34 @@ def launch_ui(port: int = typer.Option(8501, help="Port to run the UI on")):
sys.exit(1)


@dataclass
class TimingContext:
    """Track a wall-clock time budget measured from a fixed start timestamp."""

    # Timestamp on the ``time.time()`` clock at which the budget started.
    start_time: float
    # Size of the overall budget, in the same unit as ``time.time()`` (seconds).
    total_time_limit: float

    @property
    def time_elapsed(self) -> float:
        """Return the seconds that have passed since ``start_time``."""
        now = time.time()
        return now - self.start_time

    @property
    def time_remaining(self) -> float:
        """Return the seconds left in the budget.

        The value is not clamped, so it becomes negative once more than
        ``total_time_limit`` seconds have elapsed.
        """
        spent = self.time_elapsed
        return self.total_time_limit - spent


@contextmanager
def time_block(description: str, timer: TimingContext):
    """Time the wrapped code block and log how long it took.

    Args:
        description: Phrase inserted after "seconds" in the log message,
            describing what the block was doing.
        timer: Budget tracker whose ``time_remaining`` and
            ``total_time_limit`` are included in the log message.

    The message is emitted from a ``finally`` clause, so it is logged whether
    the block exits normally or raises.
    """
    began_at = time.time()
    try:
        yield
    finally:
        # Measure and report even when the wrapped block raised.
        duration = time.time() - began_at
        message = (
            f"It took {duration:.2f} seconds {description}. "
            f"Time remaining: {timer.time_remaining:.2f}/{timer.total_time_limit:.2f}"
        )
        logging.info(message)


def run_assistant(
task_path: Annotated[str, typer.Argument(help="Directory where task files are included")],
presets: Annotated[
Expand All @@ -149,6 +180,8 @@ def run_assistant(
] = None,
output_filename: Annotated[Optional[str], typer.Option(help="Output File")] = "",
) -> str:
start_time = time.time()

logging.info("Starting AutoGluon-Assistant")

if presets is None or presets not in PRESETS:
Expand All @@ -165,39 +198,45 @@ def run_assistant(
logging.error(f"Failed to load config: {e}")
raise

rprint("🤖 [bold red] Welcome to AutoGluon-Assistant [/bold red]")
timer = TimingContext(start_time=start_time, total_time_limit=config.time_limit)
with time_block("initializing components", timer):
rprint("🤖 [bold red] Welcome to AutoGluon-Assistant [/bold red]")

rprint("Will use task config:")
rprint(OmegaConf.to_container(config))

rprint("Will use task config:")
rprint(OmegaConf.to_container(config))
task_path = Path(task_path).resolve()
assert task_path.is_dir(), "Task path does not exist, please provide a valid directory."
rprint(f"Task path: {task_path}")

task_path = Path(task_path).resolve()
assert task_path.is_dir(), "Task path does not exist, please provide a valid directory."
rprint(f"Task path: {task_path}")
task = TabularPredictionTask.from_path(task_path)

task = TabularPredictionTask.from_path(task_path)
rprint("[green]Task loaded![/green]")
rprint(task)

rprint("[green]Task loaded![/green]")
rprint(task)
assistant = TabularPredictionAssistant(config)

assistant = TabularPredictionAssistant(config)
task = assistant.preprocess_task(task)
with time_block("preprocessing task", timer):
task = assistant.preprocess_task(task)

rprint("Model training starts...")
with time_block("training model", timer):
rprint("Model training starts...")

assistant.fit_predictor(task)
assistant.fit_predictor(task, time_limit=timer.time_remaining)

rprint("[green]Model training complete![/green]")
rprint("[green]Model training complete![/green]")

rprint("Prediction starts...")
with time_block("making predictions", timer):
rprint("Prediction starts...")

predictions = assistant.predict(task)
predictions = assistant.predict(task)

if not output_filename:
output_filename = f"aga-output-{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
with open(output_filename, "w") as fp:
make_prediction_outputs(task, predictions).to_csv(fp, index=False)
if not output_filename:
output_filename = f"aga-output-{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
with open(output_filename, "w") as fp:
make_prediction_outputs(task, predictions).to_csv(fp, index=False)

rprint(f"[green]Prediction complete! Outputs written to {output_filename}[/green]")
rprint(f"[green]Prediction complete! Outputs written to {output_filename}[/green]")

if config.save_artifacts.enabled:
# Determine the artifacts_dir with or without timestamp
Expand Down
4 changes: 2 additions & 2 deletions src/autogluon/assistant/assistant.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,9 +126,9 @@ def preprocess_task(self, task: TabularPredictionTask) -> TabularPredictionTask:
logger.info("Automatic feature generation is disabled. ")
return task

def fit_predictor(self, task: TabularPredictionTask):
def fit_predictor(self, task: TabularPredictionTask, time_limit: float):
try:
self.predictor.fit(task)
self.predictor.fit(task, time_limit=time_limit)
except Exception as e:
self.handle_exception("Predictor Fit", e)

Expand Down
2 changes: 1 addition & 1 deletion src/autogluon/assistant/configs/default.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
infer_eval_metric: True
detect_and_drop_id_column: False
task_preprocessors_timeout: 3600
time_limit: 14400
save_artifacts:
enabled: False
append_timestamp: True
Expand All @@ -26,7 +27,6 @@ autogluon:
predictor_init_kwargs: {}
predictor_fit_kwargs:
presets: best_quality
time_limit: 14400
llm:
# Note: bedrock is only supported in limited AWS regions
# and requires AWS credentials
Expand Down
2 changes: 1 addition & 1 deletion src/autogluon/assistant/configs/high_quality.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
time_limit: 3600
autogluon:
predictor_fit_kwargs:
presets: high_quality
time_limit: 3600
2 changes: 1 addition & 1 deletion src/autogluon/assistant/configs/medium_quality.yaml
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
time_limit: 600
autogluon:
predictor_fit_kwargs:
presets: medium_quality
time_limit: 600
11 changes: 7 additions & 4 deletions src/autogluon/assistant/predictor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

import logging
from collections import defaultdict
from typing import Any, Dict
from typing import Any, Dict, Optional

import numpy as np
from autogluon.common.features.feature_metadata import FeatureMetadata
Expand All @@ -29,7 +29,7 @@ def rmsle_func(y_true, y_pred, **kwargs):


class Predictor:
def fit(self, task: TabularPredictionTask) -> "Predictor":
def fit(self, task: TabularPredictionTask, time_limit: Optional[float] = None) -> "Predictor":
return self

def predict(self, task: TabularPredictionTask) -> Any:
Expand Down Expand Up @@ -57,7 +57,7 @@ def save_dataset_details(self, task: TabularPredictionTask) -> None:
def describe(self) -> Dict[str, Any]:
return dict(self.metadata)

def fit(self, task: TabularPredictionTask) -> "AutogluonTabularPredictor":
def fit(self, task: TabularPredictionTask, time_limit: Optional[float] = None) -> "AutogluonTabularPredictor":
"""Trains an AutoGluon TabularPredictor with parsed arguments. Saves trained predictor to
`self.predictor`.

Expand All @@ -78,6 +78,7 @@ def fit(self, task: TabularPredictionTask) -> "AutogluonTabularPredictor":
**self.config.predictor_init_kwargs,
}
predictor_fit_kwargs = self.config.predictor_fit_kwargs.copy()
predictor_fit_kwargs.pop("time_limit", None)

logger.info("Fitting AutoGluon TabularPredictor")
logger.info(f"predictor_init_kwargs: {predictor_init_kwargs}")
Expand All @@ -88,7 +89,9 @@ def fit(self, task: TabularPredictionTask) -> "AutogluonTabularPredictor":
"predictor_fit_kwargs": predictor_fit_kwargs,
}
self.save_dataset_details(task)
self.predictor = TabularPredictor(**predictor_init_kwargs).fit(task.train_data, **predictor_fit_kwargs)
self.predictor = TabularPredictor(**predictor_init_kwargs).fit(
task.train_data, **predictor_fit_kwargs, time_limit=time_limit
)

self.metadata["leaderboard"] = self.predictor.leaderboard().to_dict()
return self
Expand Down
Loading