autogluon · tonyhoo · Nov 16, 2024 · Nov 15, 2024 · Nov 15, 2024 · Nov 15, 2024
diff --git a/src/autogluon/assistant/__init__.py b/src/autogluon/assistant/__init__.py
@@ -3,6 +3,9 @@
 import os
 import subprocess
 import sys
+import time
+from contextlib import contextmanager
+from dataclasses import dataclass
 from importlib.metadata import PackageNotFoundError, version
 from pathlib import Path
 from typing import List, Optional
@@ -129,6 +132,53 @@ def launch_ui(port: int = typer.Option(8501, help="Port to run the UI on")):
         sys.exit(1)
 
 
+@dataclass
+class TimingContext:
+    """Wall-clock budget tracker shared by the timed stages of a run.
+
+    Attributes:
+        start_time: ``time.time()`` timestamp from which elapsed time is measured.
+        total_time_limit: Overall time budget, in seconds.
+    """
+
+    start_time: float
+    total_time_limit: float
+
+    @property
+    def time_elapsed(self) -> float:
+        """Seconds of wall-clock time elapsed since ``start_time``."""
+        return time.time() - self.start_time
+
+    @property
+    def time_remaining(self) -> float:
+        """Seconds left in the budget; negative once the limit is exceeded."""
+        return self.total_time_limit - self.time_elapsed
+
+
+@contextmanager
+def time_block(description: str, timer: TimingContext):
+    """Time the enclosed block and log its duration plus the remaining budget.
+
+    The message is logged in a ``finally`` clause, so it is emitted even when
+    the block raises; the exception then propagates unchanged.
+
+    Args:
+        description: Phrase completing "It took N seconds ..." in the log line.
+        timer: Budget tracker supplying ``time_remaining`` and ``total_time_limit``.
+    """
+    # Monotonic clock: the duration cannot go negative or jump if the system
+    # clock is adjusted while the block runs.
+    start_time = time.monotonic()
+    try:
+        yield
+    finally:
+        duration = time.monotonic() - start_time
+        logging.info(
+            f"It took {duration:.2f} seconds {description}. "
+            f"Time remaining: {timer.time_remaining:.2f}/{timer.total_time_limit:.2f}"
+        )
+
+
 def run_assistant(
     task_path: Annotated[str, typer.Argument(help="Directory where task files are included")],
     presets: Annotated[
@@ -149,6 +180,8 @@ def run_assistant(
     ] = None,
     output_filename: Annotated[Optional[str], typer.Option(help="Output File")] = "",
 ) -> str:
+    start_time = time.time()
+
     logging.info("Starting AutoGluon-Assistant")
 
     if presets is None or presets not in PRESETS:
@@ -165,39 +198,45 @@ def run_assistant(
         logging.error(f"Failed to load config: {e}")
         raise
 
-    rprint("🤖 [bold red] Welcome to AutoGluon-Assistant [/bold red]")
+    timer = TimingContext(start_time=start_time, total_time_limit=config.time_limit)
+    with time_block("initializing components", timer):
+        rprint("🤖 [bold red] Welcome to AutoGluon-Assistant [/bold red]")
+
+        rprint("Will use task config:")
+        rprint(OmegaConf.to_container(config))
 
-    rprint("Will use task config:")
-    rprint(OmegaConf.to_container(config))
+        task_path = Path(task_path).resolve()
+        assert task_path.is_dir(), "Task path does not exist, please provide a valid directory."
+        rprint(f"Task path: {task_path}")
 
-    task_path = Path(task_path).resolve()
-    assert task_path.is_dir(), "Task path does not exist, please provide a valid directory."
-    rprint(f"Task path: {task_path}")
+        task = TabularPredictionTask.from_path(task_path)
 
-    task = TabularPredictionTask.from_path(task_path)
+        rprint("[green]Task loaded![/green]")
+        rprint(task)
 
-    rprint("[green]Task loaded![/green]")
-    rprint(task)
+        assistant = TabularPredictionAssistant(config)
 
-    assistant = TabularPredictionAssistant(config)
-    task = assistant.preprocess_task(task)
+    with time_block("preprocessing task", timer):
+        task = assistant.preprocess_task(task)
 
-    rprint("Model training starts...")
+    with time_block("training model", timer):
+        rprint("Model training starts...")
 
-    assistant.fit_predictor(task)
+        assistant.fit_predictor(task, time_limit=timer.time_remaining)
 
-    rprint("[green]Model training complete![/green]")
+        rprint("[green]Model training complete![/green]")
 
-    rprint("Prediction starts...")
+    with time_block("making predictions", timer):
+        rprint("Prediction starts...")
 
-    predictions = assistant.predict(task)
+        predictions = assistant.predict(task)
 
-    if not output_filename:
-        output_filename = f"aga-output-{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
-    with open(output_filename, "w") as fp:
-        make_prediction_outputs(task, predictions).to_csv(fp, index=False)
+        if not output_filename:
+            output_filename = f"aga-output-{pd.Timestamp.now().strftime('%Y%m%d_%H%M%S')}.csv"
+        with open(output_filename, "w") as fp:
+            make_prediction_outputs(task, predictions).to_csv(fp, index=False)
 
-    rprint(f"[green]Prediction complete! Outputs written to {output_filename}[/green]")
+        rprint(f"[green]Prediction complete! Outputs written to {output_filename}[/green]")
 
     if config.save_artifacts.enabled:
         # Determine the artifacts_dir with or without timestamp

diff --git a/src/autogluon/assistant/assistant.py b/src/autogluon/assistant/assistant.py
@@ -126,9 +126,9 @@ def preprocess_task(self, task: TabularPredictionTask) -> TabularPredictionTask:
             logger.info("Automatic feature generation is disabled. ")
         return task
 
-    def fit_predictor(self, task: TabularPredictionTask):
+    def fit_predictor(self, task: TabularPredictionTask, time_limit: float):  # NOTE(review): required here, but Predictor.fit defaults it to None - confirm intended
         try:
-            self.predictor.fit(task)
+            self.predictor.fit(task, time_limit=time_limit)  # time_limit is forwarded unchanged to the predictor
         except Exception as e:
             self.handle_exception("Predictor Fit", e)
 

diff --git a/src/autogluon/assistant/configs/default.yaml b/src/autogluon/assistant/configs/default.yaml
@@ -1,6 +1,7 @@
 infer_eval_metric: True
 detect_and_drop_id_column: False
 task_preprocessors_timeout: 3600
+time_limit: 14400
 save_artifacts:
   enabled: False
   append_timestamp: True
@@ -26,7 +27,6 @@ autogluon:
   predictor_init_kwargs: {}
   predictor_fit_kwargs:
     presets: best_quality
-    time_limit: 14400
 llm:
   # Note: bedrock is only supported in limited AWS regions
   #       and requires AWS credentials

diff --git a/src/autogluon/assistant/configs/high_quality.yaml b/src/autogluon/assistant/configs/high_quality.yaml
@@ -1,4 +1,4 @@
+time_limit: 3600
 autogluon:
   predictor_fit_kwargs:
     presets: high_quality
-    time_limit: 3600
diff --git a/src/autogluon/assistant/configs/medium_quality.yaml b/src/autogluon/assistant/configs/medium_quality.yaml
@@ -1,4 +1,4 @@
+time_limit: 600
 autogluon:
   predictor_fit_kwargs:
     presets: medium_quality
-    time_limit: 600
diff --git a/src/autogluon/assistant/predictor.py b/src/autogluon/assistant/predictor.py
@@ -2,7 +2,7 @@
 
 import logging
 from collections import defaultdict
-from typing import Any, Dict
+from typing import Any, Dict, Optional
 
 import numpy as np
 from autogluon.common.features.feature_metadata import FeatureMetadata
@@ -29,7 +29,7 @@ def rmsle_func(y_true, y_pred, **kwargs):
 
 
 class Predictor:
-    def fit(self, task: TabularPredictionTask) -> "Predictor":
+    def fit(self, task: TabularPredictionTask, time_limit: Optional[float] = None) -> "Predictor":  # base implementation: ignores both arguments and returns self
         return self
 
     def predict(self, task: TabularPredictionTask) -> Any:
@@ -57,7 +57,7 @@ def save_dataset_details(self, task: TabularPredictionTask) -> None:
     def describe(self) -> Dict[str, Any]:
         return dict(self.metadata)
 
-    def fit(self, task: TabularPredictionTask) -> "AutogluonTabularPredictor":
+    def fit(self, task: TabularPredictionTask, time_limit: Optional[float] = None) -> "AutogluonTabularPredictor":
         """Trains an AutoGluon TabularPredictor with parsed arguments. Saves trained predictor to
         `self.predictor`.
 
@@ -78,6 +78,7 @@ def fit(self, task: TabularPredictionTask) -> "AutogluonTabularPredictor":
             **self.config.predictor_init_kwargs,
         }
         predictor_fit_kwargs = self.config.predictor_fit_kwargs.copy()
+        predictor_fit_kwargs.pop("time_limit", None)
 
         logger.info("Fitting AutoGluon TabularPredictor")
         logger.info(f"predictor_init_kwargs: {predictor_init_kwargs}")
@@ -88,7 +89,9 @@ def fit(self, task: TabularPredictionTask) -> "AutogluonTabularPredictor":
             "predictor_fit_kwargs": predictor_fit_kwargs,
         }
         self.save_dataset_details(task)
-        self.predictor = TabularPredictor(**predictor_init_kwargs).fit(task.train_data, **predictor_fit_kwargs)
+        self.predictor = TabularPredictor(**predictor_init_kwargs).fit(
+            task.train_data, **predictor_fit_kwargs, time_limit=time_limit
+        )
 
         self.metadata["leaderboard"] = self.predictor.leaderboard().to_dict()
         return self