diff --git a/frameworks/AutoGluon/README.md b/frameworks/AutoGluon/README.md
index 1b5c2dc65..d10bb3a7f 100644
--- a/frameworks/AutoGluon/README.md
+++ b/frameworks/AutoGluon/README.md
@@ -3,3 +3,79 @@
 To run v0.8.2: ```python3 ../automlbenchmark/runbenchmark.py autogluon ...```
 
 To run mainline: ```python3 ../automlbenchmark/runbenchmark.py autogluon:latest ...```
+
+
+## Running with Learning Curves Enabled
+
+To run with learning curves enabled, you must define a new framework in the frameworks YAML file that enables the appropriate parameters. Full instructions are available here:
+https://openml.github.io/automlbenchmark/docs/extending/framework/
+
+To summarize these steps:
+1. Add one of the frameworks below (or your own) to [this](https://github.com/openml/automlbenchmark/blob/master/examples/custom/frameworks.yaml) frameworks.yaml file, or to a frameworks.yaml in a custom user_dir of your own.
+2. Navigate to the root directory of the automlbenchmark repository.
+3. Run `python3 runbenchmark.py {YOUR_CUSTOM_FRAMEWORK_NAME} -u {PATH_TO_USER_DIR} ...` \
+   where PATH_TO_USER_DIR is the path to the user_dir containing your frameworks.yaml file. If you used the example user_dir, the path is `examples/custom`.
+4. For example, if running the most basic framework listed below from the example user_dir, the command would look like: \
+   `python3 runbenchmark.py AutoGluon_curves_true -u examples/custom ...`
+
+### Sample Framework Definitions
+```
+# simplest usage
+AutoGluon_curves_true:
+  extends: AutoGluon
+  params:
+    learning_curves: True
+    _save_artifacts: ['learning_curves']
+```
+```
+# including test data
+AutoGluon_curves_test:
+  extends: AutoGluon
+  params:
+    learning_curves: True
+    _include_test_during_fit: True
+    _save_artifacts: ['learning_curves']
+```
+```
+# parameterizing the learning_curves dictionary
+AutoGluon_curves_parameters:
+  extends: AutoGluon
+  params:
+    learning_curves:
+      use_error: True
+    _curve_metrics:
+      binary: ["log_loss", "accuracy", "precision"]
+      regression: ['root_mean_squared_error', 'median_absolute_error', 'r2']
+      multiclass: ["accuracy", "precision_weighted", "recall_weighted"]
+    _include_test_during_fit: True
+    _save_artifacts: ['learning_curves']
+```
+```
+# adding custom hyperparameters
+Defaults: &defaults
+  NN_TORCH:
+    - num_epochs: 1000
+      epochs_wo_improve: 999999999
+  GBM:
+    - num_boost_round: 20000
+      ag_args_fit:
+        early_stop: 999999999
+  XGB:
+    - n_estimators: 20000
+      ag_args_fit:
+        early_stop: 999999999
+
+AutoGluon_curves_hyperparameters:
+  extends: AutoGluon
+  params:
+    hyperparameters:
+      <<: *defaults
+    learning_curves:
+      use_error: True
+    _curve_metrics:
+      binary: ["log_loss", "accuracy", "precision"]
+      regression: ['root_mean_squared_error', 'median_absolute_error', 'r2']
+      multiclass: ["accuracy", "precision_weighted", "recall_weighted"]
+    _include_test_during_fit: True
+    _save_artifacts: ['learning_curves']
+```
\ No newline at end of file
diff --git a/frameworks/AutoGluon/exec.py b/frameworks/AutoGluon/exec.py
index 4b670c4fd..6811f7381 100644
--- a/frameworks/AutoGluon/exec.py
+++ b/frameworks/AutoGluon/exec.py
@@ -16,7 +16,7 @@
 matplotlib.use('agg')  # no need for tk
 
 from autogluon.tabular import TabularPredictor, TabularDataset
-from autogluon.core.utils.savers import save_pd, save_pkl
+from autogluon.core.utils.savers import save_pd, save_pkl, save_json
 import autogluon.core.metrics as metrics
 from autogluon.tabular.version import __version__
 
@@ -68,6 +68,25 @@ def run(dataset, config):
 
     label = dataset.target.name
     problem_type = dataset.problem_type
+    """
+    The _include_test_during_fit flag enables the test_data to be passed into AutoGluon's predictor
+    during the fit call. If enabled, the test_data is kept separate from all training and validation
+    data; it is never seen by the models, nor does it influence the training process in any way.
+
+    This flag is useful when generating learning curves: if it is enabled and learning_curves is
+    turned on, the learning curve artifacts will also include curves for the test dataset.
+    """
+    _include_test_during_fit = config.framework_params.get('_include_test_during_fit', False)
+    if _include_test_during_fit:
+        training_params["test_data"] = test_path
+
+    # whether to generate learning curves (VERY EXPENSIVE. Do not enable for benchmark comparisons.)
+    if "learning_curves" in training_params:
+        lc = training_params["learning_curves"]
+        _curve_metrics = config.framework_params.get('_curve_metrics', {})
+        if isinstance(lc, dict) and "metrics" not in lc and problem_type in _curve_metrics:
+            training_params["learning_curves"]["metrics"] = _curve_metrics[problem_type]
+
     models_dir = tempfile.mkdtemp() + os.sep  # passed to AG
 
     with Timer() as training:
@@ -119,6 +138,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
     prob_labels = probabilities.columns.values.astype(str).tolist() if probabilities is not None else None
     log.info(f"Finished predict in {predict.duration}s.")
 
+    learning_curves = predictor.learning_curves() if training_params.get("learning_curves", None) else None
     _leaderboard_extra_info = config.framework_params.get('_leaderboard_extra_info', False)  # whether to get extra model info (very verbose)
     _leaderboard_test = config.framework_params.get('_leaderboard_test', False)  # whether to compute test scores in leaderboard (expensive)
     leaderboard_kwargs = dict(extra_info=_leaderboard_extra_info)
@@ -136,7 +156,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
     else:
         num_models_ensemble = 1
 
-    save_artifacts(predictor, leaderboard, config)
+    save_artifacts(predictor, leaderboard, learning_curves, config)
     shutil.rmtree(predictor.path, ignore_errors=True)
 
     return result(output_file=config.output_predictions_file,
@@ -151,7 +171,7 @@ def inference_time_regression(data: Union[str, pd.DataFrame]):
                   inference_times=inference_times,)
 
 
-def save_artifacts(predictor, leaderboard, config):
+def save_artifacts(predictor, leaderboard, learning_curves, config):
     artifacts = config.framework_params.get('_save_artifacts', ['leaderboard'])
     try:
         if 'leaderboard' in artifacts:
@@ -167,6 +187,12 @@ def save_artifacts(predictor, leaderboard, config):
             shutil.rmtree(os.path.join(predictor.path, "utils"), ignore_errors=True)
             models_dir = output_subdir("models", config)
             zip_path(predictor.path, os.path.join(models_dir, "models.zip"))
+
+        if 'learning_curves' in artifacts:
+            assert learning_curves is not None, "No learning curves were generated!"
+            learning_curves_dir = output_subdir("learning_curves", config)
+            save_json.save(path=os.path.join(learning_curves_dir, "learning_curves.json"), obj=learning_curves)
+
     except Exception:
         log.warning("Error when saving artifacts.", exc_info=True)
 
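For reference, the framework definitions in the README and the `exec.py` changes combine roughly as in the standalone sketch below: non-underscore params (`learning_curves`, plus `test_data` when `_include_test_during_fit` is set) are forwarded to `TabularPredictor.fit()` through `training_params`, then the curves are fetched with `predictor.learning_curves()` and written with `save_json`. This is a sketch, not part of the PR: the CSV paths, label name, and metric list are placeholders, and it assumes an AutoGluon version whose `fit()` accepts the `learning_curves` and `test_data` arguments being forwarded here.

```
# Standalone sketch of what an AutoGluon_curves_* framework definition amounts to.
# Assumptions (not from the PR): the input CSV paths, label column name, and metric
# list are placeholders; the target AutoGluon version must accept the learning_curves
# and test_data keyword arguments that exec.py forwards through training_params.
from autogluon.core.utils.savers import save_json
from autogluon.tabular import TabularDataset, TabularPredictor

train_data = TabularDataset("train.csv")  # placeholder paths
test_data = TabularDataset("test.csv")

predictor = TabularPredictor(label="class").fit(
    train_data,
    test_data=test_data,      # what _include_test_during_fit enables
    learning_curves={         # what the learning_curves / _curve_metrics params become
        "use_error": True,
        "metrics": ["log_loss", "accuracy", "precision"],
    },
)

# exec.py retrieves the curves the same way and saves them under the
# learning_curves output subdirectory as learning_curves.json.
curves = predictor.learning_curves()
save_json.save(path="learning_curves.json", obj=curves)
```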