Skip to content

Commit

Permalink
Add test for Isolation Forest grid search in unsupervised fashion.
Browse files Browse the repository at this point in the history
In my opinion, grid search for Isolation Forest should be unsupervised by default and supervised only as an opt-in option.
  • Loading branch information
valenad1 committed Aug 31, 2021
1 parent bb2c7f4 commit 304f579
Show file tree
Hide file tree
Showing 4 changed files with 39 additions and 5 deletions.
6 changes: 3 additions & 3 deletions h2o-r/h2o-package/R/grid.R
Original file line number Diff line number Diff line change
Expand Up @@ -74,11 +74,11 @@ h2o.grid <- function(algorithm,
parallelism = 1)
{
#Unsupervised algos to account for in grid (these algos do not need response)
unsupervised_algos <- c("kmeans", "pca", "svd", "glrm", "extendedisolationforest")
unsupervised_algos <- c("kmeans", "pca", "svd", "glrm", "isolationforest", "extendedisolationforest")
# Parameter list
dots <- list(...)
# Add x, y, and training_frame
if(!(algorithm %in% c(unsupervised_algos, toupper(unsupervised_algos)))) {
if(!(algorithm %in% c(unsupervised_algos, toupper(unsupervised_algos))) || is_supervised) {
if(!missing(y)) {
dots$y <- y
} else {
Expand All @@ -94,7 +94,7 @@ h2o.grid <- function(algorithm,
stop("Must specify training frame, training_frame")
}
# If x is missing, then assume user wants to use all columns as features for supervised models only
if(!(algorithm %in% c(unsupervised_algos, toupper(unsupervised_algos)))) {
if(!(algorithm %in% c(unsupervised_algos, toupper(unsupervised_algos))) || is_supervised) {
if (missing(x)) {
if (is.numeric(y)) {
dots$x <- setdiff(col(training_frame), y)
Expand Down
1 change: 1 addition & 0 deletions h2o-r/h2o-package/R/models.R
Original file line number Diff line number Diff line change
Expand Up @@ -4048,6 +4048,7 @@ h2o.sdev <- function(object) {
if (algo == "kmeans" ||
algo == "glrm" ||
algo == "pca" ||
algo == "isolationforest" ||
algo == "extendedisolationforest" ||
(algo == "deeplearning" && !is.null(params$autoencoder) && params$autoencoder)) {
FALSE
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ source("../../../scripts/h2o-r-test-setup.R")



test.grid.resume <- function() {
test.grid.isolationforest.supervised <- function() {
iris.hex <-
h2o.importFile(path = locate("smalldata/iris/iris.csv"),
destination_frame = "iris.hex")
Expand All @@ -21,6 +21,7 @@ test.grid.resume <- function() {
x = 1:4,
y = 5,
training_frame = iris.hex,
is_supervised = TRUE,
hyper_params = hyper_parameters,
parallelism = 0
)
Expand Down Expand Up @@ -54,9 +55,10 @@ test.grid.resume <- function() {
training_frame = train,
validation_frame = test,
hyper_params = hyper_parameters,
is_supervised = TRUE,
validation_response_column = "label",
parallelism = 0
)
}

doTest("Parallel Grid Search test", test.grid.resume)
doTest("Parallel Grid Search test", test.grid.isolationforest.supervised)
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Switch to the directory of the path supplied through the "f" command-line
# argument (presumably this script's own path -- confirm against the test
# runner) so that the relative path to the setup script below resolves.
setwd(normalizePath(dirname(
R.utils::commandArgs(asValues = TRUE)$"f"
)))
# Load the H2O R test setup script, located relative to the directory set above.
source("../../../scripts/h2o-r-test-setup.R")


# Checks that a grid search over Isolation Forest works when called without a
# response column (neither `y` nor `is_supervised` is passed) and that the
# resulting grid holds one model per hyperparameter combination.
test.grid.isolationforest.unsupervised.default <- function() {
  single_blob.hex <-
    h2o.importFile(path = locate("smalldata/anomaly/single_blob.csv"),
                   destination_frame = "single_blob.hex")

  ntrees <- c(25, 50, 100)
  sample_size <- c(64, 128, 256)
  # The test expects one model for every (ntrees, sample_size) pair.
  size_of_hyper_space <- length(ntrees) * length(sample_size)

  hyper_parameters <- list(ntrees = ntrees, sample_size = sample_size)
  # Only predictor columns are given; no `y` is supplied on purpose.
  baseline_grid <-
    h2o.grid(
      "isolationforest",
      grid_id = "isofor_grid_test",
      x = c(1, 2),
      training_frame = single_blob.hex,
      hyper_params = hyper_parameters,
      parallelism = 0
    )

  number_of_models <- length(baseline_grid@model_ids)
  # Report expected and actual counts separately so that a failing run shows
  # both numbers under the correct label.
  print(paste("Expected size of hyperparameter space is", size_of_hyper_space))
  print(paste("Actual number of models in the grid is", number_of_models))
  expect_equal(number_of_models, size_of_hyper_space)
}


# Hand the test function and its description to doTest, which is defined
# outside this file (presumably by the sourced test setup script).
doTest("Parallel Grid Search test for Isolation Forest - default grid search is in unsupervised fashion", test.grid.isolationforest.unsupervised.default)

0 comments on commit 304f579

Please sign in to comment.