Deep Isolation Forest method implemented
xuhongzuo committed Sep 5, 2023
1 parent 33981cc commit 8c00d8a
Showing 1 changed file with 12 additions and 15 deletions.
27 changes: 12 additions & 15 deletions pyod/models/dif.py
```diff
@@ -15,9 +15,6 @@
 from sklearn.preprocessing import StandardScaler, MinMaxScaler
 from torch.utils.data import DataLoader
 
-# from pyod.models.base import BaseDetector
-# from pyod.utils.utility import check_parameter
-# from pyod.utils.torch_utility import get_activation_by_name
 from .base import BaseDetector
 from ..utils.utility import check_parameter
 from ..utils.torch_utility import get_activation_by_name
```
```diff
@@ -26,8 +23,7 @@
 class DIF(BaseDetector):
     """Deep Isolation Forest (DIF) is an extension of iForest. It uses deep
     representation ensemble to achieve non-linear isolation on original data
-    space. See :cite:`xu2023dif`
-    for details.
+    space. See :cite:`xu2023dif` for details.
 
     Parameters
     ----------
```
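The docstring summarizes the method: an ensemble of randomly initialized deep representations maps the data non-linearly, and isolation trees are then built in the projected space. For orientation, a minimal usage sketch assuming the standard PyOD `BaseDetector` interface (`contamination`, `decision_scores_`, and `labels_` come from the base class, not from this diff):

```python
# Minimal usage sketch, assuming the standard PyOD BaseDetector interface;
# nothing here is specific to the changes in this commit.
import numpy as np
from pyod.models.dif import DIF

X_train = np.random.randn(1000, 10)

clf = DIF(contamination=0.1)   # expected fraction of outliers
clf.fit(X_train)

scores = clf.decision_scores_  # raw outlier scores on the training data
labels = clf.labels_           # 0 = inlier, 1 = outlier
```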
```diff
@@ -265,7 +261,9 @@ def _deep_representation(self, net, X):
         x_reduced = []
 
         with torch.no_grad():
-            loader = DataLoader(X, batch_size=self.batch_size, drop_last=False, pin_memory=True, shuffle=False)
+            loader = DataLoader(X, batch_size=self.batch_size,
+                                drop_last=False, pin_memory=True,
+                                shuffle=False)
             for batch_x in loader:
                 batch_x = batch_x.float().to(self.device)
                 batch_x_reduced = net(batch_x)
```
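The reflowed `DataLoader` call above is the inference path that produces the deep representations: a gradient-free, batched forward pass over the whole data set. Stripped of the class context, the pattern looks like this (a sketch; `net`, `batch_size`, and `device` stand in for the instance attributes):

```python
# Sketch of the batched, no-grad representation pass in the hunk above;
# net, batch_size, and device stand in for the DIF instance attributes.
import numpy as np
import torch
from torch.utils.data import DataLoader

def deep_representation(net, X, batch_size=64, device='cpu'):
    x_reduced = []
    with torch.no_grad():  # inference only, no autograd graph is built
        loader = DataLoader(X, batch_size=batch_size,
                            drop_last=False, shuffle=False)
        for batch_x in loader:
            batch_x = batch_x.float().to(device)
            x_reduced.append(net(batch_x).cpu().numpy())
    return np.concatenate(x_reduced, axis=0)
```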
```diff
@@ -278,7 +276,7 @@ def _deep_representation(self, net, X):
 
 
 class MLPnet(torch.nn.Module):
-    def __init__(self, n_features, n_hidden=[500, 100], n_output=20, mid_channels=None,
+    def __init__(self, n_features, n_hidden=[500, 100], n_output=20,
                  activation='ReLU', bias=False, batch_norm=False,
                  skip_connection=False):
         super(MLPnet, self).__init__()
```
```diff
@@ -295,8 +293,9 @@ def __init__(self, n_features, n_hidden=[500, 100], n_output=20, mid_channels=None,
 
         self.layers = []
         for i in range(num_layers+1):
-            in_channels, out_channels = self.get_in_out_channels(i, num_layers, n_features,
-                                                                 n_hidden, n_output, skip_connection)
+            in_channels, out_channels = \
+                self.get_in_out_channels(i, num_layers, n_features,
+                                         n_hidden, n_output, skip_connection)
             self.layers += [
                 LinearBlock(in_channels, out_channels,
                             bias=bias, batch_norm=batch_norm,
```
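In the loop above, `get_in_out_channels` supplies the channel pair for each `LinearBlock`, which lets skip connections widen the inputs of later layers. For the plain no-skip-connection case the pairs simply walk the list `[n_features, *n_hidden, n_output]`; a hypothetical condensed equivalent:

```python
# Hypothetical condensed equivalent of the construction loop above for the
# no-skip-connection case: channel pairs walk [n_features, *n_hidden, n_output].
import torch

def build_mlp(n_features, n_hidden=(500, 100), n_output=20, bias=False):
    dims = [n_features, *n_hidden, n_output]
    layers = []
    for in_c, out_c in zip(dims[:-1], dims[1:]):
        layers.append(torch.nn.Linear(in_c, out_c, bias=bias))
        layers.append(torch.nn.ReLU())
    return torch.nn.Sequential(*layers[:-1])  # no activation after the output

net = build_mlp(10)  # 10 -> 500 -> 100 -> 20
```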
```diff
@@ -362,10 +361,6 @@ def _cal_score(xx, clf):
     leaf_samples = np.zeros((xx.shape[0], len(clf.estimators_)))
 
     for ii, estimator_tree in enumerate(clf.estimators_):
-        # estimator_population_ind = sample_without_replacement(n_population=xx.shape[0], n_samples=256,
-        #                                                       random_state=estimator_tree.random_state)
-        # estimator_population = xx[estimator_population_ind]
-
         tree = estimator_tree.tree_
         n_node = tree.node_count
 
```
```diff
@@ -383,7 +378,8 @@ def _cal_score(xx, clf):
         # The number of training samples in each test sample leaf
         n_node_samples = estimator_tree.tree_.n_node_samples
 
-        # node_indicator is a sparse matrix with shape (n_samples, n_nodes), indicating the path of input data samples
+        # node_indicator is a sparse matrix with shape (n_samples, n_nodes),
+        # indicating the path of input data samples
         # each layer would result in a non-zero element in this matrix,
         # and then the row-wise summation is the depth of data sample
         n_samples_leaf = estimator_tree.tree_.n_node_samples[leaves_index]
```
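The rewrapped comment describes the core of the scoring step: `decision_path` yields a sparse node-indicator matrix whose row sums are each sample's traversal depth. A standalone sketch of that computation against a plain scikit-learn `IsolationForest` (variable names are illustrative):

```python
# Sketch of the depth computation described in the comments above, run
# against a plain scikit-learn IsolationForest; names are illustrative.
import numpy as np
from sklearn.ensemble import IsolationForest

xx = np.random.randn(200, 5)
clf = IsolationForest(n_estimators=10, random_state=0).fit(xx)

estimator_tree = clf.estimators_[0]
node_indicator = estimator_tree.decision_path(xx)  # sparse (n_samples, n_nodes)
depths = np.asarray(node_indicator.sum(axis=1)).ravel() - 1  # edges on each path
leaves_index = estimator_tree.apply(xx)             # leaf node id per sample
n_samples_leaf = estimator_tree.tree_.n_node_samples[leaves_index]
```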
```diff
@@ -394,7 +390,8 @@ def _cal_score(xx, clf):
         # decision path of data matrix XX
         node_indicator = np.array(node_indicator.todense())
 
-        # set a matrix with shape [n_sample, n_node], representing the feature value of each sample on each node
+        # set a matrix with shape [n_sample, n_node],
+        # representing the feature value of each sample on each node
        # set the leaf node as -2
         value_mat = np.array([xx[i][feature_lst] for i in range(xx.shape[0])])
         value_mat[:, np.where(feature_lst == -2)[0]] = -2
```
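`value_mat` pairs each sample with the feature value tested at every node; `tree.feature` is -2 at leaf nodes, hence the sentinel in the last line. The per-row loop can also be written as one fancy-indexing step, sketched below (`tree` is the fitted `estimator_tree.tree_` object):

```python
# Vectorized sketch of the value_mat construction above; tree is a fitted
# sklearn Tree object (estimator_tree.tree_), whose feature array is -2 at leaves.
import numpy as np

def node_value_matrix(xx, tree):
    feature_lst = tree.feature            # shape (n_nodes,), -2 marks leaves
    value_mat = xx[:, feature_lst]        # -2 wraps around; fixed on the next line
    value_mat[:, feature_lst == -2] = -2  # leaf columns get the sentinel value
    return value_mat
```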
