diff --git a/sklego/meta/outlier_classifier.py b/sklego/meta/outlier_classifier.py index 789770aa3..56313d555 100644 --- a/sklego/meta/outlier_classifier.py +++ b/sklego/meta/outlier_classifier.py @@ -23,6 +23,30 @@ class OutlierClassifier(BaseEstimator, ClassifierMixin): The fitted underlying outlier detection model. classes_ : array-like of shape (2,) Classes used for prediction (0 or 1) + + Example + ------- + ```py + from sklearn.ensemble import IsolationForest + from sklego.meta.outlier_classifier import OutlierClassifier + + X = [[0], [0.5], [-1], [99]] + y = [0, 0, 0, 1] + + isolation_forest = IsolationForest() + + outlier_clf = OutlierClassifier(isolation_forest) + _ = outlier_clf.fit(X, y) + + preds = outlier_clf.predict([[100], [-0.5], [0.5], [1]]) + # array([1., 0., 0., 0.]) + + proba_preds = outlier_clf.predict_proba([[100], [-0.5], [0.5], [1]]) + # [[0.34946567 0.65053433] + # [0.79707913 0.20292087] + # [0.80275406 0.19724594] + # [0.80275406 0.19724594]] + ``` """ def __init__(self, model): diff --git a/sklego/model_selection.py b/sklego/model_selection.py index cc882cb74..28cbc9a53 100644 --- a/sklego/model_selection.py +++ b/sklego/model_selection.py @@ -260,7 +260,7 @@ def KlusterFoldValidation(**kwargs): class ClusterFoldValidation: """Cross validator that creates folds based on provided cluster method. This ensures that data points in the same cluster are not split across different folds. - + !!! info "New in version 0.9.0" Parameters diff --git a/sklego/preprocessing/dictmapper.py b/sklego/preprocessing/dictmapper.py index 428038d90..d6373dcb2 100644 --- a/sklego/preprocessing/dictmapper.py +++ b/sklego/preprocessing/dictmapper.py @@ -23,6 +23,34 @@ class DictMapper(TransformerMixin, BaseEstimator): Number of features seen during `fit`. dim_ : int Deprecated, please use `n_features_in_` instead. 
+ + Example + ------- + ```py + import pandas as pd + from sklego.preprocessing.dictmapper import DictMapper + from sklearn.compose import ColumnTransformer + + X = pd.DataFrame({ + "city_pop": ["Amsterdam", "Leiden", "Utrecht", "None", "Haarlem"] + }) + + mapper = { + "Amsterdam": 1_181_817, + "Leiden": 130_181, + "Utrecht": 367_984, + "Haarlem": 165_396, + } + + ct = ColumnTransformer([("dictmapper", DictMapper(mapper, 0), ["city_pop"])]) + X_trans = ct.fit_transform(X) + X_trans + # array([[1181817], + # [ 130181], + # [ 367984], + # [ 0], + # [ 165396]]) + ``` """ def __init__(self, mapper, default): diff --git a/sklego/preprocessing/outlier_remover.py b/sklego/preprocessing/outlier_remover.py index d3511c9e7..a539d4fc7 100644 --- a/sklego/preprocessing/outlier_remover.py +++ b/sklego/preprocessing/outlier_remover.py @@ -34,7 +34,7 @@ class OutlierRemover(TrainOnlyTransformerMixin, BaseEstimator): isolation_forest = IsolationForest() isolation_forest.fit(X) - detector_preds = isolator_forest.predict(X) + detector_preds = isolation_forest.predict(X) outlier_remover = OutlierRemover(isolation_forest, refit=True) outlier_remover.fit(X)