Skip to content

Commit

Permalink
updated a parameter name that changed for the OneHotEncoder and added…
Browse files Browse the repository at this point in the history
… an idempotence check to the model's fit() function
  • Loading branch information
pavelkomarov committed Aug 26, 2024
1 parent 9c94169 commit 8c658a5
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 5 deletions.
17 changes: 13 additions & 4 deletions skpp/skpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,13 @@ def fit(self, X, Y):
Y = Y.reshape((-1,1)) # reshape returns a view to existing data
self.n_features_in_ = X.shape[1]

self._random = check_random_state(self.random_state)
# Due to the enormous number of least squares fits happening here,
# numerical drift is unavoidable, so short-circuit for idempotence
idempotence_hash = hash((X.tobytes() if isinstance(X, numpy.ndarray)
else str(X), Y.tobytes() if isinstance(Y, numpy.ndarray) else str(Y)))
if hasattr(self, 'idempotence_hash_') and \
self.idempotence_hash_ == idempotence_hash: return self
self.idempotence_hash_ = idempotence_hash

# Sklearn does not allow mutation of object parameters (the ones not
# prepended by an underscore), so construct or reassign weights
Expand Down Expand Up @@ -231,8 +237,11 @@ def fit(self, X, Y):
else:
self._out_dim_weights = self.out_dim_weights

self._random = check_random_state(self.random_state)

# Now that input and output dimensions are known, parameter vectors
# can be initialized. Vectors are always stored vertically.
# can be initialized. Vectors are always stored vertically. Use the random
# state to initialize idempotently if a random state is set.
self._alpha_ = self._random.randn(X.shape[1], self.r) # p x r
self._beta_ = self._random.randn(Y.shape[1], self.r) # d x r
self._f_ = [lambda x: x*0 for j in range(self.r)] # zero functions
Expand Down Expand Up @@ -501,8 +510,8 @@ def fit(self, X, Y):
self.classes_ = unique_labels(Y) # also performs some input validation.

# Encode the input Y as a multi-column H.
# sparse=False until numpy fixes crazy sparse matrix dot() behavior
self._encoder = OneHotEncoder(categories='auto', sparse=False)
# sparse_output=False until numpy fixes crazy sparse matrix dot() behavior
self._encoder = OneHotEncoder(categories='auto', sparse_output=False)
H = self._encoder.fit_transform(Y)

# Calculate the weights. See section 4 of math.pdf.
Expand Down
2 changes: 1 addition & 1 deletion skpp/tests/test_skpp.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
def test_regressor_passes_sklearn_checks():
estimator_checks.check_estimator(ProjectionPursuitRegressor())

def test_classifier_passes_sklearn_checks():
def test_classifier_passes_sklearn_checks(): # Note this one causes a warning in the single-class case
estimator_checks.check_estimator(ProjectionPursuitClassifier())

def test_construction_errors():
Expand Down

0 comments on commit 8c658a5

Please sign in to comment.