diff --git a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
index 8aa1b1a00bcac..84652286fdc8b 100644
--- a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
+++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
@@ -36,6 +36,7 @@ org.apache.spark.ml.feature.FeatureHasher
 org.apache.spark.ml.feature.ElementwiseProduct
 org.apache.spark.ml.feature.HashingTF
 org.apache.spark.ml.feature.IndexToString
+org.apache.spark.ml.feature.PolynomialExpansion
 
 ########### Model for loading
 # classification
diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py
index aa5643d699115..9eba5df33826a 100644
--- a/python/pyspark/ml/tests/test_feature.py
+++ b/python/pyspark/ml/tests/test_feature.py
@@ -77,6 +77,7 @@
     MinHashLSH,
     MinHashLSHModel,
     IndexToString,
+    PolynomialExpansion,
 )
 from pyspark.ml.linalg import DenseVector, SparseVector, Vectors
 from pyspark.sql import Row
@@ -85,6 +86,31 @@
 
 
 class FeatureTestsMixin:
+    def test_polynomial_expansion(self):
+        df = self.spark.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"])
+        px = PolynomialExpansion(degree=2)
+        px.setInputCol("dense")
+        px.setOutputCol("expanded")
+        self.assertTrue(
+            np.allclose(
+                px.transform(df).head().expanded.toArray(), [0.5, 0.25, 2.0, 1.0, 4.0], atol=1e-4
+            )
+        )
+
+        def check(p: PolynomialExpansion) -> None:
+            self.assertEqual(p.getInputCol(), "dense")
+            self.assertEqual(p.getOutputCol(), "expanded")
+            self.assertEqual(p.getDegree(), 2)
+
+        check(px)
+
+        # save & load
+        with tempfile.TemporaryDirectory(prefix="px") as d:
+            px.write().overwrite().save(d)
+            px2 = PolynomialExpansion.load(d)
+            self.assertEqual(str(px), str(px2))
+            check(px2)
+
     def test_index_string(self):
         dataset = self.spark.createDataFrame(
             [