This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch branch-4.0
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.0 by this push:
new 5e0ddb571c31 [SPARK-50812][ML][PYTHON][CONNECT] Add support PolynomialExpansion
5e0ddb571c31 is described below
commit 5e0ddb571c31a746be2d228d46d37f5c33fbb19a
Author: Bobby Wang <[email protected]>
AuthorDate: Tue Jan 28 12:15:08 2025 +0800
[SPARK-50812][ML][PYTHON][CONNECT] Add support PolynomialExpansion
### What changes were proposed in this pull request?
Support `PolynomialExpansion` on Spark Connect.
### Why are the changes needed?
For feature parity between Spark Connect and classic Spark ML.
### Does this PR introduce _any_ user-facing change?
Yes
### How was this patch tested?
CI passes; this change adds `test_polynomial_expansion` to `python/pyspark/ml/tests/test_feature.py`.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #49702 from wbo4958/px.
Authored-by: Bobby Wang <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
(cherry picked from commit aa24a9a235b1e33adf39f67b661852298b3d3bdf)
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.../services/org.apache.spark.ml.Transformer | 1 +
python/pyspark/ml/tests/test_feature.py | 26 ++++++++++++++++++++++
2 files changed, 27 insertions(+)
diff --git a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
index 8aa1b1a00bca..84652286fdc8 100644
--- a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
+++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
@@ -36,6 +36,7 @@ org.apache.spark.ml.feature.FeatureHasher
org.apache.spark.ml.feature.ElementwiseProduct
org.apache.spark.ml.feature.HashingTF
org.apache.spark.ml.feature.IndexToString
+org.apache.spark.ml.feature.PolynomialExpansion
########### Model for loading
# classification
diff --git a/python/pyspark/ml/tests/test_feature.py b/python/pyspark/ml/tests/test_feature.py
index aa5643d69911..9eba5df33826 100644
--- a/python/pyspark/ml/tests/test_feature.py
+++ b/python/pyspark/ml/tests/test_feature.py
@@ -77,6 +77,7 @@ from pyspark.ml.feature import (
MinHashLSH,
MinHashLSHModel,
IndexToString,
+ PolynomialExpansion,
)
from pyspark.ml.linalg import DenseVector, SparseVector, Vectors
from pyspark.sql import Row
@@ -85,6 +86,31 @@ from pyspark.testing.mlutils import SparkSessionTestCase
class FeatureTestsMixin:
+ def test_polynomial_expansion(self):
+ df = self.spark.createDataFrame([(Vectors.dense([0.5, 2.0]),)], ["dense"])
+ px = PolynomialExpansion(degree=2)
+ px.setInputCol("dense")
+ px.setOutputCol("expanded")
+ self.assertTrue(
+ np.allclose(
+ px.transform(df).head().expanded.toArray(), [0.5, 0.25, 2.0, 1.0, 4.0], atol=1e-4
+ )
+ )
+
+ def check(p: PolynomialExpansion) -> None:
+ self.assertEqual(p.getInputCol(), "dense")
+ self.assertEqual(p.getOutputCol(), "expanded")
+ self.assertEqual(p.getDegree(), 2)
+
+ check(px)
+
+ # save & load
+ with tempfile.TemporaryDirectory(prefix="px") as d:
+ px.write().overwrite().save(d)
+ px2 = PolynomialExpansion.load(d)
+ self.assertEqual(str(px), str(px2))
+ check(px2)
+
def test_index_string(self):
dataset = self.spark.createDataFrame(
[
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]