This is an automated email from the ASF dual-hosted git repository.
ruifengz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 8d2b57d1a1f8 [SPARK-50812][ML][PYTHON][CONNECT] Support TargetEncoderModel on Connect
8d2b57d1a1f8 is described below
commit 8d2b57d1a1f8eda45bfe48d0e0aca996207c2bf2
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Sat Jan 25 15:28:47 2025 +0800
[SPARK-50812][ML][PYTHON][CONNECT] Support TargetEncoderModel on Connect
### What changes were proposed in this pull request?
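Support `TargetEncoder` and `TargetEncoderModel` on Connect: register them in the `org.apache.spark.ml.Estimator` and `org.apache.spark.ml.Transformer` service files, and add a no-argument `private[ml]` constructor to `TargetEncoderModel`.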
### Why are the changes needed?
Feature parity: `TargetEncoder` was not yet supported on Connect.
### Does this PR introduce _any_ user-facing change?
Yes, `TargetEncoder` and `TargetEncoderModel` can now be used on Connect.
### How was this patch tested?
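Enabled the previously skipped parity tests `test_target_encoder_binary` and `test_target_encoder_continuous` in `python/pyspark/ml/tests/connect/test_parity_feature.py`.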
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #49648 from zhengruifeng/ml_connect_te.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Ruifeng Zheng <[email protected]>
---
.../resources/META-INF/services/org.apache.spark.ml.Estimator | 2 +-
.../resources/META-INF/services/org.apache.spark.ml.Transformer | 2 +-
.../main/scala/org/apache/spark/ml/feature/TargetEncoder.scala | 2 ++
python/pyspark/ml/tests/connect/test_parity_feature.py | 8 --------
4 files changed, 4 insertions(+), 10 deletions(-)
diff --git
a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator
b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator
index ca728566490d..0dcbe66afd34 100644
--- a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator
+++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Estimator
@@ -60,4 +60,4 @@ org.apache.spark.ml.feature.PCA
org.apache.spark.ml.feature.Word2Vec
org.apache.spark.ml.feature.CountVectorizer
org.apache.spark.ml.feature.OneHotEncoder
-
+org.apache.spark.ml.feature.TargetEncoder
diff --git
a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
index dbedcf3e26e0..1ebe5f733925 100644
--- a/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
+++ b/mllib/src/main/resources/META-INF/services/org.apache.spark.ml.Transformer
@@ -65,4 +65,4 @@ org.apache.spark.ml.feature.PCAModel
org.apache.spark.ml.feature.Word2VecModel
org.apache.spark.ml.feature.CountVectorizerModel
org.apache.spark.ml.feature.OneHotEncoderModel
-
+org.apache.spark.ml.feature.TargetEncoderModel
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala
b/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala
index 48783410448b..31504fead161 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/feature/TargetEncoder.scala
@@ -286,6 +286,8 @@ class TargetEncoderModel private[ml] (
@Since("4.0.0") private[ml] val stats: Array[Map[Double, (Double,
Double)]])
extends Model[TargetEncoderModel] with TargetEncoderBase with MLWritable {
+ private[ml] def this() = this(Identifiable.randomUID("TargetEncoder"),
Array.empty)
+
/** @group setParam */
@Since("4.0.0")
def setInputCol(value: String): this.type = set(inputCol, value)
diff --git a/python/pyspark/ml/tests/connect/test_parity_feature.py
b/python/pyspark/ml/tests/connect/test_parity_feature.py
index 55d299c06370..86fe42f5df89 100644
--- a/python/pyspark/ml/tests/connect/test_parity_feature.py
+++ b/python/pyspark/ml/tests/connect/test_parity_feature.py
@@ -58,14 +58,6 @@ class FeatureParityTests(FeatureTestsMixin,
ReusedConnectTestCase):
def test_string_indexer_from_labels(self):
super().test_string_indexer_from_labels()
- @unittest.skip("Need to support.")
- def test_target_encoder_binary(self):
- super().test_target_encoder_binary()
-
- @unittest.skip("Need to support.")
- def test_target_encoder_continuous(self):
- super().test_target_encoder_continuous()
-
@unittest.skip("Need to support.")
def test_vector_size_hint(self):
super().test_vector_size_hint()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]