This is an automated email from the ASF dual-hosted git repository.
dongjoon pushed a commit to branch branch-4.1
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/branch-4.1 by this push:
new 3ae7142d5dab [SPARK-54592][ML][4.1] Make `estimatedSize` private
3ae7142d5dab is described below
commit 3ae7142d5dabf52ddc702a43622a4d0a43aa03de
Author: Ruifeng Zheng <[email protected]>
AuthorDate: Thu Dec 4 08:27:48 2025 -0800
[SPARK-54592][ML][4.1] Make `estimatedSize` private
### What changes were proposed in this pull request?
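Make `estimatedSize` package-private (`private[spark]`) in the ML models, and likewise restrict `FMRegressor.estimateModelSize` (`private[spark]`) and `TreeEnsembleModel.getEstimatedSize` (`private[ml]`).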
### Why are the changes needed?
It is an internal method and should not be exposed to users.
### Does this PR introduce _any_ user-facing change?
no
### How was this patch tested?
ci
### Was this patch authored or co-authored using generative AI tooling?
no
Closes #53322 from zhengruifeng/ml_private_estimate_size_41.
Authored-by: Ruifeng Zheng <[email protected]>
Signed-off-by: Dongjoon Hyun <[email protected]>
---
.../org/apache/spark/ml/classification/DecisionTreeClassifier.scala | 2 +-
.../main/scala/org/apache/spark/ml/classification/FMClassifier.scala | 2 +-
.../main/scala/org/apache/spark/ml/classification/GBTClassifier.scala | 2 +-
.../org/apache/spark/ml/classification/RandomForestClassifier.scala | 2 +-
.../main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala | 3 ++-
.../main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala | 3 ++-
mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala | 3 ++-
mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala | 2 +-
mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala | 2 +-
mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala | 2 +-
.../scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala | 2 +-
mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala | 4 ++--
.../src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala | 2 +-
.../scala/org/apache/spark/ml/regression/RandomForestRegressor.scala | 2 +-
mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala | 2 +-
15 files changed, 19 insertions(+), 16 deletions(-)
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
index 8902d12bdf94..887d8277d311 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/classification/DecisionTreeClassifier.scala
@@ -195,7 +195,7 @@ class DecisionTreeClassificationModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Node.dummyNode, -1, -1)
- override def estimatedSize: Long = getEstimatedSize()
+ private[spark] override def estimatedSize: Long = getEstimatedSize()
override def predict(features: Vector): Double = {
rootNode.predictImpl(features).prediction
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
index b653383161e7..29ca909f7930 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/classification/FMClassifier.scala
@@ -314,7 +314,7 @@ class FMClassificationModel private[classification] (
copyValues(new FMClassificationModel(uid, intercept, linear, factors),
extra)
}
- override def estimatedSize: Long = {
+ private[spark] override def estimatedSize: Long = {
var size = this.estimateMatadataSize
if (this.linear != null) {
size += this.linear.getSizeInBytes
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
index 9ca3a1660958..2c5c7e7740a3 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/classification/GBTClassifier.scala
@@ -276,7 +276,7 @@ class GBTClassificationModel private[ml](
private[ml] def this() = this("",
Array(new DecisionTreeRegressionModel), Array(Double.NaN), -1, -1)
- override def estimatedSize: Long = getEstimatedSize()
+ private[spark] override def estimatedSize: Long = getEstimatedSize()
@Since("1.4.0")
override def trees: Array[DecisionTreeRegressionModel] = _trees
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
index 8b580b1e075c..fb61358536d0 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/classification/RandomForestClassifier.scala
@@ -240,7 +240,7 @@ class RandomForestClassificationModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Array(new
DecisionTreeClassificationModel), -1, -1)
- override def estimatedSize: Long = getEstimatedSize()
+ private[spark] override def estimatedSize: Long = getEstimatedSize()
@Since("1.4.0")
override def trees: Array[DecisionTreeClassificationModel] = _trees
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
index 9e09ee00c3e3..c129da5f0d7e 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/BisectingKMeans.scala
@@ -179,7 +179,8 @@ class BisectingKMeansModel private[ml] (
@Since("2.1.0")
override def summary: BisectingKMeansSummary = super.summary
- override def estimatedSize: Long = SizeEstimator.estimate(parentModel)
+ private[spark] override def estimatedSize: Long =
+ SizeEstimator.estimate(parentModel)
// BisectingKMeans model hasn't supported offloading, so put an empty
`saveSummary` here for now
override private[spark] def saveSummary(path: String): Unit = {}
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
index 5d3e36be2808..ddcca167ff30 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/GaussianMixture.scala
@@ -222,7 +222,8 @@ class GaussianMixtureModel private[ml] (
@Since("2.0.0")
override def summary: GaussianMixtureSummary = super.summary
- override def estimatedSize: Long = SizeEstimator.estimate((weights,
gaussians))
+ private[spark] override def estimatedSize: Long =
+ SizeEstimator.estimate((weights, gaussians))
private[spark] def createSummary(
predictions: DataFrame, logLikelihood: Double, iteration: Int
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
index 2abd82c71296..ad6ca0924064 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/KMeans.scala
@@ -213,7 +213,8 @@ class KMeansModel private[ml] (
@Since("2.0.0")
override def summary: KMeansSummary = super.summary
- override def estimatedSize: Long =
SizeEstimator.estimate(parentModel.clusterCenters)
+ private[spark] override def estimatedSize: Long =
+ SizeEstimator.estimate(parentModel.clusterCenters)
private[spark] def createSummary(
predictions: DataFrame, numIter: Int, trainingCost: Double
diff --git a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
index 67c9a8f58dd2..c64d3a98c0a9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/clustering/LDA.scala
@@ -826,7 +826,7 @@ class DistributedLDAModel private[ml] (
s"DistributedLDAModel: uid=$uid, k=${$(k)}, numFeatures=$vocabSize"
}
- override def estimatedSize: Long = {
+ private[spark] override def estimatedSize: Long = {
// TODO: Implement this method.
throw new UnsupportedOperationException
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
index 6fd20ceb562b..acd4635c5bbf 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/fpm/FPGrowth.scala
@@ -323,7 +323,7 @@ class FPGrowthModel private[ml] (
s"FPGrowthModel: uid=$uid, numTrainingRecords=$numTrainingRecords"
}
- override def estimatedSize: Long = {
+ private[spark] override def estimatedSize: Long = {
// TODO: Implement this method.
throw new UnsupportedOperationException
}
diff --git a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
index 538ad0382075..1cee915046c0 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/recommendation/ALS.scala
@@ -540,7 +540,7 @@ class ALSModel private[ml] (
}
}
- override def estimatedSize: Long = {
+ private[spark] override def estimatedSize: Long = {
val userCount = userFactors.count()
val itemCount = itemFactors.count()
(userCount + itemCount) * (rank + 1) * 4
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
index f049e9a44cc2..5387e0e282a3 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/regression/DecisionTreeRegressor.scala
@@ -190,7 +190,7 @@ class DecisionTreeRegressionModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Node.dummyNode, -1)
- override def estimatedSize: Long = getEstimatedSize()
+ private[spark] override def estimatedSize: Long = getEstimatedSize()
override def predict(features: Vector): Double = {
rootNode.predictImpl(features).prediction
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
index 1b624895c7f3..d2fcb9280c63 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/FMRegressor.scala
@@ -443,7 +443,7 @@ class FMRegressor @Since("3.0.0") (
@Since("3.0.0")
override def copy(extra: ParamMap): FMRegressor = defaultCopy(extra)
- override def estimateModelSize(dataset: Dataset[_]): Long = {
+ private[spark] override def estimateModelSize(dataset: Dataset[_]): Long = {
val numFeatures = DatasetUtils.getNumFeatures(dataset, $(featuresCol))
var size = this.estimateMatadataSize
@@ -488,7 +488,7 @@ class FMRegressionModel private[regression] (
copyValues(new FMRegressionModel(uid, intercept, linear, factors), extra)
}
- override def estimatedSize: Long = {
+ private[spark] override def estimatedSize: Long = {
var size = this.estimateMatadataSize
if (this.linear != null) {
size += this.linear.getSizeInBytes
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
index c8fa97bfccce..71436036d1ea 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/regression/GBTRegressor.scala
@@ -245,7 +245,7 @@ class GBTRegressionModel private[ml](
// For ml connect only
private[ml] def this() = this("", Array(new DecisionTreeRegressionModel),
Array(Double.NaN), -1)
- override def estimatedSize: Long = getEstimatedSize()
+ private[spark] override def estimatedSize: Long = getEstimatedSize()
@Since("1.4.0")
override def trees: Array[DecisionTreeRegressionModel] = _trees
diff --git
a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
index a9e2c47a3229..8d9b4817833b 100644
---
a/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
+++
b/mllib/src/main/scala/org/apache/spark/ml/regression/RandomForestRegressor.scala
@@ -215,7 +215,7 @@ class RandomForestRegressionModel private[ml] (
// For ml connect only
private[ml] def this() = this("", Array(new DecisionTreeRegressionModel), -1)
- override def estimatedSize: Long = getEstimatedSize()
+ private[spark] override def estimatedSize: Long = getEstimatedSize()
@Since("1.4.0")
override def trees: Array[DecisionTreeRegressionModel] = _trees
diff --git a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
index b20b2e943dee..4e9fa89cbde9 100644
--- a/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
+++ b/mllib/src/main/scala/org/apache/spark/ml/tree/treeModels.scala
@@ -175,7 +175,7 @@ private[spark] trait TreeEnsembleModel[M <:
DecisionTreeModel] {
new AttributeGroup(leafCol, attrs = trees.map(_.leafAttr)).toStructField()
}
- def getEstimatedSize(): Long = {
+ private[ml] def getEstimatedSize(): Long = {
org.apache.spark.util.SizeEstimator.estimate(trees.map(_.rootNode))
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]